forked from josch/img2pdf
Add support for pikepdf
This commit is contained in:
parent
65d9aed630
commit
bca3f802ac
4 changed files with 144 additions and 93 deletions
1
setup.py
1
setup.py
|
@ -5,6 +5,7 @@ VERSION = "0.3.6"
|
||||||
|
|
||||||
INSTALL_REQUIRES = (
|
INSTALL_REQUIRES = (
|
||||||
'Pillow',
|
'Pillow',
|
||||||
|
'pikepdf',
|
||||||
)
|
)
|
||||||
|
|
||||||
TESTS_REQUIRE = (
|
TESTS_REQUIRE = (
|
||||||
|
|
192
src/img2pdf.py
192
src/img2pdf.py
|
@ -40,6 +40,12 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
have_pdfrw = False
|
have_pdfrw = False
|
||||||
|
|
||||||
|
have_pikepdf = True
|
||||||
|
try:
|
||||||
|
import pikepdf
|
||||||
|
except ImportError:
|
||||||
|
have_pikepdf = False
|
||||||
|
|
||||||
__version__ = "0.3.6"
|
__version__ = "0.3.6"
|
||||||
default_dpi = 96.0
|
default_dpi = 96.0
|
||||||
papersizes = {
|
papersizes = {
|
||||||
|
@ -483,13 +489,12 @@ class MyPdfArray(list):
|
||||||
|
|
||||||
|
|
||||||
class MyPdfWriter:
|
class MyPdfWriter:
|
||||||
def __init__(self, version="1.3"):
|
def __init__(self):
|
||||||
self.objects = []
|
self.objects = []
|
||||||
# create an incomplete pages object so that a /Parent entry can be
|
# create an incomplete pages object so that a /Parent entry can be
|
||||||
# added to each page
|
# added to each page
|
||||||
self.pages = MyPdfDict(Type=MyPdfName.Pages, Kids=[], Count=0)
|
self.pages = MyPdfDict(Type=MyPdfName.Pages, Kids=[], Count=0)
|
||||||
self.catalog = MyPdfDict(Pages=self.pages, Type=MyPdfName.Catalog)
|
self.catalog = MyPdfDict(Pages=self.pages, Type=MyPdfName.Catalog)
|
||||||
self.version = version # default pdf version 1.3
|
|
||||||
self.pagearray = []
|
self.pagearray = []
|
||||||
|
|
||||||
def addobj(self, obj):
|
def addobj(self, obj):
|
||||||
|
@ -497,7 +502,7 @@ class MyPdfWriter:
|
||||||
obj.identifier = newid
|
obj.identifier = newid
|
||||||
self.objects.append(obj)
|
self.objects.append(obj)
|
||||||
|
|
||||||
def tostream(self, info, stream):
|
def tostream(self, info, stream, version="1.3"):
|
||||||
xreftable = list()
|
xreftable = list()
|
||||||
|
|
||||||
# justification of the random binary garbage in the header from
|
# justification of the random binary garbage in the header from
|
||||||
|
@ -514,7 +519,7 @@ class MyPdfWriter:
|
||||||
#
|
#
|
||||||
# the choice of binary characters is arbitrary but those four seem to
|
# the choice of binary characters is arbitrary but those four seem to
|
||||||
# be used elsewhere.
|
# be used elsewhere.
|
||||||
pdfheader = ("%%PDF-%s\n" % self.version).encode("ascii")
|
pdfheader = ("%%PDF-%s\n" % version).encode("ascii")
|
||||||
pdfheader += b"%\xe2\xe3\xcf\xd3\n"
|
pdfheader += b"%\xe2\xe3\xcf\xd3\n"
|
||||||
stream.write(pdfheader)
|
stream.write(pdfheader)
|
||||||
|
|
||||||
|
@ -616,12 +621,18 @@ class pdfdoc(object):
|
||||||
fullscreen=False,
|
fullscreen=False,
|
||||||
):
|
):
|
||||||
if engine is None:
|
if engine is None:
|
||||||
if have_pdfrw:
|
if have_pikepdf:
|
||||||
|
engine = Engine.pdfrw
|
||||||
|
elif have_pdfrw:
|
||||||
engine = Engine.pdfrw
|
engine = Engine.pdfrw
|
||||||
else:
|
else:
|
||||||
engine = Engine.internal
|
engine = Engine.internal
|
||||||
|
|
||||||
if engine == Engine.pdfrw:
|
if engine == Engine.pikepdf:
|
||||||
|
PdfWriter = pikepdf.new
|
||||||
|
PdfDict = pikepdf.Dictionary
|
||||||
|
PdfName = pikepdf.Name
|
||||||
|
elif engine == Engine.pdfrw:
|
||||||
from pdfrw import PdfWriter, PdfDict, PdfName, PdfString
|
from pdfrw import PdfWriter, PdfDict, PdfName, PdfString
|
||||||
elif engine == Engine.internal:
|
elif engine == Engine.internal:
|
||||||
PdfWriter = MyPdfWriter
|
PdfWriter = MyPdfWriter
|
||||||
|
@ -631,47 +642,43 @@ class pdfdoc(object):
|
||||||
else:
|
else:
|
||||||
raise ValueError("unknown engine: %s" % engine)
|
raise ValueError("unknown engine: %s" % engine)
|
||||||
|
|
||||||
now = datetime.now()
|
self.writer = PdfWriter()
|
||||||
self.info = PdfDict(indirect=True)
|
if engine != Engine.pikepdf:
|
||||||
|
self.writer.docinfo = PdfDict(indirect=True)
|
||||||
|
|
||||||
def datetime_to_pdfdate(dt):
|
def datetime_to_pdfdate(dt):
|
||||||
return dt.strftime("%Y%m%d%H%M%SZ")
|
return dt.strftime("%Y%m%d%H%M%SZ")
|
||||||
|
|
||||||
if title is not None:
|
for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
|
||||||
self.info[PdfName.Title] = PdfString.encode(title)
|
v = locals()[k.lower()]
|
||||||
if author is not None:
|
if v is None or v == "":
|
||||||
self.info[PdfName.Author] = PdfString.encode(author)
|
continue
|
||||||
if creator is not None:
|
if engine != Engine.pikepdf:
|
||||||
self.info[PdfName.Creator] = PdfString.encode(creator)
|
v = PdfString.encode(v)
|
||||||
if producer is not None and producer != "":
|
self.writer.docinfo[getattr(PdfName,k)] = v
|
||||||
self.info[PdfName.Producer] = PdfString.encode(producer)
|
|
||||||
if creationdate is not None:
|
|
||||||
self.info[PdfName.CreationDate] = PdfString.encode(
|
|
||||||
"D:" + datetime_to_pdfdate(creationdate)
|
|
||||||
)
|
|
||||||
elif not nodate:
|
|
||||||
self.info[PdfName.CreationDate] = PdfString.encode(
|
|
||||||
"D:" + datetime_to_pdfdate(now)
|
|
||||||
)
|
|
||||||
if moddate is not None:
|
|
||||||
self.info[PdfName.ModDate] = PdfString.encode(
|
|
||||||
"D:" + datetime_to_pdfdate(moddate)
|
|
||||||
)
|
|
||||||
elif not nodate:
|
|
||||||
self.info[PdfName.ModDate] = PdfString.encode(
|
|
||||||
"D:" + datetime_to_pdfdate(now)
|
|
||||||
)
|
|
||||||
if subject is not None:
|
|
||||||
self.info[PdfName.Subject] = PdfString.encode(subject)
|
|
||||||
if keywords is not None:
|
|
||||||
self.info[PdfName.Keywords] = PdfString.encode(",".join(keywords))
|
|
||||||
|
|
||||||
self.writer = PdfWriter()
|
now = datetime.now()
|
||||||
self.writer.version = version
|
for k in ["CreationDate", "ModDate"]:
|
||||||
|
v = locals()[k.lower()]
|
||||||
|
if v is None and nodate:
|
||||||
|
continue
|
||||||
|
if v is None:
|
||||||
|
v = now
|
||||||
|
v = ("D:" + datetime_to_pdfdate(v)).encode("ascii")
|
||||||
|
self.writer.docinfo[getattr(PdfName, k)] = v
|
||||||
|
if keywords is not None:
|
||||||
|
if engine == Engine.pikepdf:
|
||||||
|
self.writer.docinfo[PdfName.Keywords] = ",".join(keywords)
|
||||||
|
else:
|
||||||
|
self.writer.docinfo[PdfName.Keywords] = PdfString.encode(
|
||||||
|
",".join(keywords)
|
||||||
|
)
|
||||||
|
|
||||||
|
if engine != Engine.pikepdf:
|
||||||
# this is done because pdfrw adds info, catalog and pages as the first
|
# this is done because pdfrw adds info, catalog and pages as the first
|
||||||
# three objects in this order
|
# three objects in this order
|
||||||
if engine == Engine.internal:
|
if engine == Engine.internal:
|
||||||
self.writer.addobj(self.info)
|
self.writer.addobj(self.writer.docinfo)
|
||||||
self.writer.addobj(self.writer.catalog)
|
self.writer.addobj(self.writer.catalog)
|
||||||
self.writer.addobj(self.writer.pages)
|
self.writer.addobj(self.writer.pages)
|
||||||
|
|
||||||
|
@ -683,6 +690,7 @@ class pdfdoc(object):
|
||||||
self.center_window = center_window
|
self.center_window = center_window
|
||||||
self.fullscreen = fullscreen
|
self.fullscreen = fullscreen
|
||||||
self.engine = engine
|
self.engine = engine
|
||||||
|
self.output_version = version
|
||||||
|
|
||||||
def add_imagepage(
|
def add_imagepage(
|
||||||
self,
|
self,
|
||||||
|
@ -707,9 +715,14 @@ class pdfdoc(object):
|
||||||
trimborder=None,
|
trimborder=None,
|
||||||
artborder=None,
|
artborder=None,
|
||||||
):
|
):
|
||||||
if self.engine == Engine.pdfrw:
|
if self.engine == Engine.pikepdf:
|
||||||
|
PdfArray = pikepdf.Array
|
||||||
|
PdfDict = pikepdf.Dictionary
|
||||||
|
PdfName = pikepdf.Name
|
||||||
|
elif self.engine == Engine.pdfrw:
|
||||||
from pdfrw import PdfDict, PdfName, PdfObject, PdfString
|
from pdfrw import PdfDict, PdfName, PdfObject, PdfString
|
||||||
from pdfrw.py23_diffs import convert_load
|
from pdfrw.py23_diffs import convert_load
|
||||||
|
|
||||||
elif self.engine == Engine.internal:
|
elif self.engine == Engine.internal:
|
||||||
PdfDict = MyPdfDict
|
PdfDict = MyPdfDict
|
||||||
PdfName = MyPdfName
|
PdfName = MyPdfName
|
||||||
|
@ -718,6 +731,8 @@ class pdfdoc(object):
|
||||||
convert_load = my_convert_load
|
convert_load = my_convert_load
|
||||||
else:
|
else:
|
||||||
raise ValueError("unknown engine: %s" % self.engine)
|
raise ValueError("unknown engine: %s" % self.engine)
|
||||||
|
TrueObject = True if self.engine == Engine.pikepdf else PdfObject("true")
|
||||||
|
FalseObject = False if self.engine == Engine.pikepdf else PdfObject("false")
|
||||||
|
|
||||||
if color == Colorspace["1"] or color == Colorspace.L:
|
if color == Colorspace["1"] or color == Colorspace.L:
|
||||||
colorspace = PdfName.DeviceGray
|
colorspace = PdfName.DeviceGray
|
||||||
|
@ -727,16 +742,27 @@ class pdfdoc(object):
|
||||||
colorspace = PdfName.DeviceCMYK
|
colorspace = PdfName.DeviceCMYK
|
||||||
elif color == Colorspace.P:
|
elif color == Colorspace.P:
|
||||||
if self.engine == Engine.pdfrw:
|
if self.engine == Engine.pdfrw:
|
||||||
|
# https://github.com/pmaupin/pdfrw/issues/128
|
||||||
|
# https://github.com/pmaupin/pdfrw/issues/147
|
||||||
raise Exception(
|
raise Exception(
|
||||||
"pdfrw does not support hex strings for "
|
"pdfrw does not support hex strings for "
|
||||||
"palette image input, re-run with "
|
"palette image input, re-run with "
|
||||||
"--engine=internal"
|
"--engine=internal or --engine=pikepdf"
|
||||||
)
|
)
|
||||||
|
assert len(palette) % 3 == 0
|
||||||
colorspace = [
|
colorspace = [
|
||||||
PdfName.Indexed,
|
PdfName.Indexed,
|
||||||
PdfName.DeviceRGB,
|
PdfName.DeviceRGB,
|
||||||
len(palette) - 1,
|
(len(palette) // 3) - 1,
|
||||||
PdfString.encode(palette, hextype=True),
|
bytes(palette)
|
||||||
|
if self.engine == Engine.pikepdf
|
||||||
|
else PdfString.encode(
|
||||||
|
[
|
||||||
|
int.from_bytes(palette[i : i + 3], "big")
|
||||||
|
for i in range(0, len(palette), 3)
|
||||||
|
],
|
||||||
|
hextype=True,
|
||||||
|
),
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
raise UnsupportedColorspaceError("unsupported color space: %s" % color.name)
|
raise UnsupportedColorspaceError("unsupported color space: %s" % color.name)
|
||||||
|
@ -746,12 +772,15 @@ class pdfdoc(object):
|
||||||
ofilter = PdfName.DCTDecode
|
ofilter = PdfName.DCTDecode
|
||||||
elif imgformat is ImageFormat.JPEG2000:
|
elif imgformat is ImageFormat.JPEG2000:
|
||||||
ofilter = PdfName.JPXDecode
|
ofilter = PdfName.JPXDecode
|
||||||
self.writer.version = "1.5" # jpeg2000 needs pdf 1.5
|
self.output_version = "1.5" # jpeg2000 needs pdf 1.5
|
||||||
elif imgformat is ImageFormat.CCITTGroup4:
|
elif imgformat is ImageFormat.CCITTGroup4:
|
||||||
ofilter = [PdfName.CCITTFaxDecode]
|
ofilter = [PdfName.CCITTFaxDecode]
|
||||||
else:
|
else:
|
||||||
ofilter = PdfName.FlateDecode
|
ofilter = PdfName.FlateDecode
|
||||||
|
|
||||||
|
if self.engine == Engine.pikepdf:
|
||||||
|
image = self.writer.make_stream(imgdata)
|
||||||
|
else:
|
||||||
image = PdfDict(stream=convert_load(imgdata))
|
image = PdfDict(stream=convert_load(imgdata))
|
||||||
|
|
||||||
image[PdfName.Type] = PdfName.XObject
|
image[PdfName.Type] = PdfName.XObject
|
||||||
|
@ -764,7 +793,7 @@ class pdfdoc(object):
|
||||||
|
|
||||||
if color == Colorspace["CMYK;I"]:
|
if color == Colorspace["CMYK;I"]:
|
||||||
# Inverts all four channels
|
# Inverts all four channels
|
||||||
image[PdfName.Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
|
image[PdfName.Decode] = [1, 0, 1, 0, 1, 0, 1, 0]
|
||||||
|
|
||||||
if imgformat is ImageFormat.CCITTGroup4:
|
if imgformat is ImageFormat.CCITTGroup4:
|
||||||
decodeparms = PdfDict()
|
decodeparms = PdfDict()
|
||||||
|
@ -772,9 +801,9 @@ class pdfdoc(object):
|
||||||
# encoding. We set it to -1 because we want Group 4 encoding.
|
# encoding. We set it to -1 because we want Group 4 encoding.
|
||||||
decodeparms[PdfName.K] = -1
|
decodeparms[PdfName.K] = -1
|
||||||
if inverted:
|
if inverted:
|
||||||
decodeparms[PdfName.BlackIs1] = PdfObject("false")
|
decodeparms[PdfName.BlackIs1] = FalseObject
|
||||||
else:
|
else:
|
||||||
decodeparms[PdfName.BlackIs1] = PdfObject("true")
|
decodeparms[PdfName.BlackIs1] = TrueObject
|
||||||
decodeparms[PdfName.Columns] = imgwidthpx
|
decodeparms[PdfName.Columns] = imgwidthpx
|
||||||
decodeparms[PdfName.Rows] = imgheightpx
|
decodeparms[PdfName.Rows] = imgheightpx
|
||||||
image[PdfName.DecodeParms] = [decodeparms]
|
image[PdfName.DecodeParms] = [decodeparms]
|
||||||
|
@ -794,9 +823,15 @@ class pdfdoc(object):
|
||||||
% (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)
|
% (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)
|
||||||
).encode("ascii")
|
).encode("ascii")
|
||||||
|
|
||||||
|
if self.engine == Engine.pikepdf:
|
||||||
|
content = self.writer.make_stream(text)
|
||||||
|
else:
|
||||||
content = PdfDict(stream=convert_load(text))
|
content = PdfDict(stream=convert_load(text))
|
||||||
resources = PdfDict(XObject=PdfDict(Im0=image))
|
resources = PdfDict(XObject=PdfDict(Im0=image))
|
||||||
|
|
||||||
|
if self.engine == Engine.pikepdf:
|
||||||
|
page = self.writer.add_blank_page(page_size=(pagewidth, pageheight))
|
||||||
|
else:
|
||||||
page = PdfDict(indirect=True)
|
page = PdfDict(indirect=True)
|
||||||
page[PdfName.Type] = PdfName.Page
|
page[PdfName.Type] = PdfName.Page
|
||||||
page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight]
|
page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight]
|
||||||
|
@ -848,10 +883,11 @@ class pdfdoc(object):
|
||||||
page[PdfName.Rotate] = rotate
|
page[PdfName.Rotate] = rotate
|
||||||
if userunit is not None:
|
if userunit is not None:
|
||||||
# /UserUnit requires PDF 1.6
|
# /UserUnit requires PDF 1.6
|
||||||
if self.writer.version < "1.6":
|
if self.output_version < "1.6":
|
||||||
self.writer.version = "1.6"
|
self.output_version = "1.6"
|
||||||
page[PdfName.UserUnit] = userunit
|
page[PdfName.UserUnit] = userunit
|
||||||
|
|
||||||
|
if self.engine != Engine.pikepdf:
|
||||||
self.writer.addpage(page)
|
self.writer.addpage(page)
|
||||||
|
|
||||||
if self.engine == Engine.internal:
|
if self.engine == Engine.internal:
|
||||||
|
@ -864,8 +900,13 @@ class pdfdoc(object):
|
||||||
return stream.getvalue()
|
return stream.getvalue()
|
||||||
|
|
||||||
def tostream(self, outputstream):
|
def tostream(self, outputstream):
|
||||||
if self.engine == Engine.pdfrw:
|
if self.engine == Engine.pikepdf:
|
||||||
|
PdfArray = pikepdf.Array
|
||||||
|
PdfDict = pikepdf.Dictionary
|
||||||
|
PdfName = pikepdf.Name
|
||||||
|
elif self.engine == Engine.pdfrw:
|
||||||
from pdfrw import PdfDict, PdfName, PdfArray, PdfObject
|
from pdfrw import PdfDict, PdfName, PdfArray, PdfObject
|
||||||
|
|
||||||
elif self.engine == Engine.internal:
|
elif self.engine == Engine.internal:
|
||||||
PdfDict = MyPdfDict
|
PdfDict = MyPdfDict
|
||||||
PdfName = MyPdfName
|
PdfName = MyPdfName
|
||||||
|
@ -873,8 +914,8 @@ class pdfdoc(object):
|
||||||
PdfArray = MyPdfArray
|
PdfArray = MyPdfArray
|
||||||
else:
|
else:
|
||||||
raise ValueError("unknown engine: %s" % self.engine)
|
raise ValueError("unknown engine: %s" % self.engine)
|
||||||
NullObject = PdfObject("null")
|
NullObject = None if self.engine == Engine.pikepdf else PdfObject("null")
|
||||||
TrueObject = PdfObject("true")
|
TrueObject = True if self.engine == Engine.pikepdf else PdfObject("true")
|
||||||
|
|
||||||
# We fill the catalog with more information like /ViewerPreferences,
|
# We fill the catalog with more information like /ViewerPreferences,
|
||||||
# /PageMode, /PageLayout or /OpenAction because the latter refers to a
|
# /PageMode, /PageLayout or /OpenAction because the latter refers to a
|
||||||
|
@ -884,7 +925,9 @@ class pdfdoc(object):
|
||||||
# is added, so we can only start using it after all pages have been
|
# is added, so we can only start using it after all pages have been
|
||||||
# written.
|
# written.
|
||||||
|
|
||||||
if self.engine == Engine.pdfrw:
|
if self.engine == Engine.pikepdf:
|
||||||
|
catalog = self.writer.Root
|
||||||
|
elif self.engine == Engine.pdfrw:
|
||||||
catalog = self.writer.trailer.Root
|
catalog = self.writer.trailer.Root
|
||||||
elif self.engine == Engine.internal:
|
elif self.engine == Engine.internal:
|
||||||
catalog = self.writer.catalog
|
catalog = self.writer.catalog
|
||||||
|
@ -945,12 +988,18 @@ class pdfdoc(object):
|
||||||
# FitBV - Fits the height of the page bounding box to the window.
|
# FitBV - Fits the height of the page bounding box to the window.
|
||||||
|
|
||||||
# by default the initial page is the first one
|
# by default the initial page is the first one
|
||||||
|
if self.engine == Engine.pikepdf:
|
||||||
|
initial_page = self.writer.pages[0]
|
||||||
|
else:
|
||||||
initial_page = self.writer.pagearray[0]
|
initial_page = self.writer.pagearray[0]
|
||||||
# we set the open action here to make sure we open on the requested
|
# we set the open action here to make sure we open on the requested
|
||||||
# initial page but this value might be overwritten by a custom open
|
# initial page but this value might be overwritten by a custom open
|
||||||
# action later while still taking the requested initial page into
|
# action later while still taking the requested initial page into
|
||||||
# account
|
# account
|
||||||
if self.initial_page is not None:
|
if self.initial_page is not None:
|
||||||
|
if self.engine == Engine.pikepdf:
|
||||||
|
initial_page = self.writer.pages[self.initial_page - 1]
|
||||||
|
else:
|
||||||
initial_page = self.writer.pagearray[self.initial_page - 1]
|
initial_page = self.writer.pagearray[self.initial_page - 1]
|
||||||
catalog[PdfName.OpenAction] = PdfArray(
|
catalog[PdfName.OpenAction] = PdfArray(
|
||||||
[initial_page, PdfName.XYZ, NullObject, NullObject, 0]
|
[initial_page, PdfName.XYZ, NullObject, NullObject, 0]
|
||||||
|
@ -992,11 +1041,16 @@ class pdfdoc(object):
|
||||||
raise ValueError("unknown page layout: %s" % self.page_layout)
|
raise ValueError("unknown page layout: %s" % self.page_layout)
|
||||||
|
|
||||||
# now write out the PDF
|
# now write out the PDF
|
||||||
if self.engine == Engine.pdfrw:
|
if self.engine == Engine.pikepdf:
|
||||||
self.writer.trailer.Info = self.info
|
self.writer.save(outputstream, min_version=self.output_version, linearize=True)
|
||||||
|
elif self.engine == Engine.pdfrw:
|
||||||
|
self.writer.trailer.Info = self.writer.docinfo
|
||||||
|
# setting the version attribute of the pdfrw PdfWriter object will
|
||||||
|
# influence the behaviour of the write() function
|
||||||
|
self.writer.version = self.output_version
|
||||||
self.writer.write(outputstream)
|
self.writer.write(outputstream)
|
||||||
elif self.engine == Engine.internal:
|
elif self.engine == Engine.internal:
|
||||||
self.writer.tostream(self.info, outputstream)
|
self.writer.tostream(self.writer.docinfo, outputstream, self.output_version)
|
||||||
else:
|
else:
|
||||||
raise ValueError("unknown engine: %s" % self.engine)
|
raise ValueError("unknown engine: %s" % self.engine)
|
||||||
|
|
||||||
|
@ -1170,7 +1224,7 @@ def transcode_monochrome(imgdata):
|
||||||
|
|
||||||
def parse_png(rawdata):
|
def parse_png(rawdata):
|
||||||
pngidat = b""
|
pngidat = b""
|
||||||
palette = []
|
palette = b""
|
||||||
i = 16
|
i = 16
|
||||||
while i < len(rawdata):
|
while i < len(rawdata):
|
||||||
# once we can require Python >= 3.2 we can use int.from_bytes() instead
|
# once we can require Python >= 3.2 we can use int.from_bytes() instead
|
||||||
|
@ -1180,20 +1234,7 @@ def parse_png(rawdata):
|
||||||
if rawdata[i - 4 : i] == b"IDAT":
|
if rawdata[i - 4 : i] == b"IDAT":
|
||||||
pngidat += rawdata[i : i + n]
|
pngidat += rawdata[i : i + n]
|
||||||
elif rawdata[i - 4 : i] == b"PLTE":
|
elif rawdata[i - 4 : i] == b"PLTE":
|
||||||
# This could be as simple as saying "palette = rawdata[i:i+n]" but
|
palette += rawdata[i : i + n]
|
||||||
# pdfrw does only escape parenthesis and backslashes in the raw
|
|
||||||
# byte stream. But raw carriage return bytes are interpreted as
|
|
||||||
# line feed bytes by ghostscript. So instead we use the hex string
|
|
||||||
# format. pdfrw cannot write it but at least ghostscript is happy
|
|
||||||
# with it. We would also write out the palette in binary format
|
|
||||||
# (and escape more bytes) but since we cannot use pdfrw anyways,
|
|
||||||
# we choose the more human readable variant.
|
|
||||||
# See https://github.com/pmaupin/pdfrw/issues/147
|
|
||||||
for j in range(i, i + n, 3):
|
|
||||||
# with int.from_bytes() we would not have to prepend extra
|
|
||||||
# zeroes
|
|
||||||
color, = struct.unpack(">I", b"\x00" + rawdata[j : j + 3])
|
|
||||||
palette.append(color)
|
|
||||||
i += n
|
i += n
|
||||||
i += 12
|
i += 12
|
||||||
return pngidat, palette
|
return pngidat, palette
|
||||||
|
@ -3119,9 +3160,12 @@ RGB.""",
|
||||||
"--engine",
|
"--engine",
|
||||||
metavar="engine",
|
metavar="engine",
|
||||||
type=parse_enginearg,
|
type=parse_enginearg,
|
||||||
help="Choose PDF engine. Can be either internal or pdfrw. The "
|
help="Choose PDF engine. Can be either internal, pikepdf or pdfrw. "
|
||||||
"internal engine does not have additional requirements and writes out "
|
"The internal engine does not have additional requirements and writes "
|
||||||
"a human readable PDF. The pdfrw engine requires the pdfrw Python "
|
"out a human readable PDF. The pikepdf engine requires the pikepdf "
|
||||||
|
"Python module and qpdf library, is most featureful, can "
|
||||||
|
"linearize PDFs (\"fast web view\") and can compress more parts of it."
|
||||||
|
"The pdfrw engine requires the pdfrw Python "
|
||||||
"module but does not support unicode metadata (See "
|
"module but does not support unicode metadata (See "
|
||||||
"https://github.com/pmaupin/pdfrw/issues/39) or palette data (See "
|
"https://github.com/pmaupin/pdfrw/issues/39) or palette data (See "
|
||||||
"https://github.com/pmaupin/pdfrw/issues/128).",
|
"https://github.com/pmaupin/pdfrw/issues/128).",
|
||||||
|
|
|
@ -476,7 +476,7 @@ def test_suite():
|
||||||
setattr(TestImg2Pdf, "test_layout_%03d_im2" % i, layout_handler_im2)
|
setattr(TestImg2Pdf, "test_layout_%03d_im2" % i, layout_handler_im2)
|
||||||
|
|
||||||
files = os.listdir(os.path.join(HERE, "input"))
|
files = os.listdir(os.path.join(HERE, "input"))
|
||||||
for engine, test_name in [(a, b) for a in [img2pdf.Engine.internal, img2pdf.Engine.pdfrw]
|
for engine, test_name in [(a, b) for a in [img2pdf.Engine.internal, img2pdf.Engine.pdfrw, img2pdf.Engine.pikepdf]
|
||||||
for b in files]:
|
for b in files]:
|
||||||
inputf = os.path.join(HERE, "input", test_name)
|
inputf = os.path.join(HERE, "input", test_name)
|
||||||
if not os.path.isfile(inputf):
|
if not os.path.isfile(inputf):
|
||||||
|
@ -546,6 +546,7 @@ def test_suite():
|
||||||
{"/XObject"})
|
{"/XObject"})
|
||||||
self.assertEqual(cur_page.Resources.XObject.keys(),
|
self.assertEqual(cur_page.Resources.XObject.keys(),
|
||||||
{"/Im0"})
|
{"/Im0"})
|
||||||
|
if engine != img2pdf.Engine.pikepdf:
|
||||||
self.assertEqual(cur_page.Contents.Length,
|
self.assertEqual(cur_page.Contents.Length,
|
||||||
len(cur_page.Contents.read_bytes()))
|
len(cur_page.Contents.read_bytes()))
|
||||||
self.assertEqual(cur_page.Contents.read_bytes(),
|
self.assertEqual(cur_page.Contents.read_bytes(),
|
||||||
|
@ -666,6 +667,8 @@ def test_suite():
|
||||||
pass
|
pass
|
||||||
if engine == img2pdf.Engine.internal:
|
if engine == img2pdf.Engine.internal:
|
||||||
setattr(TestImg2Pdf, "test_%s_internal" % test_name, handle)
|
setattr(TestImg2Pdf, "test_%s_internal" % test_name, handle)
|
||||||
|
elif engine == img2pdf.Engine.pikepdf:
|
||||||
|
setattr(TestImg2Pdf, "test_%s_pikepdf" % test_name, handle)
|
||||||
elif engine == img2pdf.Engine.pdfrw:
|
elif engine == img2pdf.Engine.pdfrw:
|
||||||
setattr(TestImg2Pdf, "test_%s_pdfrw" % test_name, handle)
|
setattr(TestImg2Pdf, "test_%s_pdfrw" % test_name, handle)
|
||||||
else:
|
else:
|
||||||
|
|
3
test.sh
3
test.sh
|
@ -137,6 +137,9 @@ available_engines="internal"
|
||||||
if python3 -c "import pdfrw" 2>/dev/null; then
|
if python3 -c "import pdfrw" 2>/dev/null; then
|
||||||
available_engines="$available_engines pdfrw"
|
available_engines="$available_engines pdfrw"
|
||||||
fi
|
fi
|
||||||
|
if python3 -c "import pikepdf" 2>/dev/null; then
|
||||||
|
available_engines="$available_engines pikepdf"
|
||||||
|
fi
|
||||||
|
|
||||||
img2pdf()
|
img2pdf()
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue