Convert 8-bit PNG alpha channels to /SMasks in PDF
This commit is contained in:
parent
d03f331521
commit
af5ae5b9b6
1 changed files with 84 additions and 39 deletions
123
src/img2pdf.py
123
src/img2pdf.py
|
@ -742,6 +742,7 @@ class pdfdoc(object):
|
||||||
imgheightpx,
|
imgheightpx,
|
||||||
imgformat,
|
imgformat,
|
||||||
imgdata,
|
imgdata,
|
||||||
|
smaskdata,
|
||||||
imgwidthpdf,
|
imgwidthpdf,
|
||||||
imgheightpdf,
|
imgheightpdf,
|
||||||
imgxpdf,
|
imgxpdf,
|
||||||
|
@ -759,6 +760,8 @@ class pdfdoc(object):
|
||||||
artborder=None,
|
artborder=None,
|
||||||
iccp=None,
|
iccp=None,
|
||||||
):
|
):
|
||||||
|
assert color != Colorspace.RGBA or (imgformat == ImageFormat.PNG and smaskdata is not None)
|
||||||
|
|
||||||
if self.engine == Engine.pikepdf:
|
if self.engine == Engine.pikepdf:
|
||||||
PdfArray = pikepdf.Array
|
PdfArray = pikepdf.Array
|
||||||
PdfDict = pikepdf.Dictionary
|
PdfDict = pikepdf.Dictionary
|
||||||
|
@ -779,7 +782,7 @@ class pdfdoc(object):
|
||||||
|
|
||||||
if color == Colorspace["1"] or color == Colorspace.L:
|
if color == Colorspace["1"] or color == Colorspace.L:
|
||||||
colorspace = PdfName.DeviceGray
|
colorspace = PdfName.DeviceGray
|
||||||
elif color == Colorspace.RGB:
|
elif color == Colorspace.RGB or color == Colorspace.RGBA:
|
||||||
colorspace = PdfName.DeviceRGB
|
colorspace = PdfName.DeviceRGB
|
||||||
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
|
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
|
||||||
colorspace = PdfName.DeviceCMYK
|
colorspace = PdfName.DeviceCMYK
|
||||||
|
@ -818,7 +821,7 @@ class pdfdoc(object):
|
||||||
iccpdict[PdfName.Alternate] = colorspace
|
iccpdict[PdfName.Alternate] = colorspace
|
||||||
if color == Colorspace["1"] or color == Colorspace.L:
|
if color == Colorspace["1"] or color == Colorspace.L:
|
||||||
iccpdict[PdfName.N] = 1
|
iccpdict[PdfName.N] = 1
|
||||||
elif color == Colorspace.RGB:
|
elif color == Colorspace.RGB or color == Colorspace.RGBA:
|
||||||
iccpdict[PdfName.N] = 3
|
iccpdict[PdfName.N] = 3
|
||||||
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
|
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
|
||||||
iccpdict[PdfName.N] = 4
|
iccpdict[PdfName.N] = 4
|
||||||
|
@ -869,15 +872,34 @@ class pdfdoc(object):
|
||||||
decodeparms[PdfName.Rows] = imgheightpx
|
decodeparms[PdfName.Rows] = imgheightpx
|
||||||
image[PdfName.DecodeParms] = [decodeparms]
|
image[PdfName.DecodeParms] = [decodeparms]
|
||||||
elif imgformat is ImageFormat.PNG:
|
elif imgformat is ImageFormat.PNG:
|
||||||
decodeparms = PdfDict()
|
if color == Colorspace.RGBA:
|
||||||
decodeparms[PdfName.Predictor] = 15
|
if self.engine == Engine.pikepdf:
|
||||||
if color in [Colorspace.P, Colorspace["1"], Colorspace.L]:
|
smask = self.writer.make_stream(smaskdata)
|
||||||
decodeparms[PdfName.Colors] = 1
|
else:
|
||||||
|
smask = PdfDict(stream=convert_load(smaskdata))
|
||||||
|
smask[PdfName.Type] = PdfName.XObject
|
||||||
|
smask[PdfName.Subtype] = PdfName.Image
|
||||||
|
smask[PdfName.Filter] = PdfName.FlateDecode
|
||||||
|
smask[PdfName.Width] = imgwidthpx
|
||||||
|
smask[PdfName.Height] = imgheightpx
|
||||||
|
smask[PdfName.ColorSpace] = PdfName.DeviceGray
|
||||||
|
smask[PdfName.BitsPerComponent] = depth
|
||||||
|
|
||||||
|
image[PdfName.SMask] = smask
|
||||||
|
|
||||||
|
# /SMask requires PDF 1.4
|
||||||
|
if self.output_version < "1.4":
|
||||||
|
self.output_version = "1.4"
|
||||||
else:
|
else:
|
||||||
decodeparms[PdfName.Colors] = 3
|
decodeparms = PdfDict()
|
||||||
decodeparms[PdfName.Columns] = imgwidthpx
|
decodeparms[PdfName.Predictor] = 15
|
||||||
decodeparms[PdfName.BitsPerComponent] = depth
|
if color in [Colorspace.P, Colorspace["1"], Colorspace.L]:
|
||||||
image[PdfName.DecodeParms] = decodeparms
|
decodeparms[PdfName.Colors] = 1
|
||||||
|
else:
|
||||||
|
decodeparms[PdfName.Colors] = 3
|
||||||
|
decodeparms[PdfName.Columns] = imgwidthpx
|
||||||
|
decodeparms[PdfName.BitsPerComponent] = depth
|
||||||
|
image[PdfName.DecodeParms] = decodeparms
|
||||||
|
|
||||||
text = (
|
text = (
|
||||||
"q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ"
|
"q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ"
|
||||||
|
@ -954,6 +976,8 @@ class pdfdoc(object):
|
||||||
if self.engine == Engine.internal:
|
if self.engine == Engine.internal:
|
||||||
self.writer.addobj(content)
|
self.writer.addobj(content)
|
||||||
self.writer.addobj(image)
|
self.writer.addobj(image)
|
||||||
|
if smask is not None:
|
||||||
|
self.writer.addobj(smask)
|
||||||
if iccp is not None:
|
if iccp is not None:
|
||||||
self.writer.addobj(iccpdict)
|
self.writer.addobj(iccpdict)
|
||||||
|
|
||||||
|
@ -1183,8 +1207,10 @@ def get_imgmetadata(
|
||||||
# Search online for the 72.009 dpi problem for more info.
|
# Search online for the 72.009 dpi problem for more info.
|
||||||
ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
|
ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
|
||||||
ics = imgdata.mode
|
ics = imgdata.mode
|
||||||
|
|
||||||
if ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info:
|
if imgformat == ImageFormat.PNG and ics == "RGBA":
|
||||||
|
logger.warning("Image contains an alpha channel which will be stored as a separate soft mask (/SMask) image in PDF.")
|
||||||
|
elif (ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info):
|
||||||
logger.warning("Image contains transparency which cannot be retained in PDF.")
|
logger.warning("Image contains transparency which cannot be retained in PDF.")
|
||||||
logger.warning("img2pdf will not perform a lossy operation.")
|
logger.warning("img2pdf will not perform a lossy operation.")
|
||||||
logger.warning("You can remove the alpha channel using imagemagick:")
|
logger.warning("You can remove the alpha channel using imagemagick:")
|
||||||
|
@ -1427,6 +1453,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
ndpi,
|
ndpi,
|
||||||
imgformat,
|
imgformat,
|
||||||
rawdata,
|
rawdata,
|
||||||
|
None,
|
||||||
imgwidthpx,
|
imgwidthpx,
|
||||||
imgheightpx,
|
imgheightpx,
|
||||||
[],
|
[],
|
||||||
|
@ -1483,6 +1510,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
ndpi,
|
ndpi,
|
||||||
ImageFormat.JPEG,
|
ImageFormat.JPEG,
|
||||||
rawdata[offset : offset + mpent["Size"]],
|
rawdata[offset : offset + mpent["Size"]],
|
||||||
|
None,
|
||||||
imgwidthpx,
|
imgwidthpx,
|
||||||
imgheightpx,
|
imgheightpx,
|
||||||
[],
|
[],
|
||||||
|
@ -1495,7 +1523,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
img_page_count += 1
|
img_page_count += 1
|
||||||
cleanup()
|
cleanup()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# We can directly embed the IDAT chunk of PNG images if the PNG is not
|
# We can directly embed the IDAT chunk of PNG images if the PNG is not
|
||||||
# interlaced
|
# interlaced
|
||||||
#
|
#
|
||||||
|
@ -1503,35 +1531,46 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
# or not. Thus, we retrieve that info manually by looking at byte 13 in the
|
# or not. Thus, we retrieve that info manually by looking at byte 13 in the
|
||||||
# IHDR chunk. We know where to find that in the file because the IHDR chunk
|
# IHDR chunk. We know where to find that in the file because the IHDR chunk
|
||||||
# must be the first chunk.
|
# must be the first chunk.
|
||||||
if imgformat == ImageFormat.PNG and rawdata[28] == 0:
|
if imgformat == ImageFormat.PNG:
|
||||||
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
|
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
|
||||||
imgdata, imgformat, default_dpi, colorspace, rawdata, rot
|
imgdata, imgformat, default_dpi, colorspace, rawdata, rot
|
||||||
)
|
)
|
||||||
pngidat, palette = parse_png(rawdata)
|
|
||||||
# PIL does not provide the information about the original bits per
|
if color == Colorspace.RGBA or rawdata[28] == 0:
|
||||||
# sample. Thus, we retrieve that info manually by looking at byte 9 in
|
if color == Colorspace.RGBA:
|
||||||
# the IHDR chunk. We know where to find that in the file because the
|
r, g, b, a = imgdata.split()
|
||||||
# IHDR chunk must be the first chunk
|
pngdata = zlib.compress(Image.merge("RGB", (r, g, b)).tobytes())
|
||||||
depth = rawdata[24]
|
smaskdata = zlib.compress(a.tobytes())
|
||||||
if depth not in [1, 2, 4, 8, 16]:
|
palette = None
|
||||||
raise ValueError("invalid bit depth: %d" % depth)
|
else:
|
||||||
logger.debug("read_images() embeds a PNG")
|
pngdata, palette = parse_png(rawdata)
|
||||||
cleanup()
|
smaskdata = None
|
||||||
return [
|
|
||||||
(
|
# PIL does not provide the information about the original bits per
|
||||||
color,
|
# sample. Thus, we retrieve that info manually by looking at byte 9 in
|
||||||
ndpi,
|
# the IHDR chunk. We know where to find that in the file because the
|
||||||
imgformat,
|
# IHDR chunk must be the first chunk
|
||||||
pngidat,
|
depth = rawdata[24]
|
||||||
imgwidthpx,
|
if depth not in [1, 2, 4, 8, 16]:
|
||||||
imgheightpx,
|
raise ValueError("invalid bit depth: %d" % depth)
|
||||||
palette,
|
logger.debug("read_images() embeds a PNG")
|
||||||
False,
|
cleanup()
|
||||||
depth,
|
return [
|
||||||
rotation,
|
(
|
||||||
iccp,
|
color,
|
||||||
)
|
ndpi,
|
||||||
]
|
imgformat,
|
||||||
|
pngdata,
|
||||||
|
smaskdata,
|
||||||
|
imgwidthpx,
|
||||||
|
imgheightpx,
|
||||||
|
palette,
|
||||||
|
False,
|
||||||
|
depth,
|
||||||
|
rotation,
|
||||||
|
iccp,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
# If our input is not JPEG or PNG, then we might have a format that
|
# If our input is not JPEG or PNG, then we might have a format that
|
||||||
# supports multiple frames (like TIFF or GIF), so we need a loop to
|
# supports multiple frames (like TIFF or GIF), so we need a loop to
|
||||||
|
@ -1615,6 +1654,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
ndpi,
|
ndpi,
|
||||||
ImageFormat.CCITTGroup4,
|
ImageFormat.CCITTGroup4,
|
||||||
rawdata,
|
rawdata,
|
||||||
|
None,
|
||||||
imgwidthpx,
|
imgwidthpx,
|
||||||
imgheightpx,
|
imgheightpx,
|
||||||
[],
|
[],
|
||||||
|
@ -1644,6 +1684,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
ndpi,
|
ndpi,
|
||||||
ImageFormat.CCITTGroup4,
|
ImageFormat.CCITTGroup4,
|
||||||
ccittdata,
|
ccittdata,
|
||||||
|
None,
|
||||||
imgwidthpx,
|
imgwidthpx,
|
||||||
imgheightpx,
|
imgheightpx,
|
||||||
[],
|
[],
|
||||||
|
@ -1682,6 +1723,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
ndpi,
|
ndpi,
|
||||||
imgformat,
|
imgformat,
|
||||||
imggz,
|
imggz,
|
||||||
|
None,
|
||||||
imgwidthpx,
|
imgwidthpx,
|
||||||
imgheightpx,
|
imgheightpx,
|
||||||
[],
|
[],
|
||||||
|
@ -1713,6 +1755,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
ndpi,
|
ndpi,
|
||||||
ImageFormat.PNG,
|
ImageFormat.PNG,
|
||||||
pngidat,
|
pngidat,
|
||||||
|
None,
|
||||||
imgwidthpx,
|
imgwidthpx,
|
||||||
imgheightpx,
|
imgheightpx,
|
||||||
palette,
|
palette,
|
||||||
|
@ -2118,6 +2161,7 @@ def convert(*images, **kwargs):
|
||||||
ndpi,
|
ndpi,
|
||||||
imgformat,
|
imgformat,
|
||||||
imgdata,
|
imgdata,
|
||||||
|
smaskdata,
|
||||||
imgwidthpx,
|
imgwidthpx,
|
||||||
imgheightpx,
|
imgheightpx,
|
||||||
palette,
|
palette,
|
||||||
|
@ -2171,6 +2215,7 @@ def convert(*images, **kwargs):
|
||||||
imgheightpx,
|
imgheightpx,
|
||||||
imgformat,
|
imgformat,
|
||||||
imgdata,
|
imgdata,
|
||||||
|
smaskdata,
|
||||||
imgwidthpdf,
|
imgwidthpdf,
|
||||||
imgheightpdf,
|
imgheightpdf,
|
||||||
imgxpdf,
|
imgxpdf,
|
||||||
|
|
Loading…
Reference in a new issue