Convert 8-bit PNG alpha channels to /SMasks in PDF

This commit is contained in:
Tamás Zahola 2021-06-12 22:03:56 +02:00 committed by Gogs
parent d03f331521
commit af5ae5b9b6

View file

@ -742,6 +742,7 @@ class pdfdoc(object):
imgheightpx, imgheightpx,
imgformat, imgformat,
imgdata, imgdata,
smaskdata,
imgwidthpdf, imgwidthpdf,
imgheightpdf, imgheightpdf,
imgxpdf, imgxpdf,
@ -759,6 +760,8 @@ class pdfdoc(object):
artborder=None, artborder=None,
iccp=None, iccp=None,
): ):
assert color != Colorspace.RGBA or (imgformat == ImageFormat.PNG and smaskdata is not None)
if self.engine == Engine.pikepdf: if self.engine == Engine.pikepdf:
PdfArray = pikepdf.Array PdfArray = pikepdf.Array
PdfDict = pikepdf.Dictionary PdfDict = pikepdf.Dictionary
@ -779,7 +782,7 @@ class pdfdoc(object):
if color == Colorspace["1"] or color == Colorspace.L: if color == Colorspace["1"] or color == Colorspace.L:
colorspace = PdfName.DeviceGray colorspace = PdfName.DeviceGray
elif color == Colorspace.RGB: elif color == Colorspace.RGB or color == Colorspace.RGBA:
colorspace = PdfName.DeviceRGB colorspace = PdfName.DeviceRGB
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]: elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
colorspace = PdfName.DeviceCMYK colorspace = PdfName.DeviceCMYK
@ -818,7 +821,7 @@ class pdfdoc(object):
iccpdict[PdfName.Alternate] = colorspace iccpdict[PdfName.Alternate] = colorspace
if color == Colorspace["1"] or color == Colorspace.L: if color == Colorspace["1"] or color == Colorspace.L:
iccpdict[PdfName.N] = 1 iccpdict[PdfName.N] = 1
elif color == Colorspace.RGB: elif color == Colorspace.RGB or color == Colorspace.RGBA:
iccpdict[PdfName.N] = 3 iccpdict[PdfName.N] = 3
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]: elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
iccpdict[PdfName.N] = 4 iccpdict[PdfName.N] = 4
@ -869,15 +872,34 @@ class pdfdoc(object):
decodeparms[PdfName.Rows] = imgheightpx decodeparms[PdfName.Rows] = imgheightpx
image[PdfName.DecodeParms] = [decodeparms] image[PdfName.DecodeParms] = [decodeparms]
elif imgformat is ImageFormat.PNG: elif imgformat is ImageFormat.PNG:
decodeparms = PdfDict() if color == Colorspace.RGBA:
decodeparms[PdfName.Predictor] = 15 if self.engine == Engine.pikepdf:
if color in [Colorspace.P, Colorspace["1"], Colorspace.L]: smask = self.writer.make_stream(smaskdata)
decodeparms[PdfName.Colors] = 1 else:
smask = PdfDict(stream=convert_load(smaskdata))
smask[PdfName.Type] = PdfName.XObject
smask[PdfName.Subtype] = PdfName.Image
smask[PdfName.Filter] = PdfName.FlateDecode
smask[PdfName.Width] = imgwidthpx
smask[PdfName.Height] = imgheightpx
smask[PdfName.ColorSpace] = PdfName.DeviceGray
smask[PdfName.BitsPerComponent] = depth
image[PdfName.SMask] = smask
# /SMask requires PDF 1.4
if self.output_version < "1.4":
self.output_version = "1.4"
else: else:
decodeparms[PdfName.Colors] = 3 decodeparms = PdfDict()
decodeparms[PdfName.Columns] = imgwidthpx decodeparms[PdfName.Predictor] = 15
decodeparms[PdfName.BitsPerComponent] = depth if color in [Colorspace.P, Colorspace["1"], Colorspace.L]:
image[PdfName.DecodeParms] = decodeparms decodeparms[PdfName.Colors] = 1
else:
decodeparms[PdfName.Colors] = 3
decodeparms[PdfName.Columns] = imgwidthpx
decodeparms[PdfName.BitsPerComponent] = depth
image[PdfName.DecodeParms] = decodeparms
text = ( text = (
"q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" "q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ"
@ -954,6 +976,8 @@ class pdfdoc(object):
if self.engine == Engine.internal: if self.engine == Engine.internal:
self.writer.addobj(content) self.writer.addobj(content)
self.writer.addobj(image) self.writer.addobj(image)
if smask is not None:
self.writer.addobj(smask)
if iccp is not None: if iccp is not None:
self.writer.addobj(iccpdict) self.writer.addobj(iccpdict)
@ -1183,8 +1207,10 @@ def get_imgmetadata(
# Search online for the 72.009 dpi problem for more info. # Search online for the 72.009 dpi problem for more info.
ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
ics = imgdata.mode ics = imgdata.mode
if ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info: if imgformat == ImageFormat.PNG and ics == "RGBA":
logger.warning("Image contains an alpha channel which will be stored as a separate soft mask (/SMask) image in PDF.")
elif (ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info):
logger.warning("Image contains transparency which cannot be retained in PDF.") logger.warning("Image contains transparency which cannot be retained in PDF.")
logger.warning("img2pdf will not perform a lossy operation.") logger.warning("img2pdf will not perform a lossy operation.")
logger.warning("You can remove the alpha channel using imagemagick:") logger.warning("You can remove the alpha channel using imagemagick:")
@ -1427,6 +1453,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
ndpi, ndpi,
imgformat, imgformat,
rawdata, rawdata,
None,
imgwidthpx, imgwidthpx,
imgheightpx, imgheightpx,
[], [],
@ -1483,6 +1510,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
ndpi, ndpi,
ImageFormat.JPEG, ImageFormat.JPEG,
rawdata[offset : offset + mpent["Size"]], rawdata[offset : offset + mpent["Size"]],
None,
imgwidthpx, imgwidthpx,
imgheightpx, imgheightpx,
[], [],
@ -1495,7 +1523,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
img_page_count += 1 img_page_count += 1
cleanup() cleanup()
return result return result
# We can directly embed the IDAT chunk of PNG images if the PNG is not # We can directly embed the IDAT chunk of PNG images if the PNG is not
# interlaced # interlaced
# #
@ -1503,35 +1531,46 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
# or not. Thus, we retrieve that info manually by looking at byte 13 in the # or not. Thus, we retrieve that info manually by looking at byte 13 in the
# IHDR chunk. We know where to find that in the file because the IHDR chunk # IHDR chunk. We know where to find that in the file because the IHDR chunk
# must be the first chunk. # must be the first chunk.
if imgformat == ImageFormat.PNG and rawdata[28] == 0: if imgformat == ImageFormat.PNG:
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace, rawdata, rot imgdata, imgformat, default_dpi, colorspace, rawdata, rot
) )
pngidat, palette = parse_png(rawdata)
# PIL does not provide the information about the original bits per if color == Colorspace.RGBA or rawdata[28] == 0:
# sample. Thus, we retrieve that info manually by looking at byte 9 in if color == Colorspace.RGBA:
# the IHDR chunk. We know where to find that in the file because the r, g, b, a = imgdata.split()
# IHDR chunk must be the first chunk pngdata = zlib.compress(Image.merge("RGB", (r, g, b)).tobytes())
depth = rawdata[24] smaskdata = zlib.compress(a.tobytes())
if depth not in [1, 2, 4, 8, 16]: palette = None
raise ValueError("invalid bit depth: %d" % depth) else:
logger.debug("read_images() embeds a PNG") pngdata, palette = parse_png(rawdata)
cleanup() smaskdata = None
return [
( # PIL does not provide the information about the original bits per
color, # sample. Thus, we retrieve that info manually by looking at byte 9 in
ndpi, # the IHDR chunk. We know where to find that in the file because the
imgformat, # IHDR chunk must be the first chunk
pngidat, depth = rawdata[24]
imgwidthpx, if depth not in [1, 2, 4, 8, 16]:
imgheightpx, raise ValueError("invalid bit depth: %d" % depth)
palette, logger.debug("read_images() embeds a PNG")
False, cleanup()
depth, return [
rotation, (
iccp, color,
) ndpi,
] imgformat,
pngdata,
smaskdata,
imgwidthpx,
imgheightpx,
palette,
False,
depth,
rotation,
iccp,
)
]
# If our input is not JPEG or PNG, then we might have a format that # If our input is not JPEG or PNG, then we might have a format that
# supports multiple frames (like TIFF or GIF), so we need a loop to # supports multiple frames (like TIFF or GIF), so we need a loop to
@ -1615,6 +1654,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
ndpi, ndpi,
ImageFormat.CCITTGroup4, ImageFormat.CCITTGroup4,
rawdata, rawdata,
None,
imgwidthpx, imgwidthpx,
imgheightpx, imgheightpx,
[], [],
@ -1644,6 +1684,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
ndpi, ndpi,
ImageFormat.CCITTGroup4, ImageFormat.CCITTGroup4,
ccittdata, ccittdata,
None,
imgwidthpx, imgwidthpx,
imgheightpx, imgheightpx,
[], [],
@ -1682,6 +1723,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
ndpi, ndpi,
imgformat, imgformat,
imggz, imggz,
None,
imgwidthpx, imgwidthpx,
imgheightpx, imgheightpx,
[], [],
@ -1713,6 +1755,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
ndpi, ndpi,
ImageFormat.PNG, ImageFormat.PNG,
pngidat, pngidat,
None,
imgwidthpx, imgwidthpx,
imgheightpx, imgheightpx,
palette, palette,
@ -2118,6 +2161,7 @@ def convert(*images, **kwargs):
ndpi, ndpi,
imgformat, imgformat,
imgdata, imgdata,
smaskdata,
imgwidthpx, imgwidthpx,
imgheightpx, imgheightpx,
palette, palette,
@ -2171,6 +2215,7 @@ def convert(*images, **kwargs):
imgheightpx, imgheightpx,
imgformat, imgformat,
imgdata, imgdata,
smaskdata,
imgwidthpdf, imgwidthpdf,
imgheightpdf, imgheightpdf,
imgxpdf, imgxpdf,