Interlaced PNGs cannot be directly embedded but have to be re-encoded

main
parent 7244d2c6ed
commit d9a6c9db03
Signed by untrusted user: josch
GPG Key ID: F2CBA5C78FBD83E1

@ -759,6 +759,8 @@ def read_images(rawdata, colorspace, first_frame_only=False):
# depending on the input format, determine whether to pass the raw # depending on the input format, determine whether to pass the raw
# image or the zlib compressed color information # image or the zlib compressed color information
# JPEG and JPEG2000 can be embedded into the PDF as-is
if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000: if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000:
color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace, rawdata) imgdata, imgformat, default_dpi, colorspace, rawdata)
@ -770,71 +772,81 @@ def read_images(rawdata, colorspace, first_frame_only=False):
raise JpegColorspaceError("jpeg can't have an alpha channel") raise JpegColorspaceError("jpeg can't have an alpha channel")
im.close() im.close()
return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx, [])] return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx, [])]
elif imgformat == ImageFormat.PNG:
# We can directly embed the IDAT chunk of PNG images if the PNG is not
# interlaced
#
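# PIL does not provide the information whether a PNG was stored interlaced
# or not. Thus, we retrieve that info manually by reading the interlace
# method, which is the 13th data byte of the IHDR chunk (0 = not interlaced).
# We know where to find it in the file because the IHDR chunk must be the
# first chunk: 8 bytes PNG signature + 4 bytes chunk length + 4 bytes chunk
# type + 12 preceding IHDR data bytes puts it at offset 28 of the raw data.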
if imgformat == ImageFormat.PNG and rawdata[28] == 0:
color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace, rawdata) imgdata, imgformat, default_dpi, colorspace, rawdata)
pngidat, palette = parse_png(rawdata) pngidat, palette = parse_png(rawdata)
return [(color, ndpi, imgformat, pngidat, imgwidthpx, imgheightpx, palette)] return [(color, ndpi, imgformat, pngidat, imgwidthpx, imgheightpx, palette)]
else:
result = []
img_page_count = 0
# loop through all frames of the image (example: multipage TIFF)
while True:
try:
imgdata.seek(img_page_count)
except EOFError:
break
if first_frame_only and img_page_count > 0: # Everything else has to be encoded
break
logging.debug("Converting frame: %d" % img_page_count) result = []
img_page_count = 0
# loop through all frames of the image (example: multipage TIFF)
while True:
try:
imgdata.seek(img_page_count)
except EOFError:
break
color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( if first_frame_only and img_page_count > 0:
imgdata, imgformat, default_dpi, colorspace) break
newimg = None logging.debug("Converting frame: %d" % img_page_count)
if color == Colorspace['1']:
try: color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
ccittdata = transcode_monochrome(imgdata) imgdata, imgformat, default_dpi, colorspace)
imgformat = ImageFormat.CCITTGroup4
result.append((color, ndpi, imgformat, ccittdata, newimg = None
imgwidthpx, imgheightpx)) if color == Colorspace['1']:
img_page_count += 1 try:
continue ccittdata = transcode_monochrome(imgdata)
except Exception as e: imgformat = ImageFormat.CCITTGroup4
logging.debug(e) result.append((color, ndpi, imgformat, ccittdata,
logging.debug("Converting colorspace 1 to L") imgwidthpx, imgheightpx))
newimg = imgdata.convert('L') img_page_count += 1
color = Colorspace.L continue
elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK, except Exception as e:
Colorspace["CMYK;I"], Colorspace.P]: logging.debug(e)
logging.debug("Colorspace is OK: %s", color) logging.debug("Converting colorspace 1 to L")
newimg = imgdata newimg = imgdata.convert('L')
elif color in [Colorspace.RGBA, Colorspace.other]: color = Colorspace.L
logging.debug("Converting colorspace %s to RGB", color) elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK,
newimg = imgdata.convert('RGB') Colorspace["CMYK;I"], Colorspace.P]:
color = Colorspace.RGB logging.debug("Colorspace is OK: %s", color)
else: newimg = imgdata
raise ValueError("unknown colorspace: %s" % color.name) elif color in [Colorspace.RGBA, Colorspace.other]:
# cheapo version to retrieve a PNG encoding of the payload is to logging.debug("Converting colorspace %s to RGB", color)
# just save it with PIL. In the future this could be replaced by newimg = imgdata.convert('RGB')
# dedicated function applying the Paeth PNG filter to the raw pixel color = Colorspace.RGB
pngbuffer = BytesIO() else:
newimg.save(pngbuffer, format="png") raise ValueError("unknown colorspace: %s" % color.name)
pngidat, palette = parse_png(pngbuffer.getvalue()) # cheapo version to retrieve a PNG encoding of the payload is to
imgformat = ImageFormat.PNG # just save it with PIL. In the future this could be replaced by
result.append((color, ndpi, imgformat, pngidat, imgwidthpx, # dedicated function applying the Paeth PNG filter to the raw pixel
imgheightpx, palette)) pngbuffer = BytesIO()
img_page_count += 1 newimg.save(pngbuffer, format="png")
# the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the pngidat, palette = parse_png(pngbuffer.getvalue())
# close() method imgformat = ImageFormat.PNG
try: result.append((color, ndpi, imgformat, pngidat, imgwidthpx,
imgdata.close() imgheightpx, palette))
except AttributeError: img_page_count += 1
pass # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
im.close() # close() method
return result try:
imgdata.close()
except AttributeError:
pass
im.close()
return result
# converts a length in pixels to a length in PDF units (1/72 of an inch) # converts a length in pixels to a length in PDF units (1/72 of an inch)

Loading…
Cancel
Save