Add support for JBIG2 (generic coding) #184

Open
ooBJ3u wants to merge 2 commits from ooBJ3u/img2pdf:main into main
2 changed files with 7 additions and 3 deletions
Showing only changes of commit 244600065d - Show all commits

View file

@ -1874,7 +1874,11 @@ def read_images(
imgformat = ImageFormat.JBIG2 imgformat = ImageFormat.JBIG2
if rawdata[:24] != b"\x97\x4a\x42\x32\x0d\x0a\x1a\x0a\x01\x00\x00\x00\x01\x00\x00\x00\x00\x30\x00\x01\x00\x00\x00\x13": if rawdata[:24] != b"\x97\x4a\x42\x32\x0d\x0a\x1a\x0a\x01\x00\x00\x00\x01\x00\x00\x00\x00\x30\x00\x01\x00\x00\x00\x13":
raise ImageOpenError( raise ImageOpenError(
"Unsupported JBIG2 format; only single-page generic coding is supported (e.g. from `jbig2enc`)" "Unsupported JBIG2 format; only single-page generic coding is supported (e.g. from `jbig2enc`)."
)
if rawdata[-22:] != b"\x00\x00\x00\x021\x00\x01\x00\x00\x00\x00\x00\x00\x00\x033\x00\x01\x00\x00\x00\x00":
Review

One question about the style of the code here:

Do you think it would be better to write the 5th and 16th bytes as hex escapes (\x31 and \x33) instead of the literal ASCII characters 1 and 3? IMO that would be more consistent with the rest of the byte string, but since the value is correct as-is, I'm not opposed to leaving it.

One question about the style of the code here: Do you think it would be better to write the 5th and 16th bytes as hex escapes (`\x31` and `\x33`) instead of the literal ASCII characters `1` and `3`? IMO that would be more consistent with the rest of the byte string, but since the value is correct as-is, I'm not opposed to leaving it.
Review

This is how Python prints a bytes object by default (printable ASCII bytes are shown as characters rather than hex escapes), so I figured that is fine.

This is how Python prints a bytes object by default (printable ASCII bytes are shown as characters rather than hex escapes), so I figured that is fine.
Review

Okay, that makes sense then. No objection from me.

Okay, that makes sense then. No objection from me.
raise ImageOpenError(
"Unsupported JBIG2 format; we expect end-of-page and end-of-file segments at the end (e.g. from `jbig2enc`)."
) )
elif rawdata[:14].lower() == b"id=imagemagick": elif rawdata[:14].lower() == b"id=imagemagick":
# image is in MIFF format # image is in MIFF format
@ -2126,7 +2130,7 @@ def read_images(
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace, rawdata, rot imgdata, imgformat, default_dpi, colorspace, rawdata, rot
) )
streamdata = rawdata[13:] # Strip file header streamdata = rawdata[13:-22] # Strip file header and footer
return [ return [
( (
color, color,

View file

@ -7062,7 +7062,7 @@ def test_general(general_input, engine):
if imgprops.Filter in ["/DCTDecode", "/JPXDecode"]: if imgprops.Filter in ["/DCTDecode", "/JPXDecode"]:
assert cur_page.Resources.XObject.Im0.read_raw_bytes() == orig_imgdata assert cur_page.Resources.XObject.Im0.read_raw_bytes() == orig_imgdata
elif imgprops.Filter == "/JBIG2Decode": elif imgprops.Filter == "/JBIG2Decode":
assert cur_page.Resources.XObject.Im0.read_raw_bytes() == orig_imgdata[13:] # Strip file header assert cur_page.Resources.XObject.Im0.read_raw_bytes() == orig_imgdata[13:-22] # Strip file header and footer.
elif imgprops.Filter == pikepdf.Array([pikepdf.Name.CCITTFaxDecode]): elif imgprops.Filter == pikepdf.Array([pikepdf.Name.CCITTFaxDecode]):
tiff_header = tiff_header_for_ccitt( tiff_header = tiff_header_for_ccitt(
int(imgprops.Width), int(imgprops.Height), int(imgprops.Length), 4 int(imgprops.Width), int(imgprops.Height), int(imgprops.Length), 4