parent
f597887088
commit
acc25a4926
3 changed files with 183 additions and 15 deletions
|
@ -827,8 +827,10 @@ class pdfdoc(object):
|
||||||
artborder=None,
|
artborder=None,
|
||||||
iccp=None,
|
iccp=None,
|
||||||
):
|
):
|
||||||
assert (color != Colorspace.RGBA and color != Colorspace.LA) or (
|
assert (
|
||||||
imgformat == ImageFormat.PNG and smaskdata is not None
|
color not in [Colorspace.RGBA, Colorspace.LA]
|
||||||
|
or (imgformat == ImageFormat.PNG and smaskdata is not None)
|
||||||
|
or imgformat == ImageFormat.JPEG2000
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.engine == Engine.pikepdf:
|
if self.engine == Engine.pikepdf:
|
||||||
|
@ -852,7 +854,13 @@ class pdfdoc(object):
|
||||||
if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA:
|
if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA:
|
||||||
colorspace = PdfName.DeviceGray
|
colorspace = PdfName.DeviceGray
|
||||||
elif color == Colorspace.RGB or color == Colorspace.RGBA:
|
elif color == Colorspace.RGB or color == Colorspace.RGBA:
|
||||||
colorspace = PdfName.DeviceRGB
|
if color == Colorspace.RGBA and imgformat == ImageFormat.JPEG2000:
|
||||||
|
# there is no DeviceRGBA and for JPXDecode it is okay to have
|
||||||
|
# no colorspace as the pdf reader is supposed to get this info
|
||||||
|
# from the jpeg2000 payload itself
|
||||||
|
colorspace = None
|
||||||
|
else:
|
||||||
|
colorspace = PdfName.DeviceRGB
|
||||||
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
|
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
|
||||||
colorspace = PdfName.DeviceCMYK
|
colorspace = PdfName.DeviceCMYK
|
||||||
elif color == Colorspace.P:
|
elif color == Colorspace.P:
|
||||||
|
@ -923,7 +931,8 @@ class pdfdoc(object):
|
||||||
image[PdfName.Filter] = ofilter
|
image[PdfName.Filter] = ofilter
|
||||||
image[PdfName.Width] = imgwidthpx
|
image[PdfName.Width] = imgwidthpx
|
||||||
image[PdfName.Height] = imgheightpx
|
image[PdfName.Height] = imgheightpx
|
||||||
image[PdfName.ColorSpace] = colorspace
|
if colorspace is not None:
|
||||||
|
image[PdfName.ColorSpace] = colorspace
|
||||||
image[PdfName.BitsPerComponent] = depth
|
image[PdfName.BitsPerComponent] = depth
|
||||||
|
|
||||||
smask = None
|
smask = None
|
||||||
|
@ -1292,7 +1301,7 @@ def get_imgmetadata(
|
||||||
if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
|
if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
|
||||||
# this codepath gets called if the PIL installation is not able to
|
# this codepath gets called if the PIL installation is not able to
|
||||||
# handle JPEG2000 files
|
# handle JPEG2000 files
|
||||||
imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata)
|
imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = parsejp2(rawdata)
|
||||||
|
|
||||||
if hdpi is None:
|
if hdpi is None:
|
||||||
hdpi = default_dpi
|
hdpi = default_dpi
|
||||||
|
@ -1312,7 +1321,7 @@ def get_imgmetadata(
|
||||||
ics = imgdata.mode
|
ics = imgdata.mode
|
||||||
|
|
||||||
# GIF and PNG files with transparency are supported
|
# GIF and PNG files with transparency are supported
|
||||||
if (imgformat == ImageFormat.PNG or imgformat == ImageFormat.GIF) and (
|
if imgformat in [ImageFormat.PNG, ImageFormat.GIF, ImageFormat.JPEG2000] and (
|
||||||
ics in ["RGBA", "LA"] or "transparency" in imgdata.info
|
ics in ["RGBA", "LA"] or "transparency" in imgdata.info
|
||||||
):
|
):
|
||||||
# Must check the IHDR chunk for the bit depth, because PIL would lossily
|
# Must check the IHDR chunk for the bit depth, because PIL would lossily
|
||||||
|
@ -1828,10 +1837,13 @@ def read_images(
|
||||||
raise JpegColorspaceError("jpeg can't be monochrome")
|
raise JpegColorspaceError("jpeg can't be monochrome")
|
||||||
if color == Colorspace["P"]:
|
if color == Colorspace["P"]:
|
||||||
raise JpegColorspaceError("jpeg can't have a color palette")
|
raise JpegColorspaceError("jpeg can't have a color palette")
|
||||||
if color == Colorspace["RGBA"]:
|
if color == Colorspace["RGBA"] and imgformat != ImageFormat.JPEG2000:
|
||||||
raise JpegColorspaceError("jpeg can't have an alpha channel")
|
raise JpegColorspaceError("jpeg can't have an alpha channel")
|
||||||
logger.debug("read_images() embeds a JPEG")
|
logger.debug("read_images() embeds a JPEG")
|
||||||
cleanup()
|
cleanup()
|
||||||
|
depth = 8
|
||||||
|
if imgformat == ImageFormat.JPEG2000:
|
||||||
|
_, _, _, _, _, _, depth = parsejp2(rawdata)
|
||||||
return [
|
return [
|
||||||
(
|
(
|
||||||
color,
|
color,
|
||||||
|
@ -1843,7 +1855,7 @@ def read_images(
|
||||||
imgheightpx,
|
imgheightpx,
|
||||||
[],
|
[],
|
||||||
False,
|
False,
|
||||||
8,
|
depth,
|
||||||
rotation,
|
rotation,
|
||||||
iccp,
|
iccp,
|
||||||
)
|
)
|
||||||
|
|
|
@ -361,6 +361,8 @@ def compare(im1, im2, exact, icc, cmyk):
|
||||||
+ [
|
+ [
|
||||||
"-metric",
|
"-metric",
|
||||||
"AE",
|
"AE",
|
||||||
|
"-alpha",
|
||||||
|
"off",
|
||||||
im1,
|
im1,
|
||||||
im2,
|
im2,
|
||||||
"null:",
|
"null:",
|
||||||
|
@ -1216,6 +1218,74 @@ def jpg_2000_img(tmp_path_factory, tmp_normal_png):
|
||||||
in_img.unlink()
|
in_img.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def jpg_2000_rgba8_img(tmp_path_factory, tmp_alpha_png):
    """Yield the path of an 8-bit RGBA JPEG 2000 file built from ``tmp_alpha_png``.

    The input PNG is converted with the ``CONVERT`` command using
    ``-depth 8``.  The result is then described by the same command's
    ``json:`` output and checked property by property before being handed
    to the tests.  The file is removed once the session is finished with it.
    """
    in_img = tmp_path_factory.mktemp("jpg_2000_rgba8") / "in.jp2"
    subprocess.check_call(CONVERT + [str(tmp_alpha_png), "-depth", "8", str(in_img)])
    identify = json.loads(subprocess.check_output(CONVERT + [str(in_img), "json:"]))
    assert len(identify) == 1
    # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
    # put into an array, here we cater for the older version containing just
    # the bare dictionary
    if "image" in identify:
        identify = [identify]
    assert "image" in identify[0]
    image = identify[0]["image"]
    full_frame = {"width": 60, "height": 60, "x": 0, "y": 0}
    # checked in this order; every failure reports the complete json dump
    expected = (
        ("format", "JP2"),
        ("mimeType", "image/jp2"),
        ("geometry", full_frame),
        ("colorspace", "sRGB"),
        ("type", "TrueColorAlpha"),
        ("depth", 8),
        ("pageGeometry", full_frame),
        ("compression", "JPEG2000"),
    )
    for key, want in expected:
        assert image.get(key) == want, str(identify)
    yield in_img
    in_img.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def jpg_2000_rgba16_img(tmp_path_factory, tmp_alpha_png):
    """Yield the path of a 16-bit RGBA JPEG 2000 file built from ``tmp_alpha_png``.

    The input PNG is converted with the ``CONVERT`` command without any
    depth option.  The result is then described by the same command's
    ``json:`` output and checked property by property (including a reported
    depth of 16) before being handed to the tests.  The file is removed once
    the session is finished with it.
    """
    in_img = tmp_path_factory.mktemp("jpg_2000_rgba16") / "in.jp2"
    subprocess.check_call(CONVERT + [str(tmp_alpha_png), str(in_img)])
    identify = json.loads(subprocess.check_output(CONVERT + [str(in_img), "json:"]))
    assert len(identify) == 1
    # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
    # put into an array, here we cater for the older version containing just
    # the bare dictionary
    if "image" in identify:
        identify = [identify]
    assert "image" in identify[0]
    image = identify[0]["image"]
    full_frame = {"width": 60, "height": 60, "x": 0, "y": 0}
    # checked in this order; every failure reports the complete json dump
    expected = (
        ("format", "JP2"),
        ("mimeType", "image/jp2"),
        ("geometry", full_frame),
        ("colorspace", "sRGB"),
        ("type", "TrueColorAlpha"),
        ("depth", 16),
        ("pageGeometry", full_frame),
        ("compression", "JPEG2000"),
    )
    for key, want in expected:
        assert image.get(key) == want, str(identify)
    yield in_img
    in_img.unlink()
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def png_rgb8_img(tmp_normal_png):
|
def png_rgb8_img(tmp_normal_png):
|
||||||
in_img = tmp_normal_png
|
in_img = tmp_normal_png
|
||||||
|
@ -4068,6 +4138,60 @@ def jpg_2000_pdf(tmp_path_factory, jpg_2000_img, request):
|
||||||
out_pdf.unlink()
|
out_pdf.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", params=["internal", "pikepdf"])
def jpg_2000_rgba8_pdf(tmp_path_factory, jpg_2000_rgba8_img, request):
    """Yield a PDF produced by ``img2pdfprog`` from the 8-bit RGBA JPEG 2000 image.

    The fixture is parametrized over the ``internal`` and ``pikepdf``
    engines.  After the conversion the PDF is opened with pikepdf and the
    page content stream plus the properties of the embedded image XObject
    are checked: 8 bits per component, no ``ColorSpace`` entry, the
    ``/JPXDecode`` filter and a size of 60x60.  The PDF is deleted after use.
    """
    out_pdf = tmp_path_factory.mktemp("jpg_2000_rgba8_pdf") / "out.pdf"
    command = [
        img2pdfprog,
        "--producer=",
        "--nodate",
        "--engine=" + request.param,
        "--output=" + str(out_pdf),
        jpg_2000_rgba8_img,
    ]
    subprocess.check_call(command)
    with pikepdf.open(str(out_pdf)) as p:
        page = p.pages[0]
        expected_stream = b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
        assert page.Contents.read_bytes() == expected_stream
        im0 = page.Resources.XObject.Im0
        assert im0.BitsPerComponent == 8
        # the image dictionary is expected to carry no colorspace at all
        assert not hasattr(im0, "ColorSpace")
        assert im0.Filter == "/JPXDecode"
        assert im0.Height == 60
        assert im0.Width == 60
    yield out_pdf
    out_pdf.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", params=["internal", "pikepdf"])
def jpg_2000_rgba16_pdf(tmp_path_factory, jpg_2000_rgba16_img, request):
    """Yield a PDF produced by ``img2pdfprog`` from the 16-bit RGBA JPEG 2000 image.

    The fixture is parametrized over the ``internal`` and ``pikepdf``
    engines.  After the conversion the PDF is opened with pikepdf and the
    page content stream plus the properties of the embedded image XObject
    are checked: 16 bits per component, no ``ColorSpace`` entry, the
    ``/JPXDecode`` filter and a size of 60x60.  The PDF is deleted after use.
    """
    out_pdf = tmp_path_factory.mktemp("jpg_2000_rgba16_pdf") / "out.pdf"
    command = [
        img2pdfprog,
        "--producer=",
        "--nodate",
        "--engine=" + request.param,
        "--output=" + str(out_pdf),
        jpg_2000_rgba16_img,
    ]
    subprocess.check_call(command)
    with pikepdf.open(str(out_pdf)) as p:
        page = p.pages[0]
        expected_stream = b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
        assert page.Contents.read_bytes() == expected_stream
        im0 = page.Resources.XObject.Im0
        assert im0.BitsPerComponent == 16
        # the image dictionary is expected to carry no colorspace at all
        assert not hasattr(im0, "ColorSpace")
        assert im0.Filter == "/JPXDecode"
        assert im0.Height == 60
        assert im0.Width == 60
    yield out_pdf
    out_pdf.unlink()
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session", params=["internal", "pikepdf"])
|
@pytest.fixture(scope="session", params=["internal", "pikepdf"])
|
||||||
def png_rgb8_pdf(tmp_path_factory, png_rgb8_img, request):
|
def png_rgb8_pdf(tmp_path_factory, png_rgb8_img, request):
|
||||||
out_pdf = tmp_path_factory.mktemp("png_rgb8_pdf") / "out.pdf"
|
out_pdf = tmp_path_factory.mktemp("png_rgb8_pdf") / "out.pdf"
|
||||||
|
@ -5461,6 +5585,39 @@ def test_jpg_2000(tmp_path_factory, jpg_2000_img, jpg_2000_pdf):
|
||||||
compare_pdfimages_jp2(tmpdir, jpg_2000_img, jpg_2000_pdf)
|
compare_pdfimages_jp2(tmpdir, jpg_2000_img, jpg_2000_pdf)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
    sys.platform == "win32",
    reason="test utilities not available on Windows and MacOS",
)
@pytest.mark.skipif(
    not HAVE_JP2, reason="requires imagemagick with support for jpeg2000"
)
def test_jpg_2000_rgba8(tmp_path_factory, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf):
    """Compare the 8-bit RGBA JPEG 2000 input against the PDF made from it.

    The comparison helpers are run in the order ghostscript, poppler,
    pdfimages.  The mupdf comparison is not part of this test.
    """
    tmpdir = tmp_path_factory.mktemp("jpg_2000_rgba8")
    checkers = (compare_ghostscript, compare_poppler, compare_pdfimages_jp2)
    for check in checkers:
        check(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
    sys.platform == "win32",
    reason="test utilities not available on Windows and MacOS",
)
@pytest.mark.skipif(
    not HAVE_JP2, reason="requires imagemagick with support for jpeg2000"
)
def test_jpg_2000_rgba16(tmp_path_factory, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf):
    """Compare the 16-bit RGBA JPEG 2000 input against the PDF made from it.

    Ghostscript is run with the ``tiff48nc`` device, followed by the
    pdfimages comparison.  The poppler comparison is left out because
    poppler outputs 8-bit RGB, so the comparison would not be exact.  The
    mupdf comparison is not part of this test either.
    """
    tmpdir = tmp_path_factory.mktemp("jpg_2000_rgba16")
    source, result = jpg_2000_rgba16_img, jpg_2000_rgba16_pdf
    compare_ghostscript(tmpdir, source, result, gsdevice="tiff48nc")
    compare_pdfimages_jp2(tmpdir, source, result)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
sys.platform in ["win32"],
|
sys.platform in ["win32"],
|
||||||
reason="test utilities not available on Windows and MacOS",
|
reason="test utilities not available on Windows and MacOS",
|
||||||
|
|
13
src/jp2.py
13
src/jp2.py
|
@ -37,9 +37,8 @@ def getBox(data, byteStart, noBytes):
|
||||||
|
|
||||||
|
|
||||||
def parse_ihdr(data):
    """Parse the payload of a JP2 image header ("ihdr") box.

    Only the first 11 bytes are read: height and width as big-endian
    unsigned 32-bit integers, the number of components as an unsigned
    16-bit integer and the bits-per-component byte.

    Args:
        data: the contents of the ihdr box (at least 11 bytes).

    Returns:
        A tuple ``(width, height, channels, bpp)`` where ``bpp`` is the
        bit depth of a single component.

    Raises:
        struct.error: if fewer than 11 bytes are supplied.
    """
    height, width, channels, bpc = struct.unpack(">IIHB", data[:11])
    # The low seven bits of the BPC byte store the bit depth minus one; the
    # high bit only flags signed component values and must not be counted as
    # part of the depth (otherwise signed 8-bit data would report 136 bits).
    # NOTE(review): the sentinel 255 ("depth varies per component, see the
    # bpcc box") is not resolved here and yields 128 -- confirm whether
    # callers need that case.
    return width, height, channels, (bpc & 0x7F) + 1
|
|
||||||
|
|
||||||
|
|
||||||
def parse_colr(data):
|
def parse_colr(data):
|
||||||
|
@ -85,13 +84,13 @@ def parse_jp2h(data):
|
||||||
while byteStart < noBytes and boxLengthValue != 0:
|
while byteStart < noBytes and boxLengthValue != 0:
|
||||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||||
if boxType == b"ihdr":
|
if boxType == b"ihdr":
|
||||||
width, height = parse_ihdr(boxContents)
|
width, height, channels, bpp = parse_ihdr(boxContents)
|
||||||
elif boxType == b"colr":
|
elif boxType == b"colr":
|
||||||
colorspace = parse_colr(boxContents)
|
colorspace = parse_colr(boxContents)
|
||||||
elif boxType == b"res ":
|
elif boxType == b"res ":
|
||||||
hdpi, vdpi = parse_res(boxContents)
|
hdpi, vdpi = parse_res(boxContents)
|
||||||
byteStart = byteEnd
|
byteStart = byteEnd
|
||||||
return (width, height, colorspace, hdpi, vdpi)
|
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||||
|
|
||||||
|
|
||||||
def parsejp2(data):
|
def parsejp2(data):
|
||||||
|
@ -102,7 +101,7 @@ def parsejp2(data):
|
||||||
while byteStart < noBytes and boxLengthValue != 0:
|
while byteStart < noBytes and boxLengthValue != 0:
|
||||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||||
if boxType == b"jp2h":
|
if boxType == b"jp2h":
|
||||||
width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
|
width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(boxContents)
|
||||||
break
|
break
|
||||||
byteStart = byteEnd
|
byteStart = byteEnd
|
||||||
if not width:
|
if not width:
|
||||||
|
@ -112,7 +111,7 @@ def parsejp2(data):
|
||||||
if not colorspace:
|
if not colorspace:
|
||||||
raise Exception("no colorspace in jp2 header")
|
raise Exception("no colorspace in jp2 header")
|
||||||
# retrieving the dpi is optional so we do not error out if not present
|
# retrieving the dpi is optional so we do not error out if not present
|
||||||
return (width, height, colorspace, hdpi, vdpi)
|
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Loading…
Reference in a new issue