forked from josch/img2pdf
JPEG2000 support
This commit is contained in:
parent
571266a513
commit
0bd841c530
2 changed files with 63 additions and 35 deletions
53
README.md
53
README.md
|
@ -1,17 +1,17 @@
|
|||
img2pdf
|
||||
=======
|
||||
|
||||
Lossless conversion of images to PDF without unnecessarily re-encoding JPEG
|
||||
files. Thus, no loss of quality and no unnecessarily large output file.
|
||||
Lossless conversion of images to PDF without unnecessarily re-encoding JPEG and
|
||||
JPEG2000 files. Thus, no loss of quality and no unnecessarily large output file.
|
||||
|
||||
background
|
||||
----------
|
||||
|
||||
PDF is able to embed JPEG images as they are without re-encoding them (and
|
||||
hence losing quality) but I was missing a tool to do this automatically, thus
|
||||
I wrote this piece of python code.
|
||||
PDF is able to embed JPEG and JPEG2000 images as they are without re-encoding
|
||||
them (and hence losing quality) but I was missing a tool to do this
|
||||
automatically, thus I wrote this piece of python code.
|
||||
|
||||
If you know how to embed JPEG images into a PDF container without
|
||||
If you know how to embed JPEG and JPEG2000 images into a PDF container without
|
||||
recompression, using existing tools, please contact me so that I can put this
|
||||
code into the garbage bin :D
|
||||
|
||||
|
@ -19,12 +19,12 @@ functionality
|
|||
-------------
|
||||
|
||||
The program will take image filenames from commandline arguments and output a
|
||||
PDF file with them embedded into it. If the input image is a JPEG file, it will
|
||||
be included as-is without any processing. If it is in any other format, the
|
||||
image will be included as zip-encoded RGB. As a result, this tool will be able
|
||||
to losslessly wrap any image into a PDF container while performing better (in
|
||||
terms of quality/filesize ratio) than existing tools in case the input image is
|
||||
a JPEG.
|
||||
PDF file with them embedded into it. If the input image is a JPEG or JPEG2000
|
||||
file, it will be included as-is without any processing. If it is in any other
|
||||
format, the image will be included as zip-encoded RGB. As a result, this tool
|
||||
will be able to losslessly wrap any image into a PDF container while performing
|
||||
better (in terms of quality/filesize ratio) than existing tools in case the
|
||||
input image is a JPEG or JPEG2000 file.
|
||||
|
||||
For the record, the imagemagick command to losslessly convert any image to
|
||||
PDF using zip-encoding, is:
|
||||
|
@ -32,24 +32,25 @@ PDF using zip-encoding, is:
|
|||
convert input.jpg -compress Zip output.pdf
|
||||
|
||||
The downside is that using imagemagick like this will make the resulting PDF
|
||||
files a few times bigger than the input JPEG and can also not output a
|
||||
multipage PDF.
|
||||
files a few times bigger than the input JPEG or JPEG2000 file and can also not
|
||||
output a multipage PDF.
|
||||
|
||||
img2pdf is able to output a PDF with multiple pages if more than one input
|
||||
image is given, losslessly embed JPEGs into a PDF container without adding more
|
||||
overhead than the PDF structure itself and will save all other graphics formats
|
||||
using lossless zip-compression.
|
||||
image is given, losslessly embed JPEG and JPEG2000 files into a PDF container
|
||||
without adding more overhead than the PDF structure itself and will save all
|
||||
other graphics formats using lossless zip-compression.
|
||||
|
||||
bugs
|
||||
----
|
||||
|
||||
If you find a JPEG that, when embedded, cannot be read by the Adobe Acrobat
|
||||
Reader, please contact me.
|
||||
If you find a JPEG or JPEG2000 file that, when embedded, cannot be read by the
|
||||
Adobe Acrobat Reader, please contact me.
|
||||
|
||||
For lossless conversion of formats other than JPEG, zip/flate encoding is used.
|
||||
This choice is based on a number of tests I did on images. I converted them
|
||||
into PDF using imagemagick and all compressions it has to offer and then
|
||||
compared the output size of the lossless variants. In all my tests, zip/flate
|
||||
encoding performed best. You can verify my findings using the test_comp.sh
|
||||
script with any input image given as a commandline argument. If you find an
|
||||
input file that is outperformed by another lossless compression, contact me.
|
||||
For lossless conversion of formats other than JPEG or JPEG2000 files, zip/flate
|
||||
encoding is used. This choice is based on a number of tests I did on images.
|
||||
I converted them into PDF using imagemagick and all compressions it has to
|
||||
offer and then compared the output size of the lossless variants. In all my
|
||||
tests, zip/flate encoding performed best. You can verify my findings using the
|
||||
test_comp.sh script with any input image given as a commandline argument. If
|
||||
you find an input file that is outperformed by another lossless compression,
|
||||
contact me.
|
||||
|
|
45
img2pdf.py
45
img2pdf.py
|
@ -4,6 +4,7 @@ import Image
|
|||
import sys
|
||||
import zlib
|
||||
import argparse
|
||||
import struct
|
||||
from datetime import datetime
|
||||
|
||||
def parse(cont, indent=1):
|
||||
|
@ -38,6 +39,8 @@ class obj():
|
|||
def main(images, dpi, title=None, author=None, creator=None, producer=None,
|
||||
creationdate=None, moddate=None, subject=None, keywords=None):
|
||||
|
||||
version = 3 # default pdf version 1.3
|
||||
|
||||
now = datetime.now()
|
||||
|
||||
info = dict()
|
||||
|
@ -67,15 +70,32 @@ def main(images, dpi, title=None, author=None, creator=None, producer=None,
|
|||
pagestuples = list()
|
||||
|
||||
for im in images:
|
||||
imgdata = Image.open(im)
|
||||
width, height = imgdata.size
|
||||
if dpi:
|
||||
dpi_x, dpi_y = dpi, dpi
|
||||
try:
|
||||
imgdata = Image.open(im)
|
||||
except IOError:
|
||||
# test if it is a jpeg2000 image
|
||||
im.seek(0)
|
||||
if im.read(12) != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
||||
print "cannot read input image"
|
||||
exit(1)
|
||||
# image is jpeg2000
|
||||
imgformat = "JP2"
|
||||
im.seek(48)
|
||||
height, width = struct.unpack(">II", im.read(8))
|
||||
color = "RGB" # TODO: read real colorspace
|
||||
if dpi:
|
||||
dpi_x, dpi_y = dpi, dpi
|
||||
else:
|
||||
dpi_x, dpi_y = (96, 96) # TODO: read real dpi
|
||||
else:
|
||||
dpi_x, dpi_y = imgdata.info.get("dpi", (96, 96))
|
||||
pdf_x, pdf_y = 72.0*width/dpi_x, 72.0*height/dpi_y # pdf units = 1/72 inch
|
||||
imgformat = imgdata.format
|
||||
color = imgdata.mode
|
||||
width, height = imgdata.size
|
||||
if dpi:
|
||||
dpi_x, dpi_y = dpi, dpi
|
||||
else:
|
||||
dpi_x, dpi_y = imgdata.info.get("dpi", (96, 96))
|
||||
imgformat = imgdata.format
|
||||
color = imgdata.mode
|
||||
|
||||
if color == 'L':
|
||||
color = "/DeviceGray"
|
||||
elif color == 'RGB':
|
||||
|
@ -84,11 +104,18 @@ def main(images, dpi, title=None, author=None, creator=None, producer=None,
|
|||
print "unsupported color space:", color
|
||||
exit(1)
|
||||
|
||||
pdf_x, pdf_y = 72.0*width/dpi_x, 72.0*height/dpi_y # pdf units = 1/72 inch
|
||||
|
||||
# either embed the whole jpeg or deflate the bitmap representation
|
||||
if imgformat is "JPEG":
|
||||
ofilter = [ "/DCTDecode" ]
|
||||
im.seek(0)
|
||||
imgdata = im.read()
|
||||
elif imgformat is "JP2":
|
||||
ofilter = [ "/JPXDecode" ]
|
||||
im.seek(0)
|
||||
imgdata = im.read()
|
||||
version = 5 # jpeg2000 needs pdf 1.5
|
||||
else:
|
||||
ofilter = [ "/FlateDecode" ]
|
||||
imgdata = zlib.compress(imgdata.tostring())
|
||||
|
@ -147,7 +174,7 @@ def main(images, dpi, title=None, author=None, creator=None, producer=None,
|
|||
|
||||
xreftable = list()
|
||||
|
||||
result = "%PDF-1.3\n"
|
||||
result = "%%PDF-1.%d\n"%version
|
||||
|
||||
xreftable.append("0000000000 65535 f \n")
|
||||
for o in objects:
|
||||
|
|
Loading…
Reference in a new issue