58 lines
1.6 KiB
Python
58 lines
1.6 KiB
Python
#!/usr/bin/env python
|
|
|
|
|
|
import sys
|
|
import os
|
|
import zlib
|
|
import Image
|
|
import StringIO
|
|
|
|
from pdfrw import PdfReader, PdfDict, PdfArray, PdfName, PdfWriter
|
|
|
|
def process_image(image):
    """Recompress a JPEG image XObject in place to shrink the PDF.

    Only a dictionary whose ``/Filter`` equals ``DCTDecode`` (JPEG data) is
    touched: its stream is opened with ``Image.open``, re-encoded as JPEG at
    quality 45, and written back to ``image.stream`` -- but only when the
    re-encoded data is smaller than the original.  ``FlateDecode`` streams
    and every other filter are left unchanged.

    Args:
        image: The image XObject dictionary (a pdfrw ``PdfDict``) whose
            ``/Filter`` entry and ``stream`` attribute are inspected.

    Returns:
        None.  The dictionary is modified in place.
    """
    is_jpeg = image["/Filter"] == PdfName("DCTDecode")
    if not is_jpeg:
        # FlateDecode and any other encodings are deliberately not recompressed.
        return

    original = image.stream
    im = Image.open(StringIO.StringIO(original))
    outf = StringIO.StringIO()
    try:
        im.save(outf, "JPEG", quality=45)
        recompressed = outf.getvalue()
    finally:
        # Release the in-memory buffer even if the encoder raises.
        outf.close()

    # Re-encoding an already heavily compressed JPEG can produce more bytes
    # than the input; keep the original stream then so the output never grows.
    if len(recompressed) < len(original):
        image.stream = recompressed
|
|
|
|
def find_images(obj, visited=None):
    """Recursively walk a PDF object graph and process every image XObject.

    Each ``PdfDict`` whose ``Type`` is ``XObject`` and whose ``Subtype`` is
    ``Image`` is passed to ``process_image``.  The walk descends into the
    values of every ``PdfDict`` and the items of every ``PdfArray``; any
    other kind of object is ignored.

    Args:
        obj: The object to start from (for example a ``PdfReader``).
        visited: Optional set of ``id()`` values of containers that have
            already been walked.  A fresh set is created for each top-level
            call when omitted, so separate traversals do not share state.

    Returns:
        None.  Matching image dictionaries are modified in place.
    """
    # A ``set()`` default would be created once and shared by every call,
    # making later traversals silently skip objects; use a None sentinel.
    if visited is None:
        visited = set()

    if not isinstance(obj, (PdfDict, PdfArray)):
        return

    # Don't get stuck in an infinite loop: PDF object graphs contain cycles.
    myid = id(obj)
    if myid in visited:
        return
    visited.add(myid)

    if isinstance(obj, PdfDict):
        if obj.Type == PdfName.XObject and obj.Subtype == PdfName.Image:
            process_image(obj)
        # For dictionaries, recurse into the values rather than the keys.
        obj = obj.itervalues()

    for item in obj:
        find_images(item, visited)
|
|
|
|
if __name__ == '__main__':
    # Exactly two arguments are required: the input PDF and the output path.
    # Fail with a usage message instead of an opaque unpacking ValueError.
    if len(sys.argv) != 3:
        sys.exit("usage: %s input.pdf output.pdf" % sys.argv[0])
    inpfn, outfn = sys.argv[1:]

    reader = PdfReader(inpfn)
    # Walk the objects reachable from the reader, handling images in place,
    # before the pages are copied to the output file.
    find_images(reader)
    PdfWriter().addpages(reader.pages).write(outfn)
|