initial commit

main
josch 10 years ago
commit ce1a1bf48a

@ -0,0 +1,58 @@
#!/usr/bin/env python
import sys
import os
import zlib
import Image
import StringIO
from pdfrw import PdfReader, PdfDict, PdfArray, PdfName, PdfWriter
def process_image(image):
#if image.get("/Mask"):
# del(image["/Mask"])
#if image.get("/SMask"):
# del(image["/SMask"])
#if image.get("/ImageMask"):
# del(image["/ImageMask"])
# image["/Width"] = 1
# image["/Height"] = 1
# image["/Filter"] = PdfName("FlateDecode")
# imgdata = Image.open("empty.jpg")
# image.stream = zlib.compress(imgdata.tostring())
#print image
if image["/Filter"] == PdfName("FlateDecode"):
pass
elif image["/Filter"] == PdfName("DCTDecode"):
im = Image.open(StringIO.StringIO(image.stream))
outf = StringIO.StringIO()
im.save(outf, "JPEG", quality=45)
image.stream = outf.getvalue()
outf.close()
#image["/Filter"] = PdfName("FlateDecode")
#image.stream = zlib.compress(im.tostring())
def find_images(obj, visited=set()):
if not isinstance(obj, (PdfDict, PdfArray)):
return
# Don't get stuck in an infinite loop
myid = id(obj)
if myid in visited:
return
visited.add(myid)
if isinstance(obj, PdfDict):
if obj.Type == PdfName.XObject and obj.Subtype == PdfName.Image:
process_image(obj)
obj = obj.itervalues()
for item in obj:
find_images(item, visited)
if __name__ == '__main__':
inpfn,outfn = sys.argv[1:]
reader = PdfReader(inpfn)
find_images(reader)
PdfWriter().addpages(reader.pages).write(outfn)
Loading…
Cancel
Save