From 6c44d8cea60db017c6859ddd198192ea35de7f0f Mon Sep 17 00:00:00 2001
From: Johannes 'josch' Schauer <josch@mister-muffin.de>
Date: Mon, 20 Aug 2018 10:21:18 +0200
Subject: [PATCH] src/img2pdf.py: add more rationale behind palette encoding

---
 src/img2pdf.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/img2pdf.py b/src/img2pdf.py
index 797bd27..52a6ffb 100755
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@@ -317,6 +317,10 @@ if PY3:
                     string = string.encode('ascii')
                 except UnicodeEncodeError:
                     string = b"\xfe\xff"+string.encode("utf-16-be")
+                # We should probably escape more bytes here because at least
+                # ghostscript interprets a carriage return byte (0x0D) as a
+                # line feed byte (0x0A).
+                # PDF supports the escape sequences \n, \r, \t, \b and \f
                 string = string.replace(b'\\', b'\\\\')
                 string = string.replace(b'(', b'\\(')
                 string = string.replace(b')', b'\\)')
@@ -780,6 +784,15 @@ def parse_png(rawdata):
         if rawdata[i-4:i] == b"IDAT":
             pngidat += rawdata[i:i+n]
         elif rawdata[i-4:i] == b"PLTE":
+            # This could be as simple as saying "palette = rawdata[i:i+n]" but
+            # pdfrw only escapes parentheses and backslashes in the raw byte
+            # stream, while raw carriage return bytes are interpreted as line
+            # feed bytes by ghostscript. So instead we use the hex string
+            # format. pdfrw cannot write it but at least ghostscript is happy
+            # with it. We could also write out the palette in binary format
+            # (and escape more bytes) but since we cannot use pdfrw anyway,
+            # we choose the more human-readable variant.
+            # See https://github.com/pmaupin/pdfrw/issues/147
             for j in range(i, i+n, 3):
                 # with int.from_bytes() we would not have to prepend extra
                 # zeroes