From 559d42cd4aed08333145c776878c7134bba2acf9 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Wed, 29 Apr 2020 16:15:55 +0200 Subject: [PATCH] magick.py: use our own deflate algo for bit-by-bit reproducible output independent of the compressor used --- magick.py | 47 ++++++++++++++++++++++++++++++++++++++++++++++- test.sh | 26 +++++++++++++------------- 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/magick.py b/magick.py index 8299cf1..72a43e3 100755 --- a/magick.py +++ b/magick.py @@ -71,6 +71,51 @@ def palettize(img, pal): return result +# we cannot use zlib.compress() because different compressors may compress the +# same data differently, for example by using different optimizations on +# different architectures: +# https://lists.fedoraproject.org/archives/list/devel@lists.fedoraproject.org/thread/R7GD4L5Z6HELCDAL2RDESWR2F3ZXHWVX/ +# +# to make the compressed representation of the uncompressed data bit-by-bit +# identical on all platforms we make use of the compression method 0, that is, +# no compression at all :) +def compress(data): + # two-byte zlib header (rfc1950) + # common header for lowest compression level + # bits 0-3: Compression info, base-2 logarithm of the LZ77 window size, + # minus eight -- 7 indicates a 32K window size + # bits 4-7: Compression method -- 8 is deflate + # bits 8-9: Compression level -- 0 is fastest + # bit 10: preset dictionary -- 0 is none + # bits 11-15: check bits so that the 16-bit unsigned integer stored in MSB + # order is a multiple of 31 + result = b"\x78\x01" + # content is stored in deflate format (rfc1951) + # maximum chunk size is the largest 16 bit unsigned integer + chunksize = 0xFFFF + for i in range(0, len(data), chunksize): + # bits 0-4 are unused + # bits 5-6 indicate compression method -- 0 is no compression + # bit 7 indicates the last chunk + if i * chunksize < len(data) - chunksize: + result += b"\x00" + else: + # last chunck + result += b"\x01" + chunk = data[i : i + chunksize] + # the chunk length as little endian 16 bit unsigned integer + result += struct.pack("I", zlib.adler32(data)) + return result + + def write_png(data, path, bitdepth, colortype, palette=None): with open(path, "wb") as f: f.write(b"\x89PNG\r\n\x1A\n") @@ -124,7 +169,7 @@ def write_png(data, path, bitdepth, colortype, palette=None): raw += struct.pack(">B", val) else: raise Exception() - compressed = zlib.compress(raw) + compressed = compress(raw) block = b"IDAT" + compressed f.write( struct.pack(">I", len(compressed)) diff --git a/test.sh b/test.sh index f4d50e5..88ec374 100755 --- a/test.sh +++ b/test.sh @@ -97,19 +97,19 @@ trap error EXIT python3 magick.py "$tempdir" cat << END | ( cd "$tempdir"; md5sum --check --status - ) -7ed200c092c726c68e889514fff0d8f1 alpha.png -bf56e00465b98fb738f6edd2c58dac3b gray16.png -f93c3e3c11dad3f8c11db4fd2d01c2cc gray1.png -d63167c66e8a65bd5c15f68c8d554c48 gray2.png -6bceb845d9c9946adad1526954973945 gray4.png -7ba8152b9146eb7d9d50529189baf7db gray8.png -75130ec7635919e40c7396d45899ddbe inverse.png -bd1a2c9f9dfc51a827eafa3877cf7f83 normal16.png -329eac79fec2e1bc30d7a50ba2e3f2a5 normal.png -9ffd3f592b399f9f9c23892db82369cd palette1.png -6d3a39fe5f2efea5975f048d11a5cb02 palette2.png -57add39e5c278249b64ab23314a41c39 palette4.png -192a3b298d812a156e6f248238d2bb52 palette8.png +cc611e80cde3b9b7adb7723801a4e5d4 alpha.png +706175887af8ca1a33cfd93449f970df gray16.png +198e3333d7dc6158f94d1007e75c5bd3 gray1.png +795ff9c5c59cdc95f5a0b9191a7247d9 gray2.png +722223ba74be9cba1af4a549076b70d3 gray4.png +2320216faa5a10bf0f5f04ebce07f8e1 gray8.png +35a47d6ae6de8c9d0b31aa0cda8648f3 inverse.png +6ad810399058a87d8145d8d9a7734da5 normal16.png +c8d2e1f116f31ecdeae050524efca7b6 normal.png +c69edd665ee3546d9eb57fe10cd8095a palette1.png +504ecab83662aa2394042a62b1d2eafb palette2.png +90d1bbe7b26e210d8846cf8692a98b72 palette4.png +50bf09eb3571901f0bf642b9a733038c palette8.png END # use img2pdfprog environment variable if it is set