From f3674907d622ae03a12d744faf42bbd870ac6dba Mon Sep 17 00:00:00 2001 From: josch Date: Fri, 13 Mar 2015 14:29:53 +0100 Subject: [PATCH] store times in UTC and understand YYYY-MM-DD, YYYY-MM-DDTHH:MM, YYYY-MM-DDTHH:MM:SS and everything understood by dateutil module and date --date --- CHANGES.rst | 2 ++ src/img2pdf.py | 55 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 9884655..ec2a745 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,8 @@ CHANGES - replace -x and -y option by combined option -s (or --pagesize) and use -S for --subject - correctly encode and escape non-ascii metadata + - explicitly store date in UTC and allow parsing all date formats understood + by dateutil and `date --date` 0.1.5 ----- diff --git a/src/img2pdf.py b/src/img2pdf.py index 5c30b2b..ffa95e1 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -43,6 +43,9 @@ def error_out(message): def warning_out(message): sys.stderr.write("W: "+message+"\n") +def datetime_to_pdfdate(dt): + return dt.strftime("%Y%m%d%H%M%SZ") + def parse(cont, indent=1): if type(cont) is dict: return b"<<\n"+b"\n".join( @@ -95,17 +98,14 @@ class pdfdoc(object): info[b"/Creator"] = b"("+creator+b")" if producer: info[b"/Producer"] = b"("+producer+b")" - - datetime_formatstring = "%Y%m%d%H%M%S" if creationdate: - info[b"/CreationDate"] = b"(D:"+creationdate.strftime(datetime_formatstring).encode()+b")" + info[b"/CreationDate"] = b"(D:"+datetime_to_pdfdate(creationdate).encode()+b")" elif not nodate: - info[b"/CreationDate"] = b"(D:"+now.strftime(datetime_formatstring).encode()+b")" + info[b"/CreationDate"] = b"(D:"+datetime_to_pdfdate(now).encode()+b")" if moddate: - info[b"/ModDate"] = b"(D:"+moddate.strftime(datetime_formatstring).encode()+b")" + info[b"/ModDate"] = b"(D:"+datetime_to_pdfdate(moddate).encode()+b")" elif not nodate: - info[b"/ModDate"] = b"(D:"+now.strftime(datetime_formatstring).encode()+b")" - + info[b"/ModDate"] = b"(D:"+datetime_to_pdfdate(now).encode()+b")" if subject: info[b"/Subject"] = b"("+subject+b")" if keywords: @@ -374,7 +374,42 @@ def positive_float(string): return value def valid_date(string): - return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S") + # first try parsing in ISO8601 format + try: + return datetime.strptime(string, "%Y-%m-%d") + except ValueError: + pass + try: + return datetime.strptime(string, "%Y-%m-%dT%H:%M") + except ValueError: + pass + try: + return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S") + except ValueError: + pass + # then try dateutil + try: + from dateutil import parser + except ImportError: + pass + else: + try: + return parser.parse(string) + except TypeError: + pass + # as a last resort, try the local date utility + try: + import subprocess + except ImportError: + pass + else: + try: + utime = subprocess.check_output(["date", "--date", string, "+%s"]) + except subprocess.CalledProcessError: + pass + else: + return datetime.utcfromtimestamp(int(utime)) + raise argparse.ArgumentTypeError("cannot parse date: %s"%string) def get_standard_papersize(string): papersizes = { @@ -577,10 +612,10 @@ parser.add_argument( help='producer for metadata') parser.add_argument( '-r', '--creationdate', metavar='creationdate', type=valid_date, - help='creation date for metadata in YYYY-MM-DDTHH:MM:SS format') + help='UTC creation date for metadata in YYYY-MM-DD or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format understood by python dateutil module or any format understood by `date --date`') parser.add_argument( '-m', '--moddate', metavar='moddate', type=valid_date, - help='modification date for metadata in YYYY-MM-DDTHH:MM:SS format') + help='UTC modification date for metadata in YYYY-MM-DD or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format understood by python dateutil module or any format understood by `date --date`') parser.add_argument( '-S', '--subject', metavar='subject', type=pdf_embedded_string, help='subject for metadata')