store times in UTC and understand YYYY-MM-DD, YYYY-MM-DDTHH:MM, YYYY-MM-DDTHH:MM:SS and everything understood by dateutil module and date --date

This commit is contained in:
josch 2015-03-13 14:29:53 +01:00
parent 81647dd06f
commit f3674907d6
2 changed files with 47 additions and 10 deletions

View file

@ -8,6 +8,8 @@ CHANGES
- replace -x and -y option by combined option -s (or --pagesize) and use -S - replace -x and -y option by combined option -s (or --pagesize) and use -S
for --subject for --subject
- correctly encode and escape non-ascii metadata - correctly encode and escape non-ascii metadata
- explicitly store date in UTC and allow parsing all date formats understood
by dateutil and `date --date`
0.1.5 0.1.5
----- -----

View file

@ -43,6 +43,9 @@ def error_out(message):
def warning_out(message): def warning_out(message):
sys.stderr.write("W: "+message+"\n") sys.stderr.write("W: "+message+"\n")
def datetime_to_pdfdate(dt):
    """Format a datetime as the body of a PDF date string, expressed in UTC.

    The returned text has the form ``YYYYMMDDHHMMSSZ``; callers wrap it
    as ``(D:...)`` themselves.

    The trailing ``Z`` declares the time to be UTC, so a timezone-aware
    datetime is shifted to UTC before formatting.  A naive datetime
    carries no offset information and is formatted unchanged, i.e. it is
    taken to already be in UTC.

    :param dt: naive (assumed UTC) or timezone-aware ``datetime``
    :returns: ``str`` such as ``"20150313142953Z"``
    """
    offset = dt.utcoffset()
    if offset is not None:
        # Subtracting the offset yields the UTC wall-clock time; dropping
        # tzinfo afterwards keeps strftime from seeing a stale timezone.
        dt = (dt - offset).replace(tzinfo=None)
    return dt.strftime("%Y%m%d%H%M%SZ")
def parse(cont, indent=1): def parse(cont, indent=1):
if type(cont) is dict: if type(cont) is dict:
return b"<<\n"+b"\n".join( return b"<<\n"+b"\n".join(
@ -95,17 +98,14 @@ class pdfdoc(object):
info[b"/Creator"] = b"("+creator+b")" info[b"/Creator"] = b"("+creator+b")"
if producer: if producer:
info[b"/Producer"] = b"("+producer+b")" info[b"/Producer"] = b"("+producer+b")"
datetime_formatstring = "%Y%m%d%H%M%S"
if creationdate: if creationdate:
info[b"/CreationDate"] = b"(D:"+creationdate.strftime(datetime_formatstring).encode()+b")" info[b"/CreationDate"] = b"(D:"+datetime_to_pdfdate(creationdate).encode()+b")"
elif not nodate: elif not nodate:
info[b"/CreationDate"] = b"(D:"+now.strftime(datetime_formatstring).encode()+b")" info[b"/CreationDate"] = b"(D:"+datetime_to_pdfdate(now).encode()+b")"
if moddate: if moddate:
info[b"/ModDate"] = b"(D:"+moddate.strftime(datetime_formatstring).encode()+b")" info[b"/ModDate"] = b"(D:"+datetime_to_pdfdate(moddate).encode()+b")"
elif not nodate: elif not nodate:
info[b"/ModDate"] = b"(D:"+now.strftime(datetime_formatstring).encode()+b")" info[b"/ModDate"] = b"(D:"+datetime_to_pdfdate(now).encode()+b")"
if subject: if subject:
info[b"/Subject"] = b"("+subject+b")" info[b"/Subject"] = b"("+subject+b")"
if keywords: if keywords:
@ -374,7 +374,42 @@ def positive_float(string):
return value return value
def valid_date(string):
    """Parse a user-supplied date string (argparse ``type=`` callback).

    Parsers are tried in order and the first one that succeeds wins:

    1. the fixed formats ``YYYY-MM-DD``, ``YYYY-MM-DDTHH:MM`` and
       ``YYYY-MM-DDTHH:MM:SS``;
    2. ``dateutil.parser.parse``, if the dateutil module is installed;
    3. the system ``date --date`` utility, if it can be executed.

    :param string: the date text given on the command line
    :returns: a ``datetime`` instance
    :raises argparse.ArgumentTypeError: if no parser understands *string*
    """
    # first try parsing in ISO8601 format
    for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M", "%Y-%m-%dT%H:%M:%S"):
        try:
            return datetime.strptime(string, fmt)
        except ValueError:
            pass
    # then try dateutil
    try:
        from dateutil import parser as dateutil_parser
    except ImportError:
        pass
    else:
        try:
            return dateutil_parser.parse(string)
        except (TypeError, ValueError, OverflowError):
            # dateutil reports unparseable input as ValueError (its
            # ParserError subclass) or OverflowError; fall through to the
            # next parser instead of crashing with a traceback.
            pass
    # as a last resort, try the local date utility
    try:
        import subprocess
    except ImportError:
        pass
    else:
        try:
            utime = subprocess.check_output(["date", "--date", string, "+%s"])
            return datetime.utcfromtimestamp(int(utime))
        except (subprocess.CalledProcessError, OSError, ValueError,
                OverflowError):
            # CalledProcessError: `date` rejected the string.
            # OSError: no usable `date` executable on this system.
            # ValueError/OverflowError: output was not a usable timestamp.
            pass
    raise argparse.ArgumentTypeError("cannot parse date: %s" % string)
def get_standard_papersize(string): def get_standard_papersize(string):
papersizes = { papersizes = {
@ -577,10 +612,10 @@ parser.add_argument(
help='producer for metadata') help='producer for metadata')
parser.add_argument( parser.add_argument(
'-r', '--creationdate', metavar='creationdate', type=valid_date, '-r', '--creationdate', metavar='creationdate', type=valid_date,
help='creation date for metadata in YYYY-MM-DDTHH:MM:SS format') help='UTC creation date for metadata in YYYY-MM-DD or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format understood by python dateutil module or any format understood by `date --date`')
parser.add_argument( parser.add_argument(
'-m', '--moddate', metavar='moddate', type=valid_date, '-m', '--moddate', metavar='moddate', type=valid_date,
help='modification date for metadata in YYYY-MM-DDTHH:MM:SS format') help='UTC modification date for metadata in YYYY-MM-DD or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format understood by python dateutil module or any format understood by `date --date`')
parser.add_argument( parser.add_argument(
'-S', '--subject', metavar='subject', type=pdf_embedded_string, '-S', '--subject', metavar='subject', type=pdf_embedded_string,
help='subject for metadata') help='subject for metadata')