from urllib2 import Request, urlopen
from urllib import urlencode, quote_plus, urlretrieve
from gzip import GzipFile
from cStringIO import StringIO
from struct import unpack
from time import sleep
from datetime import timedelta, date, time, datetime
from lxml import etree
def get_id(station):
    """
    look up a single station: the station name with "!" appended is handed
    to get_ids() and the first entry of whatever comes back is returned
    (an empty result makes the [0] lookup fail)
    """
    query = station + "!"
    matches = get_ids(query)
    return matches[0]
def search_ids(station):
    """
    hand the station name with "?" appended to get_ids() and pass its
    result back unchanged
    """
    query = station + "?"
    return get_ids(query)
def get_ids(query):
req = Request("http://railnavigator.bahn.de/bin/rnav/query.exe/dn",
''%query,
{"User-Agent":"Java/1.6.0_0"})
# \n\n
# a run of unknown data follows which consists either of never-changing data
# or of zeroed data
# it does not seem to be important for the rest, as its size is static and
# no static information seems to be missing
# probably 46 bytes of obsolete data?
self.f.seek(0x3e)
#print "cities_offset: %d, train_props_offset: %d"%(self.cities_offset, self.train_props_offset)
additional_offset1, u2, additional_offset2 \
= unpack("<3I", self.f.read(12))
#print "additional_offset1: %d, u2: %d, additional_offset2: %d"%(additional_offset1, u2, additional_offset2)
self.f.seek(additional_offset1)
u1, = unpack("
for i in xrange(number_of_conn):
self.f.seek(0x4a + 12*i)
freq, train_list_offset, number_of_trains, number_of_changes, duration \
= unpack(" 13:54
512 => 5:12
"""
t = "%03d"%t
return timedelta(hours=int(t[:-2]), minutes=int(t[-2:]))
def parse_time(self, t):
    """
    convert an integer clock value into a datetime.time
    the decimal digits of the integer read as the time of day: the last two
    digits are the minute, everything in front of them is the hour
    1345 => 13:45
    512 => 5:12
    """
    # pad to at least three digits so there is always an hour part left
    # once the two minute digits are cut off (e.g. 5 => "005" => 0:05)
    digits = "%03d" % t
    hour_part = digits[:-2]
    minute_part = digits[-2:]
    return time(hour=int(hour_part), minute=int(minute_part))
def parse_date(self, d):
    """
    turn a day count into a datetime.date
    the count is taken relative to 01.01.1980, which is day 0
    """
    epoch = date(1980, 1, 1)
    return epoch + timedelta(days=d)
def parse_transportation(self, t):
    """
    map the numeric transportation code to its name
    transportation can be by some train or by foot:
    1 => "feet"
    2 => "train"
    any other code raises an Exception naming the unexpected value
    """
    if t == 1:
        return "feet"
    if t == 2:
        return "train"
    # call form of raise: the old 'raise Exception, "msg"' statement form is
    # a syntax error on python 3, this spelling works on both 2 and 3 and
    # raises the very same exception type and message
    raise Exception("transportation %d unexpected" % t)
def get_frequency(self, offset):
    """
    read the frequency record found offset bytes into the data block that
    begins at frequencies_offset
    a record is four little-endian unsigned shorts: the first one is
    resolved through get_string(), the remaining three are returned as raw
    integers
    what those three mean is still unknown, they do not seem to correspond
    with the information in the string referenced by the first value
    returns a 4-tuple: (string, u1, u2, u3)
    """
    record_start = self.frequencies_offset + offset
    self.f.seek(record_start)
    #TODO: what do the last values mean?
    #TODO: where are the days of service properly encoded?
    fields = unpack("<4H", self.f.read(8))
    string_ref = fields[0]
    return (self.get_string(string_ref),) + fields[1:]
def get_string(self, offset):
    """
    given the offset, return a zero terminated string from the stringblock
    the string is read starting at strings_offset+offset, surrounding
    whitespace is stripped and, by convention of the format, "---" is
    turned into None
    every result is remembered in self.strings (keyed by offset) so each
    string is only ever read from the file once
    if the file ends before a terminating zero byte shows up, the
    characters read so far are treated as the complete string
    """
    if offset in self.strings:
        # get it from dict to prevent the byte-wise read below
        return self.strings[offset]

    self.f.seek(self.strings_offset + offset)
    chars = []
    # read byte by byte so the file position ends up right behind the
    # terminator, exactly where a caller may expect it
    while True:
        ch = self.f.read(1)
        # read() returns an empty string at end of file; without this check
        # a missing terminator would make the loop spin forever
        if not ch or ch == "\0":
            break
        chars.append(ch)

    # some strings come with many whitespaces at the end
    result = "".join(chars).strip()
    # by convention of the format "---" means None
    if result == "---":
        result = None
    # fill the dict with strings to prevent reading them again
    self.strings[offset] = result
    return result
def get_train_props(self, offset):
"""
given the offset, get the string list of train properties from the
data block beginning at train_props_offset. the first ushort marks the
amount of properties
"""
self.f.seek(self.train_props_offset+offset)
n, = unpack("> y) & 1) for y in range(16-1, -1, -1))
#print a.timetable_info
# print each entry of a.connections on its own line
for conn in a.connections:
    print(conn)
#for conn in a.connections:
# print "\t".join([conn["freq"][0], bin(conn["freq"][1]), bin(conn["freq"][2]), bin(conn["freq"][3])])
#for conn in a.connections:
# print conn["freq"], conn["trains"][0]["dep_station"]["L"], conn["trains"][0]["train"], conn["trains"][0]["dep_time"]