from urllib2 import Request, urlopen
from urllib import urlencode, quote_plus, urlretrieve
from gzip import GzipFile
from cStringIO import StringIO
from zlib import decompress
from struct import unpack
from time import sleep
from datetime import timedelta, date, datetime
from lxml import etree
def get_id(station):
    """
    look up a single id for a station: the station name is passed to
    get_ids with a "!" appended and the first entry of the result is
    returned
    """
    # NOTE(review): presumably "!" asks the server for an exact match --
    # confirm against the query format built in get_ids
    matches = get_ids(station + "!")
    return matches[0]
def search_ids(station):
    """
    pass the station name with a "?" appended to get_ids and hand back the
    complete result unchanged
    """
    # NOTE(review): presumably "?" asks the server for a fuzzy search --
    # confirm against the query format built in get_ids
    search_query = station + "?"
    return get_ids(search_query)
def get_ids(query):
# build the request for the railnavigator query endpoint; because a data
# argument is passed, urllib2 will send it as a POST, with a Java
# User-Agent header
# NOTE(review): the body template below is the empty string, so ''%query
# raises TypeError ("not all arguments converted") for any str query. the
# template text appears to be missing here -- restore it from a clean copy
# before relying on this function
req = Request("http://railnavigator.bahn.de/bin/rnav/query.exe/dn",
''%query,
{"User-Agent":"Java/1.6.0_0"})
# \n\n
# a lot of unknown data follows, consisting either of values that never
# change or of zeroed bytes
# it does not seem to be important for the rest, as its size is static and
# no static information seems to be missing
# probably 46 bytes of obsolete data?
self.f.seek(0x3e)
#print "cities_offset: %d, train_props_offset: %d"%(self.cities_offset, self.train_props_offset)
additional_offset1, u2, additional_offset2 \
= unpack("<3I", self.f.read(12))
#print "additional_offset1: %d, u2: %d, additional_offset2: %d"%(additional_offset1, u2, additional_offset2)
self.f.seek(additional_offset1)
u1, = unpack("
for i in xrange(number_of_conn):
self.f.seek(0x4a + 12*i)
freq, train_list_offset, number_of_trains, number_of_changes, duration \
= unpack(" 13:54
512 => 5:12
"""
time = "%03d"%time
return ":".join([time[:-2], time[-2:]])
def parse_date(self, d):
    """
    convert a day count into a calendar date

    dates are expressed as integers of days since 01.01.1980, so d == 0
    yields date(1980, 1, 1); the result is a datetime.date
    """
    epoch = date(1980, 1, 1)
    return epoch + timedelta(d)
def parse_transportation(self, t):
    """
    translate the numeric transportation code into a name

    transportation can be by some train or by foot:
    1 => "feet"
    2 => "train"
    any other value raises an Exception naming the unexpected code
    """
    if t == 1:
        return "feet"
    if t == 2:
        return "train"
    # call form of raise: the old "raise Exception, msg" comma form is a
    # syntax error on python 3, while this form works on both 2 and 3
    raise Exception("transportation %d unexpected" % t)
def get_frequency(self, offset):
    """
    read the frequency record a connection is scheduled with

    offset is relative to frequencies_offset. the record consists of four
    little-endian ushorts: the first one is resolved through get_string,
    the other three are passed through as read. those last three values
    are still a mystery as they don't seem to correspond with the
    information in the string referenced by the first value

    returns the tuple (frequency string, u1, u2, u3)
    """
    self.f.seek(self.frequencies_offset + offset)
    #TODO: what do the last values mean?
    #TODO: where are the days of service properly encoded?
    record = unpack("<4H", self.f.read(8))
    return (self.get_string(record[0]),) + record[1:]
def get_string(self, offset):
    """
    given the offset, return a zero terminated string from the stringblock

    offset is relative to strings_offset. surrounding whitespace is
    stripped and, by convention of the format, "---" is returned as None.
    results are cached in self.strings so every string is only scanned
    once

    raises EOFError if the file ends before the terminating zero byte is
    found (previously this case looped forever)
    """
    if offset in self.strings:
        # get it from dict to prevent ugly f.read(1)
        return self.strings[offset]

    self.f.seek(self.strings_offset + offset)
    # collect the characters in a list and join once at the end instead of
    # growing a string with +=
    chars = []
    while True:
        b = self.f.read(1)
        if not b:
            # read() returns an empty string at end of file, which never
            # equals "\0" -- bail out instead of spinning forever
            raise EOFError("unterminated string at offset %d" % offset)
        if b == "\0":
            break
        chars.append(b)

    # some strings come with many whitespaces at the end
    result = "".join(chars).strip()
    # by convention of the format "---" means None
    if result == "---":
        result = None
    # fill a dict with strings to prevent too much ugly f.read(1)
    self.strings[offset] = result
    return result
def get_train_props(self, offset):
"""
given the offset, get the string list of train properties from the
data block beginning at train_props_offset. the first ushort marks the
amount of properties
"""
# position the file at the requested entry of the train properties block
self.f.seek(self.train_props_offset+offset)
# NOTE(review): the next line is truncated -- the string literal opened
# after unpack( is never closed and the remainder looks like it belongs to
# an unrelated expression. the original statements (and the rest of this
# method) need to be restored from a clean copy
n, = unpack("> y) & 1) for y in range(16-1, -1, -1))
#print a.timetable_info
# dump every parsed connection; print is written in call form, which gives
# the same output on python 2 for a single argument and is valid on python 3
for conn in a.connections:
    print(conn)
#for conn in a.connections:
# print "\t".join([conn["freq"][0], bin(conn["freq"][1]), bin(conn["freq"][2]), bin(conn["freq"][3])])
#for conn in a.connections:
# print conn["freq"], conn["trains"][0]["dep_station"]["L"], conn["trains"][0]["train"], conn["trains"][0]["dep_time"]