From 4dd9907c395c855e4883931603c9bcac2415b943 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 18 Jun 2010 13:28:40 +0200 Subject: [PATCH] initial commit --- parser.py | 479 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 479 insertions(+) create mode 100644 parser.py diff --git a/parser.py b/parser.py new file mode 100644 index 0000000..f9e98af --- /dev/null +++ b/parser.py @@ -0,0 +1,479 @@ +from urllib2 import Request, urlopen +from urllib import urlencode, quote_plus, urlretrieve +from gzip import GzipFile +from cStringIO import StringIO +from zlib import decompress +from struct import unpack +from time import sleep +from datetime import timedelta, date, datetime +from lxml import etree + +def get_id(station): + return get_ids(station+"!")[0] + +def search_ids(station): + return get_ids(station+"?") + +def get_ids(query): + req = Request("http://railnavigator.bahn.de/bin/rnav/query.exe/dn", + ''%query, + {"User-Agent":"Java/1.6.0_0"}) + # \n\n + # a lot of unknown stuff which consists either of never changing data + # or of zero-ed data + # but seems to be not important for the rest as its size is static and + # no static information seems to be missing + # probably 46 bytes of obsolete data? + self.f.seek(0x3e) + #print "cities_offset: %d, train_props_offset: %d"%(self.cities_offset, self.train_props_offset) + additional_offset1, u2, additional_offset2 \ + = unpack("<3I", self.f.read(12)) + #print "additional_offset1: %d, u2: %d, additional_offset2: %d"%(additional_offset1, u2, additional_offset2) + + self.f.seek(additional_offset1) + u1, = unpack(" + + for i in xrange(number_of_conn): + self.f.seek(0x4a + 12*i) + freq, train_list_offset, number_of_trains, number_of_changes, duration \ + = unpack(" 13:54 + 512 => 5:12 + """ + time = "%03d"%time + return ":".join([time[:-2], time[-2:]]) + + def parse_date(self, d): + """ + dates are expressed as integers of days since 01.01.1980 + """ + return date(1980, 1, 1)+timedelta(days=d) + + def parse_transportation(self, t): + """ + transportation can be by some train or by foot + """ + if t == 1: + return "feet" + elif t == 2: + return "train" + else: + raise Exception, "transportation %d unexpected"%t + + def get_frequency(self, offset): + """ + given the offset, get the frequency a connection is scheduled from the + data block beginning at frequencies_offset + the last three values are still a myth as they dont seem to correspond + with the information in the string referenced by the first value + """ + self.f.seek(self.frequencies_offset+offset) + #TODO: what do the last values mean? + #TODO: where are the days of service properly encoded? + freq, u1, u2, u3 = unpack("<4H", self.f.read(8)) + return self.get_string(freq), u1, u2, u3 + + def get_string(self, offset): + """ + given the offset, return a zero terminated string from the stringblock + """ + if offset in self.strings: + # get it from dict to prevent ugly f.read(1) + return self.strings[offset] + else: + self.f.seek(self.strings_offset+offset) + result = "" + # read zero terminated string.. ugly in py.. + while True: + b = self.f.read(1) + if b == "\0": + # some strings come with many whitespaces at the end + result = result.strip() + # by convention of the format "---" means None + if result == "---": + result = None + # fill a dict with strings to prevent too much ugly f.read(1) + self.strings[offset] = result + return result + else: + result += b + + def get_train_props(self, offset): + """ + given the offset, get the string list of train properties from the + data block beginning at train_props_offset. the first ushort marks the + amount of properties + """ + self.f.seek(self.train_props_offset+offset) + n, = unpack("> y) & 1) for y in range(16-1, -1, -1)) +#print a.timetable_info +for conn in a.connections: + print conn +#for conn in a.connections: +# print "\t".join([conn["freq"][0], bin(conn["freq"][1]), bin(conn["freq"][2]), bin(conn["freq"][3])]) +#for conn in a.connections: +# print conn["freq"], conn["trains"][0]["dep_station"]["L"], conn["trains"][0]["train"], conn["trains"][0]["dep_time"]