from urllib2 import Request, urlopen
from urllib import urlencode, quote_plus, urlretrieve
from gzip import GzipFile
from cStringIO import StringIO
from zlib import decompress
from struct import unpack
from time import sleep
from datetime import timedelta, date, datetime

from lxml import etree


def get_id(station):
    """Return the first element of ``get_ids(station + "!")``.

    NOTE(review): presumably the trailing "!" asks the service for an exact
    match -- confirm against the service's behaviour.
    """
    return get_ids(station+"!")[0]


def search_ids(station):
    """Return whatever ``get_ids(station + "?")`` returns.

    NOTE(review): presumably the trailing "?" asks the service for a fuzzy
    search -- confirm against the service's behaviour.
    """
    return get_ids(station+"?")


def get_ids(query):
    """Build the lookup request for *query*.

    NOTE(review): this function is incomplete in the text under review.
    The request-body template (the literal left of ``% query``) is empty and
    everything after the ``Request(...)`` call is missing.  As written,
    ``'' % query`` raises TypeError for a string argument, so the function
    fails loudly instead of returning a wrong result.  Restore the original
    body from version control; it has deliberately not been guessed here.
    """
    req = Request("http://railnavigator.bahn.de/bin/rnav/query.exe/dn",
                  ''%query,
                  {"User-Agent":"Java/1.6.0_0"})
    # NOTE(review): the remainder of get_ids is missing from this point on.


# ---------------------------------------------------------------------------
# NOTE(review): damaged fragment, preserved verbatim and NOT executed.
#
# The statements below use ``self`` but neither a ``class`` statement nor the
# ``def`` line they belong to survives, and two unpack() format strings are
# cut off.  The last three lines are the tail of a further method whose
# ``def`` line and docstring opening are missing as well.  "[...]" marks
# every gap.  Restore this region from version control.
#
#   [...] # \n\n
#   # a lot of unknown stuff which consists either of never changing data
#   # or of zero-ed data
#   # but seems to be not important for the rest as its size is static and
#   # no static information seems to be missing
#   # probably 46 bytes of obsolete data?
#   self.f.seek(0x3e)
#   #print "cities_offset: %d, train_props_offset: %d"%(self.cities_offset, self.train_props_offset)
#   additional_offset1, u2, additional_offset2 \
#       = unpack("<3I", self.f.read(12))
#   #print "additional_offset1: %d, u2: %d, additional_offset2: %d"%(additional_offset1, u2, additional_offset2)
#   self.f.seek(additional_offset1)
#   u1, = unpack(" [...]
#   for i in xrange(number_of_conn):
#       self.f.seek(0x4a + 12*i)
#       freq, train_list_offset, number_of_trains, number_of_changes, duration \
#           = unpack(" [...]
#
#   [...] 13:54 512 => 5:12 """
#   time = "%03d"%time
#   return ":".join([time[:-2], time[-2:]])
# ---------------------------------------------------------------------------

# NOTE(review): every definition below takes ``self`` and get_frequency calls
# ``self.get_string``, so these are methods.  They are laid out at column 0
# only because no ``class`` statement survives in this text; re-indent them
# under the class once its header has been restored.


def parse_date(self, d):
    """
    dates are expressed as integers of days since 01.01.1980

    Returns the ``datetime.date`` lying ``d`` days after 1980-01-01.
    """
    return date(1980, 1, 1)+timedelta(days=d)


def parse_transportation(self, t):
    """
    transportation can be by some train or by foot

    Returns "feet" for code 1 and "train" for code 2.
    Raises Exception for any other code.
    """
    if t == 1:
        return "feet"
    elif t == 2:
        return "train"
    else:
        # call form instead of the deprecated ``raise Exception, msg``
        raise Exception("transportation %d unexpected"%t)


def get_frequency(self, offset):
    """
    given the offset, get the frequency a connection is scheduled from the
    data block beginning at frequencies_offset

    the last three values are still a myth as they dont seem to correspond
    with the information in the string referenced by the first value

    Reads four little-endian unsigned shorts at
    ``self.frequencies_offset + offset`` and returns a 4-tuple: the string
    that ``self.get_string`` yields for the first value, followed by the
    other three values unchanged.
    """
    self.f.seek(self.frequencies_offset+offset)
    #TODO: what do the last values mean?
    #TODO: where are the days of service properly encoded?
    freq, u1, u2, u3 = unpack("<4H", self.f.read(8))
    return self.get_string(freq), u1, u2, u3


def get_string(self, offset):
    """
    given the offset, return a zero terminated string from the stringblock

    Results are cached in ``self.strings`` keyed by ``offset``.  Surrounding
    whitespace is stripped, and the value "---" is returned as None.

    Raises EOFError if the file ends before a terminating zero byte is found
    (previously this case looped forever).
    """
    # get it from dict to prevent ugly f.read(1)
    if offset in self.strings:
        return self.strings[offset]

    self.f.seek(self.strings_offset+offset)
    chars = []
    # read zero terminated string.. ugly in py..
    while True:
        b = self.f.read(1)
        if b == "\0":
            break
        if not b:
            # read() returns an empty string at end of file; without this
            # check a missing terminator would spin here indefinitely
            raise EOFError("unterminated string at offset %d"%offset)
        chars.append(b)

    # some strings come with many whitespaces at the end
    result = "".join(chars).strip()
    # by convention of the format "---" means None
    if result == "---":
        result = None
    # fill a dict with strings to prevent too much ugly f.read(1)
    self.strings[offset] = result
    return result


def get_train_props(self, offset):
    """
    given the offset, get the string list of train properties from the data
    block beginning at train_props_offset. the first ushort marks the amount
    of properties

    NOTE(review): the body is truncated in the text under review; only the
    initial seek survives, so this currently returns None.  Restore the rest
    from version control.
    """
    self.f.seek(self.train_props_offset+offset)
    # NOTE(review): the remainder of get_train_props is missing from this
    # point on; see the preserved fragment below.


# ---------------------------------------------------------------------------
# NOTE(review): damaged fragment, preserved verbatim and NOT executed.
#
# The first line joins the cut-off end of get_train_props with the cut-off
# end of a later expression ("[...]" marks the gap).  The loop that follows
# reads a name ``a`` whose definition is part of the missing text, so it is
# kept as a comment rather than as live code that would fail on import.
#
#   n, = unpack(" [...] > y) & 1) for y in range(16-1, -1, -1))
#   #print a.timetable_info
#   for conn in a.connections:
#       print conn
#   #for conn in a.connections:
#   #    print "\t".join([conn["freq"][0], bin(conn["freq"][1]), bin(conn["freq"][2]), bin(conn["freq"][3])])
#   #for conn in a.connections:
#   #    print conn["freq"], conn["trains"][0]["dep_station"]["L"], conn["trains"][0]["train"], conn["trains"][0]["dep_time"]
# ---------------------------------------------------------------------------