# NOTE(review): this file reached review flattened onto two physical lines and
# with several spans missing (string literals and unpack() format strings are
# cut off mid-token, and some ``def``/``class`` lines are gone).  Line
# structure has been restored for everything that survived intact.  Fragments
# that cannot be completed are kept verbatim in "UNRECOVERED" comment blocks
# instead of being guessed at -- restore them from version control.

try:
    # Python 2 module names, as written in the original file.
    from urllib2 import Request, urlopen
    from urllib import urlencode, quote_plus, urlretrieve
    from cStringIO import StringIO
except ImportError:
    # Same names at their Python 3 locations.  This only makes the module
    # importable there; the byte-level parsing below has not been ported.
    from urllib.request import Request, urlopen, urlretrieve
    from urllib.parse import urlencode, quote_plus
    # cStringIO buffers hold byte strings, so BytesIO is the closest match.
    from io import BytesIO as StringIO

from gzip import GzipFile
from struct import unpack
from time import sleep
from datetime import timedelta, date, time, datetime

try:
    from lxml import etree
except ImportError:
    # No surviving code references ``etree``; a missing lxml therefore only
    # affects the unrecovered parts of this file.
    etree = None


def get_id(station):
    """Return the first entry of ``get_ids(station + "!")``."""
    return get_ids(station+"!")[0]


def search_ids(station):
    """Return everything ``get_ids(station + "?")`` yields."""
    return get_ids(station+"?")


def get_ids(query):
    """Build the lookup request for *query* (incomplete, see notes).

    NOTE(review): only the first statement of this function survived in
    SOURCE.  The request payload template is an empty string, so the ``%``
    formatting below raises ``TypeError`` for any string *query*, and whatever
    sent the request and parsed the reply is missing.
    """
    req = Request("http://railnavigator.bahn.de/bin/rnav/query.exe/dn",
                  ''%query,
                  {"User-Agent":"Java/1.6.0_0"})
    # UNRECOVERED: the remainder of get_ids() is missing from SOURCE.


# NOTE(review): the original ``class`` statement is missing from SOURCE.
# ``TimetableFile`` is a PLACEHOLDER name that exists only so the surviving
# methods have an enclosing class -- replace it with the real class header.
class TimetableFile(object):
    """Helpers for decoding values read from the binary file ``self.f``.

    The surviving methods rely on these instance attributes, which must be
    set up by the (unrecovered) initialisation code: ``f``, ``strings``,
    ``strings_offset`` and ``frequencies_offset``.
    """

    # UNRECOVERED (body of a method whose ``def`` line is missing; two
    # unpack() calls are cut off inside their format strings):
    #
    #     # \n\n
    #     # a lot of unknown stuff which consists either of never changing data
    #     # or of zero-ed data
    #     # but seems to be not important for the rest as its size is static and
    #     # no static information seems to be missing
    #     # probably 46 bytes of obsolete data?
    #     self.f.seek(0x3e)
    #     #print "cities_offset: %d, train_props_offset: %d"%(self.cities_offset, self.train_props_offset)
    #     additional_offset1, u2, additional_offset2 \
    #         = unpack("<3I", self.f.read(12))
    #     #print "additional_offset1: %d, u2: %d, additional_offset2: %d"%(additional_offset1, u2, additional_offset2)
    #     self.f.seek(additional_offset1)
    #     u1, = unpack("                      <-- span missing here
    #     for i in xrange(number_of_conn):
    #         self.f.seek(0x4a + 12*i)
    #         freq, train_list_offset, number_of_trains, number_of_changes, duration \
    #             = unpack("                  <-- span missing here

    # UNRECOVERED (end of a method whose ``def`` line and docstring start are
    # missing; it converts an integer to a timedelta):
    #
    #     ... 13:54 512 => 5:12 """
    #     t = "%03d"%t
    #     return timedelta(hours=int(t[:-2]), minutes=int(t[-2:]))

    def parse_time(self, t):
        """Convert an integer clock value into a ``datetime.time``.

        The decimal digits of *t* read as ``HMM``/``HHMM``: the last two
        digits are the minute, everything before them is the hour.

            1345 => 13:45
            512  => 5:12
        """
        # Zero-pad to at least three digits so values below 100 still have an
        # hour part to slice off.
        t = "%03d"%t
        return time(hour=int(t[:-2]), minute=int(t[-2:]))

    def parse_date(self, d):
        """Convert *d*, a number of days since 01.01.1980, into a ``date``."""
        return date(1980, 1, 1)+timedelta(days=d)

    def parse_transportation(self, t):
        """Map a transportation code to its name.

        Returns ``"feet"`` for 1 and ``"train"`` for 2.

        Raises:
            Exception: for any other code.
        """
        if t == 1:
            return "feet"
        elif t == 2:
            return "train"
        else:
            # Call form of raise: valid on Python 2 and 3, unlike the original
            # ``raise Exception, "..."`` statement.
            raise Exception("transportation %d unexpected"%t)

    def get_frequency(self, offset):
        """Read one frequency record at ``frequencies_offset + offset``.

        The record is four little-endian unsigned shorts.  The first one is an
        offset into the string block and is resolved with ``get_string``; the
        other three are returned unchanged.  Their meaning is still unknown --
        they do not seem to correspond to the information in the string.

        Returns:
            A tuple ``(frequency_string, u1, u2, u3)``.
        """
        self.f.seek(self.frequencies_offset+offset)
        #TODO: what do the last values mean?
        #TODO: where are the days of service properly encoded?
        freq, u1, u2, u3 = unpack("<4H", self.f.read(8))
        return self.get_string(freq), u1, u2, u3

    def get_string(self, offset):
        """Return the zero-terminated string at ``strings_offset + offset``.

        Surrounding whitespace is stripped (some strings carry a lot of
        trailing blanks), and by convention of the format ``"---"`` is
        returned as ``None``.  Results are cached in ``self.strings`` so each
        offset is read from the file only once.

        Raises:
            EOFError: if the file ends before a terminating zero byte is
                found.  (The original looped forever in that case, because
                ``read(1)`` keeps returning an empty string at end of file.)
        """
        if offset in self.strings:
            return self.strings[offset]

        self.f.seek(self.strings_offset+offset)
        chars = []
        while True:
            b = self.f.read(1)
            if not b:
                raise EOFError(
                    "unterminated string at offset %d" % offset)
            if b == "\0":
                break
            chars.append(b)

        result = "".join(chars).strip()
        if result == "---":
            result = None
        self.strings[offset] = result
        return result

    # UNRECOVERED (method cut off inside its unpack() format string; the
    # text that follows the gap belongs to a later, also unrecovered,
    # definition):
    #
    #     def get_train_props(self, offset):
    #         """
    #         given the offset, get the string list of train properties from
    #         the data block beginning at train_props_offset.
    #         the first ushort marks the amount of properties
    #         """
    #         self.f.seek(self.train_props_offset+offset)
    #         n, = unpack("                   <-- span missing here
    #     ... > y) & 1) for y in range(16-1, -1, -1))


# UNRECOVERED (trailing script code; the statement that creates ``a`` is
# missing from SOURCE, so this cannot run as-is):
#
#     #print a.timetable_info
#     for conn in a.connections:
#         print conn
#     #for conn in a.connections:
#     #    print "\t".join([conn["freq"][0], bin(conn["freq"][1]), bin(conn["freq"][2]), bin(conn["freq"][3])])
#     #for conn in a.connections:
#     #    print conn["freq"], conn["trains"][0]["dep_station"]["L"], conn["trains"][0]["train"], conn["trains"][0]["dep_time"]