from urllib2 import Request, urlopen from urllib import urlencode, quote_plus, urlretrieve from gzip import GzipFile from cStringIO import StringIO from struct import unpack from time import sleep from datetime import timedelta, date, time, datetime from lxml import etree DEBUG = 3 def get_id(station): return get_ids(station+"!")[0] def search_ids(station): return get_ids(station+"?") def get_ids(query): req = Request("http://railnavigator.bahn.de/bin/rnav/query.exe/dn", ''%query, {"User-Agent":"Java/1.6.0_0"}) # \n\n= level: print "\033[1m"+args[0]%args[1:]+"\033[0m" class PlnParse: """ all data in little endian offsets are always counted from start of the file latitude, longitude are in wgs84 format ushort is of size 2 uint is of size 4 """ def __init__(self, fh): self.f = fh self.strings = dict() self.connections = list() """ pos type description 0x00 ushort version """ self.f.seek(0x00) debug(1, "%08x: read %d bytes for version", self.f.tell(), 2) self.version, = unpack(" # a lot of mostly unknown stuff which consists either of never changing data # or of zero-ed data # but seems not to be important as its size is static and # no static information seems to be missing # probably 46 bytes of obsolete data? # u5_offset and u8_offset are only non-zero for partial pln data self.f.seek(0x3e) debug(1, "%08x: read %d bytes for lots of mostly useless stuff", self.f.tell(), 12) additional_offset1, u1, additional_offset2 = unpack("<3I", self.f.read(12)) debug(2, "\tadditional_offset1: %d, u1: %d, additional_offset2: %d", additional_offset1, u1, additional_offset2) debug(3, "\t\thex(additional_offset1) = %08x", additional_offset1) debug(3, "\t\thex(additional_offset2) = %08x", additional_offset2) self.f.seek(additional_offset1) debug(1, "%08x: read %d bytes for some unknown", self.f.tell(), 2) u1, = unpack(" for i in xrange(number_of_conn): self.f.seek(0x4a + 12*i) freq, train_list_offset, number_of_trains, number_of_changes, duration \ = unpack(" 13:54 512 => 5:12 """ t = "%03d"%t return timedelta(hours=int(t[:-2]), minutes=int(t[-2:])) def parse_time(self, t): """ time is stored as an integer which, when represented as a string can be split to get a string representation of the time 1345 => 13:54 512 => 5:12 """ t = "%03d"%t hour, minute = int(t[:-2]), int(t[-2:]) # TODO: what to do with hour>=24 ? hour %= 24 return time(hour, minute) def parse_date(self, d): """ dates are expressed as integers of days since 01.01.1980 """ return date(1980, 1, 1)+timedelta(days=d) def parse_transportation(self, t): """ transportation can be by some train or by foot """ if t == 1: return "feet" elif t == 2: return "train" else: raise Exception, "transportation %d unexpected"%t def get_frequency(self, offset): """ given the offset, get the frequency a connection is scheduled from the data block beginning at frequencies_offset the last three values are still a myth as they dont seem to correspond with the information in the string referenced by the first value """ self.f.seek(self.frequencies_offset+offset) #TODO: what do the last values mean? #TODO: where are the days of service properly encoded? freq, u1, u2, u3 = unpack("<4H", self.f.read(8)) return self.get_string(freq), u1, u2, u3 def get_string(self, offset): """ given the offset, return a zero terminated string from the stringblock """ if offset in self.strings: # get it from dict to prevent ugly f.read(1) return self.strings[offset] else: self.f.seek(self.strings_offset+offset) result = "" # read zero terminated string.. ugly in py.. while True: b = self.f.read(1) if b == "\0": # some strings come with many whitespaces at the end result = result.strip() # by convention of the format "---" means None if result == "---": result = None # fill a dict with strings to prevent too much ugly f.read(1) self.strings[offset] = result return result else: result += b def get_train_props(self, offset): """ given the offset, get the string list of train properties from the data block beginning at train_props_offset. the first ushort marks the amount of properties """ self.f.seek(self.train_props_offset+offset) n, = unpack("> y) & 1) for y in range(16-1, -1, -1)) #print a.timetable_info for conn in a.connections: print conn #for conn in a.connections: # print "\t".join([conn["freq"][0], bin(conn["freq"][1]), bin(conn["freq"][2]), bin(conn["freq"][3])]) #for conn in a.connections: # print conn["freq"], conn["trains"][0]["dep_station"]["L"], conn["trains"][0]["train"], conn["trains"][0]["dep_time"]