523 lines
18 KiB
Python
523 lines
18 KiB
Python
from urllib2 import Request, urlopen
|
|
from urllib import urlencode, quote_plus, urlretrieve
|
|
from gzip import GzipFile
|
|
from cStringIO import StringIO
|
|
from struct import unpack
|
|
from time import sleep
|
|
from datetime import timedelta, date, time, datetime
|
|
from lxml import etree
|
|
|
|
# verbosity threshold used by debug(): a message is printed when DEBUG is
# greater than or equal to the level the message was issued with.
# PlnParse issues its messages with the levels 1, 2 and 3
DEBUG = 3
|
|
|
|
def get_id(station):
    """
    return the first location identifier found for a station name

    the name gets a "!" appended before it is handed to get_ids()
    (presumably this asks the server for an exact match - TODO confirm)

    raises IndexError if get_ids() returns an empty list
    """
    matches = get_ids(station + "!")
    first = matches[0]
    return first
|
|
|
|
def search_ids(station):
    """
    return all location identifiers found for a station name

    the name gets a "?" appended before it is handed to get_ids()
    (presumably this asks the server for a fuzzy search - TODO confirm)
    """
    pattern = station + "?"
    return get_ids(pattern)
|
|
|
|
def get_ids(query):
    """
    ask the railnavigator location service for the stations matching query

    query is placed into the "n" attribute of an MLc request element. it is
    XML-escaped first (&, <, > and ") so that a station name containing one
    of these characters still yields a well-formed request document.

    returns the list of the non-empty "i" attributes of all MLc elements
    found in the answer
    """
    # imported locally so that the block does not depend on a new
    # module-level import
    from xml.sax.saxutils import escape

    body = ('<?xml version="1.0" encoding="UTF-8" ?>'
            '<ReqC ver="1.1" prod="String" lang="DE">'
            '<MLcReq><MLc n="%s" t="ST" /></MLcReq></ReqC>'
            % escape(query, {'"': "&quot;"}))
    req = Request("http://railnavigator.bahn.de/bin/rnav/query.exe/dn",
                  body,
                  {"User-Agent": "Java/1.6.0_0"})
    # the server does not escape quotes inside attribute values, e.g.
    # <MLc t="ST" n="Bahlen Germann "Bahler Buur", Dinklage" i="A=1@O=Bahlen Germann "Bahler Buur", Dinklage@X=81
    # so the answer is parsed with a parser that recovers from errors
    parser = etree.XMLParser(recover=True, encoding='iso-8859-1')
    tree = etree.parse(urlopen(req), parser)
    # make sure None entries from badly parsed nodes are not in the output
    return [elem.get("i") for elem in tree.findall(".//MLc") if elem.get("i")]
|
|
|
|
def parse_i(i):
    """
    turn a location identifier of the form "K1=V1@K2=V2@" into a dict

    leading and trailing "@" are removed, the remainder is separated at
    every "@" and each field is cut at its first "=" only, so a value may
    itself contain "=". if a key occurs more than once the last value wins.

    raises ValueError for a field that does not contain a "="
    """
    fields = i.strip('@').split("@")
    return dict(field.split("=", 1) for field in fields)
|
|
|
|
def compile_i_from_station(station):
    """
    build a location identifier from a station name

    this simply returns what get_id() returns for the name
    """
    identifier = get_id(station)
    return identifier
|
|
|
|
def compile_i_from_stationid(stationid):
    """
    build a location identifier from a numeric station id

    the id is zero padded to nine digits: 8000013 => "A=1@L=008000013@"
    """
    template = "A=1@L=%09d@"
    return template % stationid
|
|
|
|
def compile_i_from_coords(x, y, name=None):
    """
    build a location identifier from a coordinate pair

    x and y are scaled by 1000000 and written as integers into the X and Y
    fields. the scaled value is rounded to the nearest integer rather than
    truncated: binary floating point can leave the product a tiny bit below
    the intended integer, which truncation would turn into an off-by-one.

    name is written into the O field, a missing or empty name becomes "---"
    """
    if not name:
        name = "---"
    scaled_x = int(round(x * 1000000))
    scaled_y = int(round(y * 1000000))
    return "A=16@O=%s@X=%d@Y=%d@" % (name, scaled_x, scaled_y)
|
|
|
|
#infile = open("../cities_germany_211009.clean.txt", "r")
|
|
#outfile = open("../cities_germany_291009.out.txt", "w")
|
|
#for city in infile:
|
|
# print city,
|
|
# for station in search_ids(city):
|
|
# outfile.write("%s\t%s\t%s\t%s\n"%(station["O"], station["X"], station["Y"], station["L"]))
|
|
# sleep(10)
|
|
#infile.close()
|
|
#outfile.close()
|
|
#exit()
|
|
|
|
|
|
#big plan
|
|
def get_big_pln_data():
    """
    request a "pln" output from persoenlicherfahrplan.bahn.de for the
    hard-coded connection Aachen Hbf (SID) -> Augsburg Hbf (ZID)

    the answer is read as a whitespace separated list of key=value tokens.

    returns a tuple (url, time) with the values of the "url" and the "time"
    token. a token that does not occur in the answer yields "".
    (the values used to be computed and then thrown away)
    """
    req = Request("http://persoenlicherfahrplan.bahn.de/bin/pf/query-p2w.exe/dn",
                  urlencode({"start": "1",
                             "pp": "5",
                             "ZID": "A=1@X=10885568@Y=48365444@O=Augsburg Hbf@L=008000013@U=80@K=S1-0N1@G=8000013@C=49@a=128@B=1@",
                             "output": "pln",
                             "hcount": "0",
                             "h2gversion": "6.20.7",
                             "spmo": "1",
                             "htype": "MicroEmulator-2.0",
                             "SID": "A=1@X=6092250@Y=50767829@O=Aachen Hbf@L=008000001@U=80@K=S1-0N1@G=8000001@C=49@a=128@B=1@",
                             "L": "vs_javapln",
                             "p2wIVRoute": "1",
                             "p2wCreateMaps": "1"}),
                  {"User-Agent": "Java/1.6.0_0"})
    tokens = urlopen(req).read().split()
    # not called "time" so that the name imported from datetime stays usable
    time_value = ""
    url = ""
    for t in tokens:
        key, value = t.split("=", 1)
        if key == "url":
            url = value
        elif key == "time":
            time_value = value
    return url, time_value


def test_pln(url=None):
    """
    poll url until the answer contains a "url" token and download the file
    this token points to into the file "pln" in the current directory

    url defaults to the url returned by get_big_pln_data(). the function
    used to read a global name "url" which is not defined anywhere.

    the answer is read as newline separated key=value tokens. when a "time"
    token is seen before a "url" token the function sleeps ten seconds and
    prints the value of the token.
    """
    if url is None:
        url = get_big_pln_data()[0]
    url2 = ""
    while url2 == "":
        tokens = urlopen(url).read().strip().split("\n")
        print(tokens)
        for t in tokens:
            key, value = t.split("=", 1)
            if key == "url":
                url2 = value
                break
            elif key == "time":
                sleep(10)
                print(value)
    urlretrieve(url2, "pln")
|
|
|
|
# next: ignoreMinuteRound=yes&REQ0HafasScrollDir=1&h2g-direct=1&seqnr=1&ident=ox.0348695.1256054390&
|
|
|
|
|
|
def get_departures(station, dt):
    """
    fetch the departure board of a station from mobile.bahn.de

    station: a str is looked up with get_id() and replaced by the "L" field
             of the identifier found, any other value is sent as it is
    dt:      object with strftime(), supplies the date and time of the query

    returns a list with one dict per record of the answer. "L" and "O" are
    taken from the first line of a record, every following line adds its
    key=value pair unless the value is empty after stripping quotes and
    spaces
    """
    if isinstance(station, str):
        station = parse_i(get_id(station))["L"]

    params = {
        "start":"yes",
        "date":dt.strftime("%d.%m.%Y"),
        "time":dt.strftime("%H:%M"),
        "boardType":"dep",
        "sTI":"0", # can be left out
        "L":"vs_java3", # also vs_java
        "input":station,
        "productsFilter":"1"*14
    }
    headers = {"User-Agent":"Java/1.6.0_0"}
    req = Request("http://mobile.bahn.de/bin/mobil/bhftafel.exe/dn?",
                  urlencode(params),
                  headers)

    answer = urlopen(req).read()
    # the first and the last four characters are cut off before the answer
    # is separated into its records
    records = answer[4:-4].split("\n/>\n<Journey ")

    departures = []
    for record in records:
        lines = record.split("\n")
        evaid, name = lines[0].split(" ", 1)
        entry = {}
        entry["L"] = evaid.split("=")[1].strip("\"")
        entry["O"] = name.split("=")[1].strip("\"")
        for line in lines[1:]:
            key, value = line.split("=", 1)
            value = value.strip("\" ")
            if not value:
                continue
            entry[key] = value
        departures.append(entry)
    return departures
|
|
|
|
def get_stops(train, dep_station, arr_station, dt):
    """
    fetch the stops of a train from mobile.bahn.de

    train:       sent as REQTrain_name
    dep_station: sent as input. a str is looked up with get_id() and
                 replaced by the "L" field of the identifier found
    arr_station: sent as dirInput, a str is resolved like dep_station
    dt:          object with strftime(), supplies the date and time

    the resolved ids used to be stored in an unused variable so that the
    request was always sent with the unresolved names.

    returns a list with one dict per record of the answer. "L" and "O" are
    taken from the first line of a record, every following line adds its
    key=value pair unless the value is empty after stripping quotes and
    spaces
    """
    if isinstance(dep_station, str):
        dep_station = parse_i(get_id(dep_station))["L"]
    if isinstance(arr_station, str):
        arr_station = parse_i(get_id(arr_station))["L"]
    req = Request("http://mobile.bahn.de/bin/mobil/bhftafel.exe/dn?",
                  urlencode({
                      "start":"yes",
                      "REQTrain_name":train,
                      "date":dt.strftime("%d.%m.%Y"),
                      "time":dt.strftime("%H:%M"),
                      "boardType":"dep",
                      "sTI":"1",
                      "L":"vs_java3",
                      "input":dep_station,
                      "dirInput":arr_station,
                      "productsFilter":"1"*14
                  }),
                  {"User-Agent":"Java/1.6.0_0"})
    # the first and the last four characters are cut off before the answer
    # is separated into its records
    stops = urlopen(req).read()[4:-4].split("\n/>\n<St ")
    result = list()
    for stop in stops:
        tokens = stop.split("\n")
        evaid, name = tokens[0].split(" ", 1)
        s = dict()
        s["L"] = evaid.split("=")[1].strip("\"")
        s["O"] = name.split("=")[1].strip("\"")
        for t in tokens[1:]:
            key, value = t.split("=", 1)
            value = value.strip("\" ")
            # attributes without a value are left out
            if value:
                s[key] = value
        result.append(s)
    return result
|
|
|
|
#print get_stops('RE 36072', 8010334, 8010310, datetime(2010, 06, 18, 15, 43))
|
|
|
|
#late:
|
|
#http://mobile.bahn.de/bin/mobil/bhftafel.exe/dn?
|
|
# start=yes
|
|
# &time=actual
|
|
# &rT.2=13%3A51
|
|
# &rT.1=13%3A51
|
|
# &boardType=dep
|
|
# &hcount=0
|
|
# &h2gversion=6.20.7
|
|
# &rZ.2=RE%2010125
|
|
# &htype=MicroEmulator-2.0
|
|
# &rZ.1=RE%2010125
|
|
# &L=vs_java
|
|
# &input=%238000001&
|
|
|
|
def get_pln_data(start, end, stop1=None, stop2=None, dt=None, bike=False,
                 ice=True, ic=True, ir=True, re=True, sbahn=True, bus=True, boat=True,
                 subway=True, tram=True, taxi=True):
    """
    query mobile.bahn.de for connections and return the answer as a file
    object that decompresses it on the fly

    start, end:   sent as SID and ZID
    stop1, stop2: sent as VID1 and VID2 when they are set
    dt:           date and time of the query, defaults to datetime.now()
    bike:         adds REQ0JourneyProduct_opt3=1 when true
    ice ... taxi: product switches. REQ0JourneyProduct_prod_list_1 is only
                  sent when at least one of them is false. it then holds
                  one digit per switch followed by "1111"

    returns a GzipFile reading from the buffered answer
    """
    if dt is None:
        dt = datetime.now()

    query = {"ignoreMinuteRound":"yes",
             "date":dt.strftime("%d.%m.%Y"),
             "ZID":end,
             "h2g-direct":"1",
             "time":dt.strftime("%H:%M"),
             "SID":start,
             "start":"1"}

    switches = [ice, ic, ir, re, sbahn, bus, boat, subway, tram, taxi]
    if not all(switches):
        digits = "".join(str(int(flag)) for flag in switches)
        query["REQ0JourneyProduct_prod_list_1"] = digits+"1111"
    if bike:
        query["REQ0JourneyProduct_opt3"] = "1"
    if stop1:
        query["VID1"] = stop1
    if stop2:
        query["VID2"] = stop2

    headers = {"User-Agent":"Java/1.6.0_0"}
    req = Request("http://mobile.bahn.de/bin/mobil/query.exe/dn",
                  urlencode(query),
                  headers)
    # the object returned by urlopen is file like but does not have tell(),
    # this is why the answer is copied into a StringIO for gzip
    answer = urlopen(req)
    buffered = StringIO(answer.read())
    # possibly replacable by manually reading the gzip header
    return GzipFile(mode="r", fileobj=buffered)
|
|
|
|
def debug(level, *args):
    """
    print a message in bold when the global DEBUG is at least level

    args[0] is a %-format string, the remaining args are its values
    """
    if not DEBUG >= level:
        return
    template, values = args[0], args[1:]
    # \033[1m switches bold on, \033[0m resets the terminal attributes
    print("\033[1m"+template%values+"\033[0m")
|
|
|
|
class PlnParse:
    """
    parser for a binary "pln" answer, everything is read in __init__

    all data in little endian
    offsets are always counted from start of the file
    latitude, longitude are in wgs84 format
    ushort is of size 2
    uint is of size 4

    attributes set by __init__:
        version         value of the version field (always 5)
        start_station   dict with "name", "X", "Y"
        end_station     dict with "name", "X", "Y"
        timetable_info  dict with "timetable_begin", "timetable_end",
                        "today" (dates) and "timetable_remaining"
        request_id      string referenced by the request id field
                        (also available under the old misspelled name
                        reqest_id)
        products        result of parse_products()
        connections     list with one dict per connection: "freq",
                        "number_of_changes", "duration", "trains"
    """
    def __init__(self, fh):
        """
        parse the pln data in fh

        fh has to be a binary file object providing seek(), tell() and
        read()

        raises IOError if the version field is not 5
        """
        self.f = fh
        # cache used by get_string(): offset into the string block => string
        self.strings = dict()
        self.connections = list()

        # pos   type    description
        # 0x00  ushort  version
        self.f.seek(0x00)
        debug(1, "%08x: read %d bytes for version", self.f.tell(), 2)
        self.version, = unpack("<H", self.f.read(2))
        if self.version != 5:
            raise IOError("unknown version: %d"%self.version)

        # pos   type    description
        # 0x02  ushort  string reference to start station name
        # 0x04  uint    unknown
        # 0x08  uint    longitude of start station
        # 0x0C  uint    latitude of start station
        self.f.seek(0x02)
        debug(1, "%08x: read %d bytes for start station, unknown, longitude and latitude", self.f.tell(), 14)
        start_station, u1, sX, sY = unpack("<H3I", self.f.read(14))
        debug(2, "\tstart_station: %d, u1: %d, sX: %d, sY: %d", start_station, u1, sX, sY)

        # pos   type    description
        # 0x10  ushort  string reference to end station name
        # 0x12  uint    unknown
        # 0x16  uint    longitude of end station
        # 0x1A  uint    latitude of end station
        self.f.seek(0x10)
        debug(1, "%08x: read %d bytes for end station, unknown, longitude and latitude", self.f.tell(), 14)
        end_station, u1, eX, eY = unpack("<H3I", self.f.read(14))
        debug(2, "\tend_station: %d, u1: %d, eX: %d, eY: %d", end_station, u1, eX, eY)

        # pos   type    description
        # 0x1e  ushort  number of connections found
        # 0x20  uint    position of connection frequency data
        # 0x24  uint    position of string block
        self.f.seek(0x1e)
        debug(1, "%08x: read %d bytes for number of connections, frequency offset and string offset", self.f.tell(), 10)
        number_of_conn, self.frequencies_offset, self.strings_offset = unpack("<HII", self.f.read(10))
        debug(2, "\tnumber_of_conn: %d, frequencies_offset: %d, strings_offset: %d", number_of_conn, self.frequencies_offset, self.strings_offset)
        debug(3, "\t\thex(frequencies_offset) = %08x", self.frequencies_offset)
        debug(3, "\t\thex(strings_offset) = %08x", self.strings_offset)

        # now that we read the string_offset, get the station names
        self.start_station = {"name":self.get_string(start_station), "X":sX, "Y":sY}
        self.end_station = {"name":self.get_string(end_station), "X":eX, "Y":eY}
        debug(3, "\t\tget_string(start_station) = %s", self.get_string(start_station))
        debug(3, "\t\tget_string(end_station) = %s", self.get_string(end_station))

        # pos   type    description
        # 0x28  ushort  begin date of current timetable version
        # 0x2a  ushort  end date of current timetable version
        # 0x2c  ushort  date of query
        # 0x2e  ushort  remaining days in current timetable version
        self.f.seek(0x28)
        debug(1, "%08x: read %d bytes for dates of beginning, ending and remaining days of the current timetable", self.f.tell(), 8)
        timetable_begin, timetable_end, today, timetable_remaining = unpack("<4H", self.f.read(8))
        self.timetable_info = {
            "timetable_begin":self.parse_date(timetable_begin),
            "timetable_end":self.parse_date(timetable_end),
            "today":self.parse_date(today),
            "timetable_remaining":timetable_remaining}
        debug(2, "\ttimetable_begin: %d, timetable_end: %d, today: %d, timetable_remaining: %d", timetable_begin, timetable_end, today, timetable_remaining)

        # pos   type     description
        # 0x30  6 bytes  unknown string, only shown in the debug output
        self.f.seek(0x30)
        debug(1, "%08x: read %d bytes for unknown string", self.f.tell(), 6)
        debug(2, "\tunknown string: %s", self.f.read(6))

        # pos   type    description
        # 0x36  uint    position of city descriptions
        # 0x3a  uint    position of train property data
        self.f.seek(0x36)
        debug(1, "%08x: read %d bytes for city and train props offset", self.f.tell(), 8)
        self.cities_offset, self.train_props_offset = unpack("<II", self.f.read(8))
        debug(2, "\tcities_offset: %d, train_props_offset: %d", self.cities_offset, self.train_props_offset)

        #TODO: <begin big (46 bytes) ugly unknown section>
        # a lot of mostly unknown stuff which consists either of never changing data
        # or of zero-ed data
        # but seems not to be important as its size is static and
        # no static information seems to be missing
        # probably 46 bytes of obsolete data?
        # u5_offset and u8_offset are only non-zero for partial pln data
        self.f.seek(0x3e)
        debug(1, "%08x: read %d bytes for lots of mostly useless stuff", self.f.tell(), 12)
        additional_offset1, u1, additional_offset2 = unpack("<3I", self.f.read(12))
        debug(2, "\tadditional_offset1: %d, u1: %d, additional_offset2: %d", additional_offset1, u1, additional_offset2)
        debug(3, "\t\thex(additional_offset1) = %08x", additional_offset1)
        debug(3, "\t\thex(additional_offset2) = %08x", additional_offset2)

        self.f.seek(additional_offset1)
        debug(1, "%08x: read %d bytes for some unknown", self.f.tell(), 2)
        u1, = unpack("<H", self.f.read(2))
        debug(2, "\tu1: %d", u1)

        self.f.seek(additional_offset2)
        debug(1, "%08x: read %d bytes for a huge bunch of unknowns", self.f.tell(), 32)
        u1, u2, request_number, request_id, u5_offset, u6, products, u8_offset, u9, u10, u11, u12 = unpack("<IIHHIHHI4H", self.f.read(32))
        debug(2, "\tu1: %d, u2: %d, request_number: %d, request_id: %d, u5_offset: %d, u6: %d, products: %d, u8_offset: %d, u9: %d, u10: %d, u11: %d, u12: %d", u1, u2, request_number, request_id, u5_offset, u6, products, u8_offset, u9, u10, u11, u12)
        debug(3, "\t\tget_string(request_id) = %s", self.get_string(request_id))
        debug(3, "\t\tget_string(products) = %s", self.get_string(products))
        debug(3, "\t\thex(u5_offset) = %08x", u5_offset)
        debug(3, "\t\thex(u8_offset) = %08x", u8_offset)

        # the only useful stuff:
        self.request_id = self.get_string(request_id)
        # the attribute used to be misspelled, keep the old name working
        self.reqest_id = self.request_id
        self.products = self.parse_products(self.get_string(products))
        #TODO: </end big ugly unknown section>

        for i in range(number_of_conn):
            # 0 <= i < number_of_conn
            # pos        type    description
            # 0x4a+12*i  ushort  frequency of this connection
            # 0x4c+12*i  uint    position of train list (relative to 0x4a)
            # 0x50+12*i  ushort  number of trains
            # 0x52+12*i  ushort  number of changes
            # 0x54+12*i  ushort  duration of this connection
            self.f.seek(0x4a + 12*i)
            freq, train_list_offset, number_of_trains, number_of_changes, duration \
                = unpack("<HI3H", self.f.read(12))

            trains = list()
            for j in range(number_of_trains):
                # 0 <= j < number_of_trains
                # base = 0x4a+train_list_offset+j*20
                # pos      type    description
                # base+0   ushort  departure time
                # base+2   ushort  departure station
                # base+4   ushort  arrival time
                # base+6   ushort  arrival station
                # base+8   ushort  transportation method
                # base+10  ushort  train identification
                # base+12  ushort  arrival track
                # base+14  ushort  departure track
                # base+16  ushort  notes like final destination of tram
                # base+18  ushort  train properties
                self.f.seek(0x4a + train_list_offset + j*20)
                dep_time, dep_station, arr_time, arr_station, transportation, \
                    train, arr_track, dep_track, note, train_properties \
                    = unpack("<10H", self.f.read(20))
                trains.append({
                    "dep_time":self.parse_time(dep_time),
                    "dep_station":self.get_city(dep_station),
                    "arr_time":self.parse_time(arr_time),
                    "arr_station":self.get_city(arr_station),
                    "transportation_type":self.parse_transportation(transportation),
                    "train":self.get_string(train),
                    "arr_track":self.get_string(arr_track),
                    "dep_track":self.get_string(dep_track),
                    "note":self.get_string(note),
                    "train_properties":self.get_train_props(train_properties)})

            self.connections.append({
                "freq":self.get_frequency(freq),
                "number_of_changes":number_of_changes,
                "duration":self.parse_timedelta(duration),
                "trains":trains})

    def parse_products(self, products):
        """
        used means of transportation for a returned connection set is given
        as a string of 14 chars being "1" or "0" depending whether the
        product is being considered or not.
        this method maps the first 10 chars to a dict with the keys ice,
        ic, ir, re, sbahn, bus, boat, subway, tram and taxi

        returns None if products is empty or not 14 chars long
        """
        if products and len(products) == 14:
            p = ["ice", "ic", "ir", "re", "sbahn", "bus", "boat", "subway", "tram", "taxi"]
            return dict(zip(p, map(bool, map(int, products[:10]))))
        return None

    def parse_timedelta(self, t):
        """
        a duration is stored as an integer which, when represented as a
        string can be split to get hours and minutes
        1345 => 13:45
        512 => 5:12
        """
        t = "%03d"%t
        return timedelta(hours=int(t[:-2]), minutes=int(t[-2:]))

    def parse_time(self, t):
        """
        time is stored as an integer which, when represented as a string
        can be split to get a string representation of the time
        1345 => 13:45
        512 => 5:12

        hours of 24 and above are wrapped around: 2405 => 0:05
        """
        t = "%03d"%t
        hour, minute = int(t[:-2]), int(t[-2:])
        # TODO: what to do with hour>=24 ?
        hour %= 24
        return time(hour, minute)

    def parse_date(self, d):
        """
        dates are expressed as integers of days since 01.01.1980
        """
        return date(1980, 1, 1)+timedelta(days=d)

    def parse_transportation(self, t):
        """
        transportation can be by some train (2) or by foot (1)

        raises Exception for any other value
        """
        if t == 1:
            return "feet"
        elif t == 2:
            return "train"
        else:
            raise Exception("transportation %d unexpected"%t)

    def get_frequency(self, offset):
        """
        given the offset, get the frequency a connection is scheduled from
        the data block beginning at frequencies_offset

        returns a tuple of the string referenced by the first ushort and
        the three following ushorts.
        the last three values are still a myth as they dont seem to
        correspond with the information in the string referenced by the
        first value
        """
        self.f.seek(self.frequencies_offset+offset)
        #TODO: what do the last values mean?
        #TODO: where are the days of service properly encoded?
        freq, u1, u2, u3 = unpack("<4H", self.f.read(8))
        return self.get_string(freq), u1, u2, u3

    def get_string(self, offset):
        """
        given the offset, return a zero terminated string from the string
        block. surrounding whitespace is stripped and "---" is returned as
        None. results are cached in self.strings.

        raises IOError if the file ends before the terminating zero byte
        (the read loop used to spin forever in that case)
        """
        if offset in self.strings:
            # get it from dict to prevent ugly f.read(1)
            return self.strings[offset]

        self.f.seek(self.strings_offset+offset)
        chunks = []
        # read zero terminated string.. ugly in py..
        while True:
            b = self.f.read(1)
            if not b:
                # read() returns an empty string at the end of the file
                raise IOError("unterminated string at offset %d"%offset)
            if b == b"\0":
                break
            chunks.append(b)
        result = b"".join(chunks)
        if not isinstance(result, str):
            # only reached where read() returns bytes instead of str.
            # NOTE(review): assumes the strings are iso-8859-1 - confirm
            result = result.decode("iso-8859-1")
        # some strings come with many whitespaces at the end
        result = result.strip()
        # by convention of the format "---" means None
        if result == "---":
            result = None
        # fill a dict with strings to prevent too much ugly f.read(1)
        self.strings[offset] = result
        return result

    def get_train_props(self, offset):
        """
        given the offset, get the list of train property strings from the
        data block beginning at train_props_offset. the first ushort marks
        the amount of properties, it is followed by one string reference
        per property
        """
        self.f.seek(self.train_props_offset+offset)
        n, = unpack("<H", self.f.read(2))
        # read all references first, get_string() moves the file position
        references = unpack("<%dH"%n, self.f.read(2*n))
        return [self.get_string(reference) for reference in references]

    def get_city(self, offset):
        """
        given the index of a city, get its data from the data block
        beginning at cities_offset. every entry is 14 bytes long.
        the data includes the city name, its code and latitude, longitude

        returns a dict with the keys "name", "L", "X" and "Y"
        """
        self.f.seek(self.cities_offset+offset*14)
        name, L, X, Y = unpack("<H3I", self.f.read(14))
        return {"name":self.get_string(name), "L":L, "X":X, "Y":Y}
|
|
|
|
#f = open("stendal-salzwedel-1st.bin", "r")
# NOTE: everything below is executed when the module is loaded.
# a gzip compressed sample answer is opened from the current working
# directory and parsed
f = GzipFile("stendal-salzwedel-1st.bin", "r")
# alternative: parse a live answer of get_pln_data() instead of the sample
#a = PlnParse(get_pln_data(compile_i_from_station("Bremen"), compile_i_from_station("Karlsruhe"), dt = datetime.today()+timedelta(days=1)))
#a = PlnParse(get_pln_data(compile_i_from_coords(8.6535, 53.1667), compile_i_from_station("Bremen"), dt = datetime.today()+timedelta(days=4)))
a = PlnParse(f)

#def bin(i):
# return "".join(str((i >> y) & 1) for y in range(16-1, -1, -1))
#print a.timetable_info
# print the dict of every connection that was parsed
for conn in a.connections:
    print conn
|
|
#for conn in a.connections:
|
|
# print "\t".join([conn["freq"][0], bin(conn["freq"][1]), bin(conn["freq"][2]), bin(conn["freq"][3])])
|
|
#for conn in a.connections:
|
|
# print conn["freq"], conn["trains"][0]["dep_station"]["L"], conn["trains"][0]["train"], conn["trains"][0]["dep_time"]
|