From 8cdf2529051c26d46dfb369d9eb44654cba70058 Mon Sep 17 00:00:00 2001
From: josch <j.schauer@email.de>
Date: Fri, 23 Jan 2015 11:26:04 +0100
Subject: [PATCH] initial commit

---
 rsmusicextract.py | 126 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100755 rsmusicextract.py

diff --git a/rsmusicextract.py b/rsmusicextract.py
new file mode 100755
index 0000000..dc540f8
--- /dev/null
+++ b/rsmusicextract.py
@@ -0,0 +1,126 @@
+#!/usr/bin/python3
+#
+# Copyright 2015 Johannes 'josch' Schauer <j.schauer@email.de>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+import os, struct, io, zlib, sys
+
+def bytes_to_int(data):
+    return sum([b << ((len(data) - i - 1) * 8) for i, b in enumerate(data)])
+
+def unpack_file(cache_dir, archive_idx, file_idx):
+    idx_fname = os.path.join(cache_dir, "main_file_cache.idx" + str(archive_idx))
+    with open(idx_fname, "rb") as idx_file:
+        idx_file.seek(file_idx*6)
+        fsize = bytes_to_int(idx_file.read(3))
+        curr_chunk_offs = bytes_to_int(idx_file.read(3)) * 520
+    write_offs, chunk_idx, fbuf = 0, 0, b""
+    with open(os.path.join(cache_dir, "main_file_cache.dat2"), "rb") as cache_file:
+        while curr_chunk_offs != 0:
+            cache_file.seek(curr_chunk_offs)
+            if file_idx >= 65536:
+                assert file_idx == struct.unpack(">I", cache_file.read(4))[0]
+                chunk_size = min(510, fsize - write_offs)
+            else:
+                assert file_idx == struct.unpack(">H", cache_file.read(2))[0]
+                chunk_size = min(512, fsize - write_offs)
+            assert chunk_idx == struct.unpack(">H", cache_file.read(2))[0]
+            curr_chunk_offs = bytes_to_int(cache_file.read(3)) * 520
+            assert archive_idx == struct.unpack("B", cache_file.read(1))[0]
+            chunk_idx += 1
+            write_offs += chunk_size
+            fbuf += cache_file.read(chunk_size)
+    assert len(fbuf) == fsize
+    if fbuf[9:11] == b"\x1f\x8b":
+        return zlib.decompress(fbuf[19:], -zlib.MAX_WBITS)
+    elif fbuf[4:10] == b"\x31\x41\x59\x26\x53\x59":
+        raise Exception("bzip2 decompression not implemented")
+    else:
+        return fbuf[5:]
+
+def get_tname_dict(inf):
+    track_id2name = dict()
+    music_num, = struct.unpack(">H", inf.read(2))
+    for i in range(music_num):
+        track_id, = struct.unpack(">H", inf.read(2))
+        s = b""
+        while True:
+            b = inf.read(1)
+            if b == b"\x00":
+                break
+            s += b
+        if s in [b'', b' ', b'  ', b'   ']:
+            continue
+        track_id2name[track_id] = s.decode("utf8")
+    return track_id2name
+
+def get_tid_dict(inf, track_id2name):
+    file_id2track = dict()
+    file_num, = struct.unpack(">H", inf.read(2))
+    for i in range(file_num):
+        track_id, file_id = struct.unpack(">HI", inf.read(6))
+        if track_id not in track_id2name:
+            continue
+        file_id2track[file_id] = track_id
+    return file_id2track
+
+def main(cache_dir, out_dir, process_incomplete):
+    # archive 15, file 5 stores the track names
+    resolve = unpack_file(cache_dir, 17, 5)
+    # this is a gross hack because I don't know a better way to find the right
+    # sections in the archive
+    names = resolve.index(b"\x00\x66\x24\x07")
+    files = resolve.index(b"\x00\x66\x0b\x08")
+    assert names != -1 and files != -1
+    track_id2name = get_tname_dict(io.BytesIO(resolve[names+6:]))
+    file_id2track = get_tid_dict(io.BytesIO(resolve[files+6:]), track_id2name.keys())
+
+    # go through all track ids and get the associated file from the archive
+    tracklist = []
+    for i,(file_id, track_id) in enumerate(sorted(file_id2track.items())):
+        jaga = unpack_file(cache_dir, 40, file_id)
+        if jaga is None or jaga[:4] != b"JAGA":
+            continue
+        jaga, incomplete, ogg_chunks = io.BytesIO(jaga[32:]), False, []
+        while jaga.read(4) != b"OggS":
+            file_id, = struct.unpack(">I", jaga.read(4))
+            ogg = unpack_file(cache_dir, 40, file_id)
+            if ogg is None or ogg[:4] != b"OggS":
+                incomplete = True
+                break
+            ogg_chunks.append(ogg)
+        print("%f %%"%((i*100)/len(file_id2track)), end='\r', file=sys.stderr)
+        if incomplete and not process_incomplete:
+            continue
+        if not out_dir:
+            tracklist.append(track_id2name[track_id])
+            continue
+        ogg_chunks = [ b"OggS" + jaga.read() ] + ogg_chunks
+        if incomplete:
+            outdir = os.path.join(out_dir, "incomplete", track_id2name[track_id])
+        else:
+            outdir = os.path.join(out_dir, track_id2name[track_id])
+        os.makedirs(outdir, exist_ok=True)
+        for i,chunk in enumerate(ogg_chunks):
+            with open(os.path.join(outdir, "%03d.ogg"%i), "wb") as f:
+                f.write(chunk)
+    if not out_dir:
+        print("\n".join(sorted(tracklist)))
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Extract music tracks from runescape cache file.")
+    parser.add_argument("cache", help="The runescape cache directory (probably ends with jagexcache/runescape/LIVE/).")
+    parser.add_argument("out", nargs="?", help="Output directory for extracted music. If this is not supplied, the available music is simply listed.")
+    parser.add_argument("-i", "--incomplete", action="store_true", help="Also process incomplete music.")
+    args = parser.parse_args()
+    main(args.cache, args.out, args.incomplete)