"""Scrape and search the RuneScape item database (itemdb_rs) catalogue.

``scrape_online()`` downloads the catalogue into the shelve file ``ge.db``.
Running the file as a script with one argument prints every stored item
whose name contains that argument (case-insensitively).
"""

import itertools
import json
import shelve
import sys
import time
import urllib

try:
    import urllib2
except ImportError:
    # Python 3: urllib2's functionality lives in urllib.request / urllib.parse.
    import urllib.parse
    import urllib.request
    _urlopen = urllib.request.urlopen
    _quote = urllib.parse.quote
else:
    _urlopen = urllib2.urlopen
    _quote = urllib.quote


DB_PATH = "ge.db"
CATEGORY_COUNT = 36  # scrape_online() walks categories 0 .. CATEGORY_COUNT - 1
CATEGORY_URL = ("http://services.runescape.com/m=itemdb_rs/api/catalogue/"
                "category.json?category=")
ITEMS_URL = ("http://services.runescape.com/m=itemdb_rs/api/catalogue/"
             "items.json?category=")


def get_json(url, delay=3):
    """Fetch *url* and return its body decoded as JSON.

    The request is repeated until it succeeds with a non-empty body.
    There is no upper bound on the number of attempts.

    Args:
        url: Address to request.
        delay: Seconds to sleep before every attempt (throttle).

    Returns:
        The decoded JSON document.

    Raises:
        ValueError: If a non-empty body is not valid JSON.
    """
    while True:
        time.sleep(delay)  # throttle every attempt, including the first
        try:
            response = _urlopen(url)
        except Exception as e:
            # Deliberately broad: any failure to open the URL is reported
            # and retried rather than aborting a long scrape.
            print("%s exception: %s" % (url, e))
            continue
        try:
            body = response.read()
        finally:
            response.close()
        # Checking the body itself (rather than the Content-length header)
        # also copes with responses that carry no such header.
        if body:
            return json.loads(body)


def scrape_online():
    """Download the whole catalogue into the shelve file ``ge.db``.

    For each category number the per-letter item counts are requested,
    then the item pages for every letter that has items. Each category is
    stored under ``str(category number)`` as ``{letter: [item, ...]}`` and
    synced to disk as soon as it is complete.
    """
    result = shelve.open(DB_PATH)
    try:
        for cat_num in range(CATEGORY_COUNT):
            cat_content = dict()
            data = get_json(CATEGORY_URL + str(cat_num))
            for entry in data['alpha']:
                alpha = entry['letter']
                num_items = entry['items']
                if num_items == 0:
                    continue
                print("%s %s %s" % (cat_num, alpha, num_items))
                collected = cat_content[alpha] = list()
                page = 1
                while len(collected) < num_items:
                    itemdata = get_json(
                        ITEMS_URL + str(cat_num)
                        + "&alpha=" + _quote(str(alpha))
                        + "&page=" + str(page))
                    page_items = itemdata['items']
                    if not page_items:
                        # An empty page can never fill the list; without
                        # this check the loop would request pages forever.
                        print("%s %s: expected %s items, got %s"
                              % (cat_num, alpha, num_items, len(collected)))
                        break
                    collected.extend(page_items)
                    page += 1
            result[str(cat_num)] = cat_content
            result.sync()
    finally:
        result.close()


def load_items(db):
    """Flatten ``{category: {letter: [item, ...]}}`` into one list of items.

    Args:
        db: Mapping shaped like the shelf written by ``scrape_online()``.

    Returns:
        A list containing every item of every letter of every category.
    """
    per_letter_lists = itertools.chain.from_iterable(
        cat.values() for cat in db.values())
    return list(itertools.chain.from_iterable(per_letter_lists))


def search_items(items, query):
    """Return the items whose ``'name'`` contains *query*, ignoring case.

    Args:
        items: Iterable of item dicts, each with a ``'name'`` entry.
        query: Substring to look for.

    Returns:
        A list of the matching items, in their original order.
    """
    needle = query.lower()  # lower-case once, not once per item
    return [it for it in items if needle in it['name'].lower()]


def main(argv=None):
    """Print every stored item whose name contains the search argument.

    Args:
        argv: Argument list in ``sys.argv`` form (program name first);
            defaults to ``sys.argv``.

    Returns:
        0 on success, 1 when the argument count is wrong.
    """
    if argv is None:
        argv = sys.argv
    # Validate the arguments before touching the database.
    if len(argv) != 2:
        print("argument missing")
        return 1
    db = shelve.open(DB_PATH)
    try:
        items = load_items(db)
    finally:
        db.close()
    for it in search_items(items, argv[1]):
        print(it)
        # Alternative compact output:
        # print("%s %s %s" % (it['id'], it['name'], it['current']['price']))
    return 0


if __name__ == "__main__":
    sys.exit(main())