52 lines
1.6 KiB
Python
52 lines
1.6 KiB
Python
import urllib2
|
|
import urllib
|
|
import json
|
|
import shelve
|
|
import time
|
|
import itertools
|
|
import sys
|
|
|
|
def get_json(url):
    """Fetch *url* and return its body parsed as JSON.

    The request is repeated until a non-empty body is received.  Every
    attempt, including the first, is preceded by a 3 second pause so the
    remote service is not hit faster than that.  Errors raised while opening
    the URL are printed and the request is retried; there is no upper bound
    on the number of attempts.

    Args:
        url: Address of the JSON resource.

    Returns:
        The decoded JSON document.

    Raises:
        ValueError: if a non-empty body is not valid JSON.
    """
    while True:
        time.sleep(3)  # throttle
        try:
            response = urllib2.urlopen(url)
        except Exception as e:
            # Single-argument form: prints the same text as the Python 2
            # statement `print url, "exception:", e`.
            print("%s exception: %s" % (url, e))
            continue
        try:
            # Judge emptiness from the body itself rather than from the
            # Content-length header, which a server may omit.
            body = response.read()
        finally:
            # Always release the connection, also for discarded empty replies.
            response.close()
        if body:
            return json.loads(body)
|
|
|
|
def scrape_online():
    """Download the item catalogue and store it in the "ge.db" shelf.

    For each category number in range(36) the category summary is fetched,
    then every letter with a non-zero item count is paged through until the
    advertised number of items has been collected.  The shelf entry for a
    category is keyed by str(category number) and holds a dict mapping each
    letter to the list of item records returned by the service.  The shelf is
    synced after every category and closed when the function leaves, whether
    it finishes normally or an exception propagates.
    """
    result = shelve.open("ge.db")
    try:
        for cat_num in range(36):
            cat_content = dict()
            data = get_json(
                "http://services.runescape.com/m=itemdb_rs/api/catalogue/category.json?category="
                + str(cat_num))
            for item in data['alpha']:
                alpha = item['letter']
                num_items = item['items']
                if num_items == 0:
                    continue
                # Progress line: category, letter, expected item count.
                print("%s %s %s" % (cat_num, alpha, num_items))
                collected = cat_content[alpha] = list()
                page = 1
                while len(collected) < num_items:
                    itemdata = get_json(
                        "http://services.runescape.com/m=itemdb_rs/api/catalogue/items.json?category="
                        + str(cat_num)
                        + "&alpha=" + urllib.quote(str(alpha))
                        + "&page=" + str(page))
                    if not itemdata['items']:
                        # An empty page adds nothing, so the loop condition
                        # could never become false; stop paging this letter
                        # instead of requesting further pages forever.
                        break
                    collected.extend(itemdata['items'])
                    page += 1
            result[str(cat_num)] = cat_content
            # Flush after each category so finished work survives a later failure.
            result.sync()
    finally:
        result.close()
|
|
|
|
def _find_items(items, query):
    """Return the items whose 'name' contains *query*, ignoring case."""
    needle = query.lower()
    return [it for it in items if needle in it['name'].lower()]


def main(argv=None):
    """Print every item stored in "ge.db" whose name contains the search term.

    Args:
        argv: Argument vector; defaults to sys.argv.  It must hold exactly
            two entries: the program name and the search term.

    Raises:
        SystemExit: with status 1, after printing "argument missing", when
            the argument count is wrong.
    """
    if argv is None:
        argv = sys.argv
    # Validate the command line before touching the shelf, so a usage error
    # does not open (or create) the database file.
    if len(argv) != 2:
        print("argument missing")
        sys.exit(1)

    db = shelve.open("ge.db")
    try:
        # The shelf maps category -> {letter: [item, ...]}; flatten both
        # levels into one list of item records.
        items = list(itertools.chain.from_iterable(
            itertools.chain.from_iterable(cat.values() for cat in db.values())))
    finally:
        db.close()

    for it in _find_items(items, argv[1]):
        print(it)


if __name__ == "__main__":
    main()
|