initial commit

This commit is contained in:
josch 2014-06-26 10:37:49 +02:00
commit b753dc063e

52
scrape.py Normal file
View file

@@ -0,0 +1,52 @@
import urllib2
import urllib
import json
import shelve
import time
import itertools
import sys
def get_json(url):
    """Fetch *url* and return its body decoded as JSON.

    Every attempt is preceded by a three second pause to throttle the
    requests.  The request is repeated, with no upper bound on the number
    of attempts, until it succeeds and delivers a non-empty body.

    Raises ValueError if a non-empty body is not valid JSON.
    """
    while True:
        time.sleep(3)  # throttle
        try:
            response = urllib2.urlopen(url)
            try:
                body = response.read()
            finally:
                # Release the connection on every attempt, retries included.
                response.close()
        except Exception as e:
            # Deliberate best effort: report the failure and try again.
            print("%s exception: %s" % (url, e))
            continue
        # Judge emptiness by the bytes actually received rather than by the
        # Content-Length header, which is absent on chunked responses.
        if body:
            return json.loads(body)
def scrape_online():
    """Download the item catalogue into the ``ge.db`` shelf.

    For each category number 0-35 the per-letter item counts are requested;
    for every letter with a non-zero count, item pages are fetched until
    the advertised number of items has been collected.  Each category is
    stored under ``str(cat_num)`` as a dict mapping the letter to its list
    of items, and the shelf is synced after every category.
    """
    base_url = "http://services.runescape.com/m=itemdb_rs/api/catalogue/"
    result = shelve.open("ge.db")
    try:
        for cat_num in range(36):
            cat_content = dict()
            data = get_json(base_url + "category.json?category=" + str(cat_num))
            for item in data['alpha']:
                alpha = item['letter']
                num_items = item['items']
                if num_items == 0:
                    continue
                print("%s %s %s" % (cat_num, alpha, num_items))
                collected = cat_content[alpha] = list()
                page = 1
                while len(collected) < num_items:
                    itemdata = get_json(
                        base_url + "items.json?category=" + str(cat_num)
                        + "&alpha=" + urllib.quote(str(alpha))
                        + "&page=" + str(page))
                    page_items = itemdata['items']
                    if not page_items:
                        # An empty page means nothing more can be fetched;
                        # without this check an overstated item count would
                        # keep the loop requesting pages forever.
                        break
                    collected.extend(page_items)
                    page += 1
            result[str(cat_num)] = cat_content
            # Flush after each category so finished work survives a crash.
            result.sync()
    finally:
        # Close the shelf even when a request or parse error propagates.
        result.close()
# Command-line search: print every stored item whose name contains the
# substring given as the single command-line argument (case-insensitive).
if len(sys.argv) != 2:
    # Validate the arguments before opening the database; sys.exit with a
    # message writes it to stderr and exits with a non-zero status.
    sys.exit("argument missing")
db = shelve.open("ge.db")
try:
    # The shelf maps category -> {letter: [item, ...]}; flatten both levels
    # into a single list of items.
    items = list(itertools.chain.from_iterable(
        itertools.chain.from_iterable(cat.values() for cat in db.values())))
finally:
    db.close()
needle = sys.argv[1].lower()
for it in items:
    if needle in it['name'].lower():
        print(it)
        # A more compact alternative would print only
        # it['id'], it['name'] and it['current']['price'].