initial commit
This commit is contained in:
commit
b753dc063e
1 changed files with 52 additions and 0 deletions
52
scrape.py
Normal file
52
scrape.py
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
import urllib2
|
||||||
|
import urllib
|
||||||
|
import json
|
||||||
|
import shelve
|
||||||
|
import time
|
||||||
|
import itertools
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def get_json(url):
    """Fetch *url* and return its body decoded as JSON.

    The request is retried until it succeeds with a non-empty body.  Every
    attempt, including the first, is preceded by a 3 second pause to
    throttle the request rate.

    Args:
        url: Address of the JSON resource to download.

    Returns:
        The decoded JSON document.

    Raises:
        ValueError: If a non-empty body is not valid JSON.
    """
    while True:
        time.sleep(3)  # throttle
        try:
            response = urllib2.urlopen(url)
        except Exception as e:
            # Deliberate best-effort: report the failure and try again.
            print("%s exception: %s" % (url, e))
            continue
        # Measure the body itself rather than trusting the Content-Length
        # header, which is absent on some responses (e.g. chunked ones).
        try:
            body = response.read()
        finally:
            # Release the connection whether or not this attempt is kept.
            response.close()
        if body:
            return json.loads(body)
|
||||||
|
|
||||||
|
def scrape_online():
    """Download the item catalogue and store it in the ``ge.db`` shelf.

    For every category number in ``range(36)`` the category summary is
    fetched, then each letter bucket that reports at least one item is
    paged through until the advertised number of items has been collected.
    Each category is stored under ``str(cat_num)`` as a dict mapping the
    bucket letter to its list of item records, and the shelf is synced
    after every category.
    """
    base = "http://services.runescape.com/m=itemdb_rs/api/catalogue/"
    result = shelve.open("ge.db")
    try:
        for cat_num in range(36):
            cat_content = {}
            data = get_json(base + "category.json?category=" + str(cat_num))
            for entry in data['alpha']:
                alpha = entry['letter']
                num_items = entry['items']
                if num_items == 0:
                    continue
                print("%s %s %s" % (cat_num, alpha, num_items))
                collected = cat_content[alpha] = []
                page = 1
                while len(collected) < num_items:
                    itemdata = get_json(
                        base + "items.json?category=" + str(cat_num)
                        + "&alpha=" + urllib.quote(str(alpha))
                        + "&page=" + str(page))
                    page_items = itemdata['items']
                    if not page_items:
                        # The server ran out of items before reaching the
                        # advertised count; stop instead of requesting
                        # ever-higher page numbers forever.
                        print("%s %s: expected %s items, got %s"
                              % (cat_num, alpha, num_items, len(collected)))
                        break
                    collected.extend(page_items)
                    page += 1
            result[str(cat_num)] = cat_content
            result.sync()
    finally:
        # Close the shelf even when a request or lookup raises.
        result.close()
|
||||||
|
|
||||||
|
# Command-line search: print every stored item whose name contains the
# single command-line argument (case-insensitive).

# Validate the arguments before touching the database so a usage error
# does not pay for loading the whole shelf.
if len(sys.argv) != 2:
    print("argument missing")
    # sys.exit rather than the site-provided exit(), which is not
    # guaranteed to exist (e.g. under ``python -S``).
    sys.exit()

search = "rune"  # NOTE(review): not read anywhere in this script

db = shelve.open("ge.db")
try:
    # Each shelf value maps a letter to a list of item records; flatten the
    # two levels into one list of items.
    letter_lists = itertools.chain.from_iterable(
        cat.values() for cat in db.values())
    items = list(itertools.chain.from_iterable(letter_lists))
finally:
    # Everything needed is now in memory, so release the shelf.
    db.close()

needle = sys.argv[1].lower()
for it in items:
    if needle in it['name'].lower():
        print(it)
        # To print selected fields only, use it['id'], it['name'] and
        # it['current']['price'] instead.
|
Loading…
Reference in a new issue