initial commit
This commit is contained in:
commit
b753dc063e
1 changed files with 52 additions and 0 deletions
52
scrape.py
Normal file
52
scrape.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
import urllib2
|
||||
import urllib
|
||||
import json
|
||||
import shelve
|
||||
import time
|
||||
import itertools
|
||||
import sys
|
||||
|
||||
def get_json(url, delay=3):
    """Fetch *url* and return its body decoded as JSON, retrying until data arrives.

    Every attempt is preceded by a pause of ``delay`` seconds so the remote
    service is not hammered.  An attempt that raises while opening or reading
    the URL is reported on stdout and retried; an attempt that yields an empty
    body is retried silently.  There is no upper bound on the number of
    attempts, so this call blocks for as long as the URL keeps failing.

    Args:
        url: Address to request.
        delay: Seconds to sleep before each attempt (default 3).

    Returns:
        The object produced by decoding the response body as JSON.

    Raises:
        ValueError: If a non-empty body is not valid JSON.
    """
    while True:
        time.sleep(delay)  # throttle
        try:
            response = urllib2.urlopen(url)
            try:
                # Read the payload itself rather than trusting the
                # Content-Length header, which may be absent.
                body = response.read()
            finally:
                # Release the connection on every attempt, not just the last.
                response.close()
        except Exception as e:
            print("%s exception: %s" % (url, e))
            continue
        if body:
            return json.loads(body)
|
||||
|
||||
def scrape_online(num_categories=36, db_path="ge.db"):
    """Download the item catalogue category by category into a shelve database.

    For each category number in ``range(num_categories)`` the category summary
    is fetched; its ``'alpha'`` list gives, per starting letter, how many
    items exist.  Item pages for that letter are then requested until the
    advertised number of items has been collected.  The result for a category
    is a dict mapping letter -> list of item records, stored in the shelf
    under the category number as a string and synced to disk immediately so
    finished categories survive a later failure.

    Args:
        num_categories: How many category numbers (starting at 0) to scrape.
        db_path: Filename handed to ``shelve.open``.
    """
    base_url = "http://services.runescape.com/m=itemdb_rs/api/catalogue/"
    result = shelve.open(db_path)
    try:
        for cat_num in range(num_categories):
            cat_content = dict()
            data = get_json(base_url + "category.json?category=" + str(cat_num))
            for item in data['alpha']:
                alpha = item['letter']
                num_items = item['items']
                if num_items == 0:
                    continue
                print("%s %s %s" % (cat_num, alpha, num_items))
                collected = cat_content[alpha] = list()
                page = 1
                while len(collected) < num_items:
                    itemdata = get_json(
                        base_url + "items.json?category=" + str(cat_num)
                        + "&alpha=" + urllib.quote(str(alpha))
                        + "&page=" + str(page))
                    page_items = itemdata['items']
                    if not page_items:
                        # The advertised count was never reached; asking for
                        # further pages would loop forever.
                        print("%s %s incomplete: got %s of %s" % (
                            cat_num, alpha, len(collected), num_items))
                        break
                    collected.extend(page_items)
                    page += 1
            result[str(cat_num)] = cat_content
            result.sync()
    finally:
        # Close the shelf even when a request or a lookup above raises.
        result.close()
|
||||
|
||||
# Command-line search: print every stored item whose name contains the single
# command-line argument, compared case-insensitively.

# Validate the arguments before touching the database, so a usage error does
# not open (and possibly create) ge.db.  sys.exit with a string writes it to
# stderr and exits with status 1.
if len(sys.argv) != 2:
    sys.exit("argument missing")

query = sys.argv[1].lower()

db = shelve.open("ge.db")
try:
    # The shelf maps category -> {letter -> [item, ...]}; flatten both levels
    # into one list while the shelf is still open.
    items = list(itertools.chain.from_iterable(
        itertools.chain.from_iterable(cat.values() for cat in db.values())))
finally:
    db.close()

for it in items:
    if query in it['name'].lower():
        # Prints the whole record.  NOTE(review): an earlier commented-out
        # variant printed it['id'], it['name'], it['current']['price']
        # instead; confirm those keys exist before switching to it.
        print(it)
|
Loading…
Reference in a new issue