Scrape Foursquare API to get venue information
import urllib
import urllib2
import json
import datetime
import pandas as pd
from pandas.io.json import json_normalize
import math
import time
from math import cos
from pandas import DataFrame
### Helper function for converting meters to lat/long
def distcust(p, d, lat_m, long_m):
    """Return the point reached by shifting *p* by multiples of *d* metres.

    Uses a flat-earth approximation that is adequate for offsets of a few
    kilometres away from the poles.

    p      -- dict with 'lat' and 'long' keys, in decimal degrees.
    d      -- step length in metres.
    lat_m  -- number of steps to move north (negative moves south).
    long_m -- number of steps to move east (negative moves west).

    Returns a new dict {'lat': ..., 'long': ...}; *p* is not modified.
    """
    # A quarter meridian (equator to pole, 90 degrees) is roughly 10,000 km,
    # which gives about 111,111 metres per degree of latitude.
    meters_per_degree = 10000.0 / 90 * 1000

    lat = p['lat']
    lon = p['long']

    # Degrees of longitude shrink with the cosine of the latitude, and the
    # trigonometric functions expect radians rather than degrees.
    lon_scale = math.cos(math.radians(lat))

    lat1 = lat + lat_m * (d / meters_per_degree)
    long1 = lon + long_m * (d / (meters_per_degree * lon_scale))
    return {'lat': lat1, 'long': long1}
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
client_id = "YOUR_CLIENT_ID"
client_secret = "YOUR_CLIENT_SECRET"
#p = {'lat': 37.7833, 'long': -122.4167} # central San Francisco, at Van Ness and Market
#p = {'lat': 40.783011, 'long': -73.965368} # central NYC, at Central Park
p = {'lat': 42.963601, 'long': -85.66878} # grand rapids, mi at division and fulton
distance = 100  # grid step handed to distcust, and the search radius sent to the API
limit = 50  # NOTE(review): never sent to the API; presumably meant as a &limit= parameter - confirm
gridSize = 10
df = DataFrame()
# "stats" is needed because the check-in count is read from it further down.
requested_keys = ["categories", "id", "location", "name", "stats"]
# Columns kept from each venue-detail response.
requested_keys2 = ["id", "price.currency", "rating", "likes.count"]
category = "bar"
category_id = "4d4b7105d754a06376d81259"


def _fetch_json(url):
    """Return the decoded JSON body served at *url*, always closing the connection."""
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        return json.loads(response.read())
    finally:
        response.close()


def _lookup(mapping, key):
    """Return mapping[key], or None when *mapping* is not a dict or lacks *key*."""
    if isinstance(mapping, dict):
        return mapping.get(key)
    return None


def _first_category_name(categories):
    """Return the name of the first entry of *categories*, or None if there is none."""
    if isinstance(categories, list) and categories:
        return _lookup(categories[0], 'name')
    return None


# ---------------------------------------------------------------------------
# Crawl a square grid of search centres around p
# ---------------------------------------------------------------------------
api_version = time.strftime("%Y%m%d")
# With gridSize = 10 this yields the multipliers -3.0, -2.9, ..., 2.9.
grid_offsets = [step / 10.0 for step in range(-3 * gridSize, 3 * gridSize)]
collected = []

for x in grid_offsets:
    for y in grid_offsets:
        center = distcust(p, distance, x, y)
        url = "https://api.foursquare.com/v2/venues/search?ll=%s,%s&intent=browse&radius=%s&categoryId=%s&client_id=%s&client_secret=%s&v=%s" % (center["lat"], center["long"], distance, category_id, client_id, client_secret, api_version)
        try:
            venues = _fetch_json(url)["response"]['venues']
            if venues:
                # reindex (rather than plain column selection) keeps the run
                # going when a requested key is absent from the response.
                data = DataFrame(venues).reindex(columns=requested_keys)
                frames = []
                for venue_id in data["id"]:
                    url2 = "https://api.foursquare.com/v2/venues/%s?client_id=%s&client_secret=%s&v=%s" % (venue_id, client_id, client_secret, api_version)
                    nom_data = json_normalize(_fetch_json(url2)['response']['venue'])
                    # Not every venue carries every optional attribute.
                    for key in requested_keys2:
                        if key not in nom_data.columns:
                            nom_data[key] = 'NONE'
                    frames.append(nom_data[requested_keys2])
                    time.sleep(1)
                df2 = pd.concat(frames, ignore_index=True)
                collected.append(pd.merge(data, df2, how='left', on='id', suffixes=('_x', '_y')))
            print(center)
            time.sleep(1) # stay within API limits
        except Exception as e:
            # Best-effort crawl: report the failure and move on to the next cell.
            print(e)

# ---------------------------------------------------------------------------
# Flatten the collected venues and write them out
# ---------------------------------------------------------------------------
if collected:
    df = pd.concat(collected, ignore_index=True)
    # Neighbouring search circles overlap, so the same venue shows up repeatedly.
    df = df.drop_duplicates(subset='id', keep='last')
    print(df)
    df["categories"] = df["categories"].apply(_first_category_name)
    df["lat"] = df["location"].apply(lambda loc: _lookup(loc, "lat"))
    df["long"] = df["location"].apply(lambda loc: _lookup(loc, "lng"))
    df["distance"] = df["location"].apply(lambda loc: _lookup(loc, "distance"))
    df["checkins"] = df["stats"].apply(lambda stats: _lookup(stats, "checkinsCount"))
    # The merge joins on "id" and only the search result carries "name", so
    # neither column receives an _x/_y suffix.
    ordered_df = df[["id", "name", "categories", "checkins", "distance", "lat", "long", "price.currency", "rating", "likes.count"]]
    ordered_df.to_csv("foursquare_%s_grand_rapids.csv" % category, encoding='utf-8', index=False)
else:
    print("No venues were collected; nothing written.")
Written by Matthew L
Related protips
1 Response
It throws an error: "HTTP Error 400: Bad Request". Any idea why?
over 1 year ago
·
Have a fresh tip? Share with Coderwall community!
Post
Post a tip
Best
#Python
Authors
Sponsored by #native_company# — Learn More
#native_title#
#native_desc#