# only install the folium library the first time - then comment out
#!conda install -c conda-forge folium=0.5.0 --yes
# Show every column and every row when a dataframe is displayed.
# pandas is imported right here because these options are set before the
# main import section runs; without it a top-to-bottom run fails with
# NameError on `pd`.
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
!pip install geopy
import random # library for random number generation
import datetime
import numpy as np # library for vectorized computation
import pandas as pd # library to process data as dataframes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser
%matplotlib inline
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.datasets.samples_generator import make_blobs
import folium
import folium.map
import json # library to handle JSON files
import requests # library to handle requests
import time
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe
print('Libraries imported.')
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3
def __iter__(self):
    """Stand-in ``__iter__`` implementation; always returns 0.

    It is bound onto object-storage response bodies below so that they
    expose an ``__iter__`` attribute.
    """
    return 0


# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
# NOTE(review): the API key is hard-coded here; consider loading it from the
# environment instead of keeping it in the source.
client_c7288fd559544ee4805edfc80a32e3c0 = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id='eB-aiMeRHR9sUVErXYnFX5q1iaJ_Lz_7nu_6iYpLa5Lh',
    ibm_auth_endpoint="https://iam.ng.bluemix.net/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com',
)

# Fetch the firearms CSV object and keep its streaming body.
firearms_object = client_c7288fd559544ee4805edfc80a32e3c0.get_object(
    Bucket='ibmdatascienceprofessionalcapston-donotdelete-pr-sipdcjsjlytlpp',
    Key='firearmdata.csv',
)
body = firearms_object['Body']

# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

firearms_df = pd.read_csv(body)
# Drop every row that contains a missing value.
firearms_df = firearms_df.dropna()
# Renumber the remaining rows from 0. reset_index returns a new dataframe, so
# the result has to be assigned back for the reset to take effect.
firearms_df = firearms_df.reset_index(drop=True)
# Store the recovery zip codes as 32-bit integers.
firearms_df['RECOVERY_ZIPCODE'] = firearms_df['RECOVERY_ZIPCODE'].astype('int32')
print('firearms data loaded')
# Notebook display of the first rows.
firearms_df.head()
# Fetch the Louisville zip-code CSV object and keep its streaming body.
louzips_object = client_c7288fd559544ee4805edfc80a32e3c0.get_object(
    Bucket='ibmdatascienceprofessionalcapston-donotdelete-pr-sipdcjsjlytlpp',
    Key='louisvillezips.csv',
)
body = louzips_object['Body']

# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

# Parse the CSV and store the zip column as 32-bit integers.
louzipsdf = pd.read_csv(body).astype({'zip': 'int32'})
print('louzips_df data loaded')
# Layout of the RECOVERY_DATE strings: ISO-style timestamp with a literal 'Z'.
fmt = '%Y-%m-%dT%H:%M:%SZ'
w = firearms_df[['RECOVERY_DATE']].values.tolist()
# Calendar date of every record and its ISO weekday number.
dt = [datetime.datetime.strptime(row[0], fmt).date() for row in w]
dow = [d.isoweekday() for d in dt]  # ISO numbering: Monday is 1 ... Sunday is 7
crimedf = pd.DataFrame(data={'date': dt, 'weekday': dow, 'zip': firearms_df['RECOVERY_ZIPCODE']})
crimedf['weekday'] = crimedf['weekday'].astype('int32')

# Number of records per zip code, ordered by zip code.
zipgrp = crimedf.groupby('zip')
zipcnt = zipgrp.count().sort_index().reset_index()

# Attach the coordinates by matching on the zip code value. A plain
# DataFrame.join would align the two frames on their row index, pairing rows
# by position rather than by zip code.
zips = zipcnt.merge(
    louzipsdf.drop_duplicates(subset='zip'),
    on='zip',
    how='left',
    suffixes=('_l', '_r'),
)
# Zip codes without a match in louzipsdf have missing coordinates; drop them.
zips = zips.dropna()
# reset_index returns a new dataframe, so assign the result back.
zips = zips.reset_index(drop=True)
# After count() the 'weekday' column holds the number of records per zip code.
zips.rename(columns={'weekday': 'crime count', 'lng': 'lon'}, inplace=True)
def getDay(n):
    """Return the short label for weekday number *n*.

    The numbers 1 through 7 map to 'M:', 'Tu:', 'W:', 'Th:', 'F:', 'Sa:'
    and 'Su:' respectively; any other value yields 'X'.
    """
    labels = {
        1: 'M:',
        2: 'Tu:',
        3: 'W:',
        4: 'Th:',
        5: 'F:',
        6: 'Sa:',
        7: 'Su:',
    }
    return labels.get(n, 'X')
def getDowLabel(zc):
    """Build the per-weekday percentage label for zip code *zc*.

    Looks up the records of *zc* in the module-level ``zipgrp`` grouping,
    counts them per weekday and returns one '<day label><percent> ' piece
    for each weekday present, concatenated in index order. Percentages are
    rounded to whole numbers.
    """
    per_day = zipgrp.get_group(zc).groupby('weekday').count()['zip']
    total = float(per_day.sum())
    pieces = []
    for day, count in per_day.items():
        share = format(round(100 * (count / total)), '.0f')
        pieces.append(getDay(day) + str(share) + ' ')
    return ''.join(pieces)
def getVenueLabel(name,rating,cat):
    """Return the HTML label text for one venue.

    *name* and *cat* must be strings; *rating* is converted with ``str``.
    The label lists venue, rating and category separated by ``<br>`` tags
    and ends with a dashed separator line.
    """
    parts = (
        'Venue:', name, ' <br> ',
        'Rating:', str(rating), '<br> ',
        'Category:', cat,
        '<br> --------------------------- <br>',
    )
    return ''.join(parts)
# Look up the coordinates of the city the map is centred on.
address = 'Louisville, KY'
geolocator = Nominatim(user_agent="kentucky_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the next line.
if location is None:
    raise RuntimeError('Could not geocode address: ' + address)
lati = location.latitude
longi = location.longitude
# Printed as latitude, longitude - the same order used everywhere else here.
print('The geograpical coordinate of Louisville are {}, {}.'.format(lati, longi))

# Foursquare API credentials and API version date.
# NOTE(review): these credentials are hard-coded and echoed to the output
# below; consider loading them from the environment and not printing the secret.
CLIENT_ID = 'ZNMO4N1VVSBBVJDTQALK5TXZNMML2CYHLNMBUE0DKEM0HNAD' # your Foursquare ID
CLIENT_SECRET = 'VTRJTZOOYPBE0GXCTKRDDSNTQEG5YSFYZ0GBKSAJT4DAZMNB' # your Foursquare Secret
VERSION = '20180604'
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
def getLouisvilleVenues(radius=13000, limit=50, timeout=30):
    """Fetch recommended venues around the module-level ``lati``/``longi`` point.

    Calls the Foursquare ``venues/explore`` endpoint using the module-level
    ``CLIENT_ID``, ``CLIENT_SECRET`` and ``VERSION``.

    Args:
        radius: Search radius passed to the API (default 13000, about 8 miles
            if interpreted as metres).
        limit: Maximum number of venues requested (default 50).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook.

    Returns:
        The ``items`` list of the first group in the API response.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
        KeyError, IndexError: if the response lacks the expected structure.
    """
    url = 'https://api.foursquare.com/v2/venues/explore'
    # Let requests build and URL-encode the query string.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lati, longi),
        'radius': radius,
        'limit': limit,
    }
    # make the GET request
    response = requests.get(url, params=params, timeout=timeout)
    results = response.json()["response"]['groups'][0]['items']
    return results
def getVenue(v, timeout=30):
    """Fetch the detail record of one venue from the Foursquare API.

    Args:
        v: An item whose ``v['venue']['id']`` holds the venue id.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook.

    Returns:
        The ``venue`` dict of the response, or None when the response
        carries no ``venue`` entry.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
        KeyError: if *v* has no venue id or the reply has no ``response`` key.
    """
    venueid = v['venue']['id']
    url = 'https://api.foursquare.com/v2/venues/{}'.format(venueid)
    # Let requests build and URL-encode the query string.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
    }
    # make the GET request
    resp = requests.get(url, params=params, timeout=timeout).json()['response']
    # A reply without a venue is reported as None rather than as an error.
    return resp['venue'] if 'venue' in resp else None
def getVenuesDetail():
    """Collect detail rows for the venues returned by ``getLouisvilleVenues``.

    Each venue is looked up with ``getVenue``. A venue is kept only when it
    has a name, location, rating, rating colour, at least one category and a
    postal code. Progress is printed after every venue, and the calls are
    spaced out with short sleeps.

    Returns:
        A dataframe with the columns name, zipc, lat, lon, rating, color,
        cat and label, one row per kept venue.
    """
    columns = ['name', 'zipc', 'lat', 'lon', 'rating', 'color', 'cat', 'label']
    required = ('name', 'location', 'rating', 'ratingColor', 'categories')
    res = getLouisvilleVenues()
    K = len(res)
    time.sleep(2.0)
    # Gather rows in a list and build the dataframe once at the end;
    # appending to a dataframe row by row copies it on every insert.
    rows = []
    for j, v in enumerate(res, start=1):
        venue = getVenue(v)
        time.sleep(1.0)
        if venue is None:
            print('--- empty venue ---')
            print('processed venue ... ',j,' ... out of ',K)
            continue
        time.sleep(0.5)
        complete = all(key in venue for key in required)
        # An empty category list cannot supply a category name, so skip it.
        if complete and venue['categories'] and 'postalCode' in venue['location']:
            name = venue['name']
            zipc = venue['location']['postalCode']
            lat = venue['location']['lat']
            lon = venue['location']['lng']
            rating = venue['rating']
            color = venue['ratingColor']
            cat = venue['categories'][0]['shortName']
            venueLabel = getVenueLabel(name,rating,cat)
            rows.append([name,zipc,lat,lon,rating,color,cat,venueLabel])
        print('processed venue ... ',j,' ... out of ',K)
    venuesdf = pd.DataFrame(rows, columns=columns)
    print('returning ',str(len(venuesdf.index)),' venues ...')
    return venuesdf
# The venue details were fetched once and saved; the two commented lines
# below regenerate the CSV when needed.
#venuesDetail = getVenuesDetail()
# save for future use
#venuesDetail.to_csv('louisvillevenuedetail.csv',index=False)
venueDetail = pd.read_csv('louisvillevenuedetail.csv')
# Pick up to 10 venues at random. sample() raises when asked for more rows
# than the dataframe holds, so cap the request at the number available.
venueRandom = venueDetail.sample(min(10, len(venueDetail)))
venueRandom
# dictionary for zip code labels
zcLabels = {}
# determine scale factor for radius: each zip code's share of all records is
# mapped linearly onto 0..10 extra radius units.
N = float(len(crimedf['zip']))
sf = zips['crime count']/N
Mx = sf.max()
mx = sf.min()
R = Mx-mx
# When every zip code has the same share the range is zero; use a flat
# scale instead of dividing by zero.
slp = 10.0/R if R > 0 else 0.0
#setup the map
louisville = folium.Map(location=[lati, longi], zoom_start=11)
# set color scheme for the clusters: one rainbow colour per zip code row
colors_array = cm.rainbow(np.linspace(0, 1, len(zips)))
rainbow = [colors.rgb2hex(c) for c in colors_array]
K = len(rainbow)
# add markers to the map
for i, (zc, cnt, lat, lon) in enumerate(
        zip(zips['zip'], zips['crime count'], zips['lat'], zips['lon'])):
    v = cnt/N
    radval = round(slp*(v-mx))
    pcrimes = format(round(100*v), '.0f')
    dowlbl = getDowLabel(zc)
    ziplbl = ' Zip Code:'+str(zc)+'||% of crimes: '+str(pcrimes)+'||% by day: '+dowlbl
    # Remember the label so venue markers in this zip code can reuse it.
    zcLabels[zc] = ziplbl
    label = folium.Popup(ziplbl)
    folium.CircleMarker(
        [lat, lon],
        radius=10+radval,
        popup=label,
        color=rainbow[i],
        fill=True,
        fill_color=rainbow[i],
        fill_opacity=0.25).add_to(louisville)
# Add one marker per sampled venue whose zip code already has a label.
for venue_row in venueRandom.itertuples():
    # Position 0 of the tuple is the dataframe index; the fields follow it.
    name, zc = venue_row[1], int(venue_row[2])
    la, lg = venue_row[3], venue_row[4]
    ra, col, cat, lbl = venue_row[5], venue_row[6], venue_row[7], venue_row[8]
    if zc not in zcLabels:
        continue
    # Venue details followed by the label of the surrounding zip code.
    popup_text = '||'.join(
        ['Venue:'+name, 'Rating:'+str(ra), 'Category:'+cat, zcLabels[zc]]
    )
    venue_marker = folium.Marker(
        location=[la, lg],
        popup=folium.Popup(popup_text, parse_html=True),
    )
    venue_marker.add_to(louisville)
# show the Louisville map
louisville