In groups of 2-4 people, write a Python program that calculates great circle distance between NYC and London.
import numpy as np
import math
from geopy.distance import great_circle
def great_circle_dist(a,b):
'''
given two points a and b, calculate great circle distance between them
where phi(s) are latitudes and lambda(s) are longitudes
'''
r = 6378.1 # km
phi1 = math.radians(a[1])
phi2 = math.radians(b[1])
lambda1 = math.radians(a[0])
lambda2 = math.radians(b[0])
d = r * math.acos((math.sin(phi1)*math.sin(phi2)) + (math.cos(phi1)*math.cos(phi2)*math.cos(abs(lambda1-lambda2))))
return d
NYC = (-73.935242, 40.730610) # lng, lat
London = (-0.1275, 51.50722) # lng, lat
dist = great_circle_dist(NYC, London)
print(dist, 'km')
print(great_circle(NYC[::-1], London[::-1]))
5570.560077026861 km 5564.366878189245 km
from IPython.display import Image
Image(filename='flights.jpeg')
import osmnx as ox
import pyproj
import geopandas as gpd
#pyproj.datadir.get_data_dir()
#pyproj.datadir.set_data_dir('C:/Users/barguzin/Anaconda3/envs/geo_env/Library/share/proj')
ox.settings.use_cache = True
import contextily as cx
goleta = ox.geocode_to_gdf("Goleta, CA, USA") # get Goleta
go_graph = ox.graph_from_place('Goleta, CA, USA', network_type="drive")
go_nodes, go_streets = ox.graph_to_gdfs(go_graph)
tags = {'amenity': ['pub', 'bar', 'cafe', 'restaurant']} # used for parsing OSM data
dist = 5000 # set search radius (in meters)
# download POIs
pois = ox.geometries.geometries_from_point(center_point = (goleta.lat[0],goleta.lon[0]), tags=tags, dist=dist)
# keep only points
pois = pois.loc[pois.geom_type=="Point"]
go_proj = ox.project_gdf(goleta, to_crs='EPSG:3857') # re-project layers
go_streets_proj = ox.project_gdf(go_streets, to_crs='EPSG:3857')
pois_proj = ox.project_gdf(pois, to_crs='EPSG:3857')
# print to the output
pois.head(3)
amenity | brand | brand:wikidata | brand:wikipedia | cuisine | name | official_name | takeaway | geometry | source | ... | disused:building | capacity | height | roof:colour | roof:shape | contact:facebook | addr:housename | designation | ways | type | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
element_type | osmid | |||||||||||||||||||||
node | 448863565 | cafe | Starbucks | Q37158 | en:Starbucks | coffee_shop | Starbucks | Starbucks Coffee | yes | POINT (-119.84817 34.41147) | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1341709739 | restaurant | NaN | NaN | NaN | american | IHOP | NaN | NaN | POINT (-119.78818 34.44325) | Local Knowledge | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
1348747781 | restaurant | NaN | NaN | NaN | indian | Masala Spice Indian Cuisine | NaN | NaN | POINT (-119.82570 34.44172) | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 rows × 58 columns
fig, ax = plt.subplots()
go_proj.plot(ax=ax, color='gray')
go_streets_proj.plot(ax=ax, zorder=2, lw=.25, color='k')
pois_proj.plot(ax=ax, color='r', markersize=15, ec='w')
go_proj.centroid.plot(color='r', markersize=200, ec='w', marker='*', ax=ax, zorder=3)
cx.add_basemap(ax=ax)
_ = ax.axis("off")
Assume we would like to calculate how far each of the POIs from the center of the polygon.
centr = gpd.GeoDataFrame(data=[1], columns=['geo_id'], geometry=go_proj.geometry.centroid)
dist_to_centr = pois_proj.distance(centr.geometry.iloc[0]).reset_index()
dist_to_centr.drop('element_type', axis=1, inplace=True)
dist_to_centr.columns = ['osm_id', 'dist_m']
dist_to_centr.head()
osm_id | dist_m | |
---|---|---|
0 | 448863565 | 3548.258535 |
1 | 1341709739 | 8014.092418 |
2 | 1348747781 | 3856.467935 |
3 | 1396621915 | 2469.264505 |
4 | 1469690940 | 6317.223055 |
# add the data back to the pois_proj
print(pois_proj.shape)
pois_proj = pois_proj.merge(dist_to_centr, left_on='osmid', right_on='osm_id')
print(pois_proj.shape)
print(type(pois_proj))
(46, 58) (46, 60) <class 'geopandas.geodataframe.GeoDataFrame'>
import shapely.geometry as spg
lines = []
for i, j in zip(pois_proj.geometry.values, np.repeat(centr.centroid.geometry.values, pois_proj.shape[0])):
ax, ay = i.x, i.y
bx, by = j.x, j.y
l = spg.LineString([(ax,ay),(bx,by)])
lines.append(l)
lines_gdf = gpd.GeoDataFrame(data=np.arange(0,len(lines)), columns=['geo_id'], geometry=lines, crs='epsg:3857')
print(lines_gdf.crs, centr.crs)
lines_gdf.head()
epsg:3857 EPSG:3857
geo_id | geometry | |
---|---|---|
0 | 0 | LINESTRING (-13341437.257 4084187.000, -133427... |
1 | 1 | LINESTRING (-13334759.579 4088475.583, -133427... |
2 | 2 | LINESTRING (-13338935.474 4088270.153, -133427... |
3 | 3 | LINESTRING (-13344993.881 4088445.536, -133427... |
4 | 4 | LINESTRING (-13336396.922 4087430.222, -133427... |
fig, ax = plt.subplots(figsize=(12,5))
go_proj.plot(ax=ax, color='gray')
go_streets_proj.plot(ax=ax, zorder=2, lw=.25, color='k')
pois_proj.plot(ax=ax, color='r', markersize=15, ec='w')
go_proj.centroid.plot(color='r', markersize=200, ec='w', marker='*', ax=ax, zorder=3)
lines_gdf.plot(ax=ax, color='r', linestyle='dashed', linewidth=.5)
cx.add_basemap(ax=ax)
for index, row in pois_proj.iloc[:5,].iterrows():
ax.annotate(np.round(row['dist_m'],2), (row['geometry'].x, row['geometry'].y), fontsize=10, weight='bold')
_ = ax.axis("off")
print(f'Average distance from POIs to the centroid of Goleta is {np.round(pois_proj.dist_m.mean(),2)}m')
Average distance from POIs to the centroid of Goleta is 3039.32m
# reproject graph and generate find central node
Gp = ox.project_graph(go_graph, to_crs='epsg:3857')
# find each nearest node to several points, and optionally return distance
nodes, dists = ox.nearest_nodes(Gp, pois_proj.geometry.x, pois_proj.geometry.y, return_dist=True)
centr_node = ox.nearest_nodes(Gp, centr.geometry.x, centr.geometry.y)
centr_node = centr_node[0]
routes_list = []
for i in nodes:
route = ox.shortest_path(Gp, centr_node, list(go_graph[i])[0], weight="length")
if route is not None:
routes_list.append(route)
ox.plot_graph_routes(Gp, routes=routes_list, route_linewidth=1, node_size=0);
# create lines from routes
nodes2, edges2 = ox.graph_to_gdfs(Gp)
list_lines = []
for r in routes_list:
route_nodes = nodes2.loc[r]
l = spg.LineString(route_nodes['geometry'].to_list())
list_lines.append(l)
gdf1 = gpd.GeoDataFrame(geometry=list_lines, crs='epsg:3857')
print(f'Average distance from POIs to the centroid of Goleta on the road network: {np.round(gdf1.geometry.length.mean(),2)}m')
gdf1.plot();
Average distance from POIs to the centroid of Goleta on the road network: 3763.46m
What do we call adjacent when it comes to points, lines, polygons?
# Buffer
fig, ax = plt.subplots(figsize=(12,5))
go_proj.plot(ax=ax, color='gray')
go_proj.centroid.plot(color='r', markersize=200, ec='w', marker='*', ax=ax)
go_proj.centroid.buffer(1000).plot(fc='None', ec='r', ax=ax)
go_proj.centroid.buffer(2000).plot(fc='None', ec='r', ax=ax, linestyle='dashed')
go_proj.centroid.buffer(5000).plot(fc='None', ec='r', ax=ax, linestyle='-.')
pois_proj.plot(ax=ax, color='b', markersize=15, ec='w')
cx.add_basemap(ax=ax)
_ = ax.axis("off")
import cenpy
dectest = cenpy.products.Decennial2010()
gol_data = dectest.from_place('Goleta,CA', level='block', variables=['^P004', 'P001001'])
fig, ax = plt.subplots(figsize=(12,4))
gol_data.plot(column='P004003', cmap='Reds', ax=ax, fc='w', ec='k');
gol_data.plot(ax=ax, fc='None', ec='k');
Matched: Goleta,CA to Goleta city within layer Incorporated Places
C:\Users\noibar\AppData\Local\Temp\ipykernel_22332\2738113903.py:3: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead. gol_data = dectest.from_place('Goleta,CA', level='block', variables=['^P004', 'P001001'])
from pysal.lib import weights
w_queen = weights.contiguity.Queen.from_dataframe(gol_data)
w_rook = weights.contiguity.Rook.from_dataframe(gol_data)
w_queen.neighbors[0]
C:\Users\noibar\anaconda3\envs\geo_env\lib\site-packages\libpysal\weights\weights.py:172: UserWarning: The weights matrix is not fully connected: There are 11 disconnected components. There are 7 islands with ids: 86, 134, 144, 190, 232, 264, 276. warnings.warn(message) C:\Users\noibar\anaconda3\envs\geo_env\lib\site-packages\libpysal\weights\weights.py:172: UserWarning: The weights matrix is not fully connected: There are 12 disconnected components. There are 7 islands with ids: 86, 134, 144, 190, 232, 264, 276. warnings.warn(message)
[322, 42, 146, 85, 182, 87, 184, 286]
import pandas as pd
df_q = pd.DataFrame(*w_queen.full()).astype(int)
df_r = pd.DataFrame(*w_rook.full()).astype(int)
print(df_q.shape)
(365, 365)
import warnings
warnings.filterwarnings('ignore')
# Plot tract geography
fig, ax = plt.subplots(figsize=(4, 4))
minx, miny, maxx, maxy = go_proj.centroid.buffer(2000).total_bounds;
gol_data.plot(ax=ax, fc='w', ec='k');
w_queen.plot(gol_data,ax=ax, edge_kws=dict(color="r", linestyle=":", linewidth=1),
node_kws=dict(marker=""));
ax.set_xlim(minx, maxx)
ax.set_ylim(miny, maxy)
(4085497.682154004, 4089497.682154004)
import pandas as pd
fig, ax = plt.subplots(figsize=(12,4))
# check the number of neighbors
s = pd.Series(w_queen.cardinalities)
s2 = pd.Series(w_rook.cardinalities)
s.plot.hist(bins=s.unique().shape[0], ax=ax, color='b', alpha=.5);
s2.plot.hist(bins=s.unique().shape[0], ax=ax, color='r', alpha=.5);
ax.axvline(s.mean(), color='b', lw=2)
ax.axvline(s2.mean(), color='r', lw=2)
<matplotlib.lines.Line2D at 0x20eeeff3be0>
knn4_w = weights.distance.KNN.from_dataframe(gol_data, k=4)
knn4_w.islands
[]
knn4_w.histogram
[(4, 365)]
Create a weight matrix for the units below.
from IPython.display import Image
Image(filename='vor.png')
For more examples check out the Chapter on Spatial Weights in Geographic Data Science
Source: GIS Lounge
Source: Swift, A., Liu, L., & Uber, J. (2008). Reducing MAUP bias of correlation statistics between water quality and GI illness. Computers, Environment and Urban Systems, 32(2), 134-148.