import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import osmnx as ox
import pandas as pd
from scipy.spatial import Voronoi
from shapely.geometry import LineString, Polygon
from shapely.ops import polygonize
np.random.seed(0)
ox.config(use_cache=True)
# 2016 demographic data by postcode
path_demographic_data = '../../ABSData/2016 Census GCP Postal Areas for VIC/2016Census_G01_VIC_POA.csv'
postcode_demographics = pd.read_csv(path_demographic_data)
postcode_demographics.shape
(698, 109)
# postcode boundaries shapefile
path_postcode_boundaries = '../../ABSData/Boundaries/POA_2016_AUST.shp'
postcode_boundaries = gpd.read_file(path_postcode_boundaries)
postcode_boundaries.shape
(2670, 5)
# re-project postcode_boundaries to align CRSs
postcode_boundaries = postcode_boundaries.dropna(subset=['geometry']).to_crs(ox.settings.default_crs)
# geocode the addresses of 3 hospitals to lat-lng
dandenong_latlng = ox.geocode('Dandenong Hospital, Dandenong VIC 3175, Australia')
casey_latlng = ox.geocode('62-70 Kangan Dr, Berwick VIC 3806, Australia')
kingston_latlng = ox.geocode('The Kingston Centre, Heatherton VIC 3202, Australia')
# X and Y vectors of these hospitals' coordinates
hospitals = ['dandenong', 'casey', 'kingston']
X = pd.Series([dandenong_latlng[1], casey_latlng[1], kingston_latlng[1]], index=hospitals)
Y = pd.Series([dandenong_latlng[0], casey_latlng[0], kingston_latlng[0]], index=hospitals)
# merge demographic data with spatial boundaries
gdf = pd.merge(left=postcode_boundaries, right=postcode_demographics,
how='right', left_on='POA_CODE', right_on='POA_CODE_2016')
gdf.shape
(698, 114)
# extract postcode centroid lats and lngs
gdf1 = gdf.to_crs('epsg:3112')
postcodecentroid = gdf1['geometry'].centroid
postcodecentroid = postcodecentroid.to_crs(ox.settings.default_crs)
postcode_lng = postcodecentroid.x
postcode_lat = postcodecentroid.y
# calculate great-circle distance from each postcode to each hospital in km
for hospital in hospitals:
col = f'dist_{hospital}'
gdf[col] = ox.distance.great_circle_vec(postcode_lat, postcode_lng, Y.loc[hospital], X.loc[hospital]) / 1000
# subset only those postcodes within 10km of any of these 3 hospitals
mask = (gdf[['dist_dandenong', 'dist_casey', 'dist_kingston']] < 10).any(axis=1)
gdf_nearby = gdf[mask]
# plot the postcodes and hospitals
fig, ax = plt.subplots(figsize=(8,8))
gdf_nearby.plot(ax=ax, color='w', edgecolor='#cccccc', linewidth=1)
ax.scatter(X, Y, s=150, c='w', edgecolor='k', linewidth=4)
ax.axis('off')
plt.show()
# get the street network within the union of these postcodes
polygon = gdf_nearby.unary_union
G = ox.graph_from_polygon(polygon, network_type='drive')
# identify the nearest network nodes to each hospital
hospital_nodes = pd.Series(ox.get_nearest_nodes(G, X, Y, method='kdtree'), index=X.index)
# sample "addresses" along the street network (in this example, nodes stand-in for true addresses)
addresses = ox.graph_to_gdfs(G, edges=False).sample(1000)
# plot the postcodes, hospitals, and addresses
fig, ax = plt.subplots(figsize=(8,8))
gdf_nearby.plot(ax=ax, color='w', edgecolor='#cccccc', linewidth=1, alpha=1)
ax.scatter(addresses['x'], addresses['y'], marker='x', s=20, alpha=0.7)
ax.scatter(X, Y, s=150, c='w', edgecolor='k', linewidth=4, alpha=0.8)
ax.axis('off')
plt.show()
# calculate network-based distances between each address and each hospital
distances = {}
for address in addresses.index:
distances[address] = {}
for hospital_label, hospital_node in hospital_nodes.iteritems():
try:
distance = nx.shortest_path_length(G, source=address, target=hospital_node, weight='length')
except:
distance = None
distances[address][hospital_label] = distance
# identify which hospital is nearest to each address
df_dists = pd.DataFrame(distances).T.dropna().astype(float)
nearest_hospital = df_dists.idxmin(axis=1)
# plot the postcodes, hospitals, and addresses
fig, ax = plt.subplots(figsize=(8,8))
gdf_nearby.plot(ax=ax, color='w', edgecolor='#cccccc', linewidth=1, alpha=1)
colors = ['g', 'r', 'b']
groups = nearest_hospital.groupby(nearest_hospital)
for (label, group), c in zip(groups, colors):
x = addresses.loc[group.index]['x']
y = addresses.loc[group.index]['y']
ax.scatter(x, y, marker='x', s=20, color=c, alpha=0.7)
ax.scatter(X, Y, s=150, c='w', edgecolor='k', linewidth=4, alpha=0.8)
ax.axis('off')
plt.show()
# create voronoi tessellation
vor = Voronoi(addresses[['x', 'y']])
# convert tessellation into regions
lines = [LineString(vor.vertices[line]) for line in vor.ridge_vertices if -1 not in line]
regions = gpd.GeoDataFrame(data=polygonize(lines), columns=['geometry'], geometry='geometry')
# convert regions into temporary catchment basins
basins = []
labels = []
# for each group of addresses nearest to each hospital
groups = nearest_hospital.groupby(nearest_hospital)
for label, group in groups:
# find all the regions that intersect their point geometries to construct a basin
points = addresses.loc[group.index]['geometry'].unary_union
basin = regions[regions.intersects(points)].unary_union
# trim the basin to the extent of the postcodes' geometries
basin = basin.intersection(gdf_nearby.unary_union)
# if the intersection operation yielded a multipolygon, just take the largest polygon as the basin
if not isinstance(basin, Polygon):
basin = max(basin, key=lambda x: x.area)
basins.append(basin)
labels.append(label)
gdf_temp_basins = gpd.GeoDataFrame(data=basins, index=labels, columns=['geometry'], geometry='geometry')
gdf_temp_basins.crs = ox.settings.default_crs
# now expand the catchment basins to fill the postcodes, as tessellation alone likely won't do it
gdf_basins = gdf_temp_basins.copy()
for label in gdf_basins.index:
# get this basin's geometry and the geometry of all other basins
basin = gdf_basins.loc[label]['geometry']
others = [l for l in gdf_basins.index if l != label]
others_geometry = gdf_basins.loc[others, 'geometry'].unary_union
# find all postcodes that intersect this basin, take union, then subtract all other basins from union
# this expands the basin to fill the postcodes that it touches, without overlapping the other basins
expanded_basin = gdf_nearby[gdf_nearby.intersects(basin)].unary_union.difference(others_geometry)
try:
# if the difference operation yielded a multipolygon, just take the largest polygon as the basin
if not isinstance(expanded_basin, Polygon):
expanded_basin = max(expanded_basin, key=lambda x: x.area)
except:
expanded_basin = None
# add this basin to the geodataframe
gdf_basins.loc[label] = expanded_basin
# plot the postcodes, hospitals, addresses, and catchment basins
fig, ax = plt.subplots(figsize=(8,8))
colors = ['g', 'r', 'b']
gdf_nearby.plot(ax=ax, color='w', edgecolor='#cccccc', linewidth=1, alpha=1)
gdf_basins.plot(ax=ax, color=colors, alpha=0.3)
groups = nearest_hospital.groupby(nearest_hospital)
for (label, group), c in zip(groups, colors):
x = addresses.loc[group.index]['x']
y = addresses.loc[group.index]['y']
ax.scatter(x, y, marker='x', s=20, color=c, alpha=0.7)
ax.scatter(X, Y, s=150, c='w', edgecolor='k', linewidth=4, alpha=0.8)
ax.axis('off')
plt.show()
Based on number of strokes per postcode, estimated in example 1, and the catchment basins created here in example 2.
# load estimated strokes per postcode, from example 1
strokes = pd.read_csv('data/postcode_strokes.csv')
# merge postcode boundaries with estimated strokes
postcodes = gdf_nearby[['POA_CODE', 'geometry']]
postcode_strokes = pd.merge(left=postcodes, right=strokes)
# spatial-join the sampled points to their containing postcodes and catchment basins
addresses_postcodes = gpd.sjoin(addresses, postcode_strokes, how='left', op='intersects')[['geometry', 'POA_CODE', 'strokes']]
addresses_postcodes_basins = gpd.sjoin(addresses_postcodes, gdf_basins.reset_index(), how='left', op='intersects')
addresses_postcodes_basins = addresses_postcodes_basins.rename(columns={'index':'basin'})[['POA_CODE', 'basin', 'strokes']]
# count points per postcode/basin combo and the total per postcode
counts = addresses_postcodes_basins.groupby(['POA_CODE', 'basin']).size()
index = counts.index.get_level_values(level=0)
totals = addresses_postcodes_basins.groupby(['POA_CODE']).size().reindex(index)
# calculate share of postcodes' points falling in each catchment basin, then multiply by strokes per postcode
basins_shares_of_postcodes = counts / totals.values
strokes_per_postcode = postcode_strokes.set_index('POA_CODE').reindex(index)['strokes'].values
strokes_per_basin_per_postcode = basins_shares_of_postcodes * strokes_per_postcode
strokes_per_hospital = strokes_per_basin_per_postcode.reset_index().groupby('basin').sum()[0].astype(int)
# estimated strokes per hospital
strokes_per_hospital
basin casey 285 dandenong 896 kingston 1549 Name: 0, dtype: int64
# relative share of estimated strokes per hospital
(strokes_per_hospital / strokes_per_hospital.sum()).round(3)
basin casey 0.104 dandenong 0.328 kingston 0.567 Name: 0, dtype: float64