0. Package loading

library(tidyverse)
library(sf)
library(units)
library(tmaptools)
library (mapview)

1. Loading census and boundary data

Load postcode boundaries and demographic data from the 2016 census.

postcodeboundariesAUS <- 
    file.path(here::here(), "ABSData", "Boundaries", "POA_2016_AUST.shp") %>%
    sf::read_sf ()

basicDemographicsVIC <- file.path(here::here(), "ABSData",
                                  "2016 Census GCP Postal Areas for VIC",
                                  "2016Census_G01_VIC_POA.csv") %>%
    readr::read_csv()
#> Rows: 698 Columns: 109
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr   (1): POA_CODE_2016
#> dbl (108): Tot_P_M, Tot_P_F, Tot_P_P, Age_0_4_yr_M, Age_0_4_yr_F, Age_0_4_yr...
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Clean up the demographics table so that it only contains columns of interest, which in this case are the postcodes and age related columns. The columns about education status are being removed for clarity.

basicDemographicsVIC <- select(basicDemographicsVIC, POA_CODE_2016,
                               starts_with("Age_"),
                               -starts_with("Age_psns_"))

2. Geocoding hospital locations

Geocoding transforms a text address into a latitude/longitude coordinate. In this example we are using the OpenStreetMap Nominatim service, that can be queried without an API key.

rehab_addresses <- c(DandenongHospital = "Dandenong Hospital, Dandenong VIC 3175, Australia",
                     CaseyHospital = "62-70 Kangan Dr, Berwick VIC 3806, Australia",
                     KingstonHospital = "400 Warrigal Rd, Cheltenham VIC 3192, Australia")
RehabLocations <- tmaptools::geocode_OSM(rehab_addresses, as.sf=TRUE)

These RehabLocations then need to be transformed to the same coordinate reference system as the basicDemographicsVIC.

RehabLocations <- select(RehabLocations, -bbox)
RehabLocations <- sf::st_transform(RehabLocations,
                                   sf::st_crs(postcodeboundariesAUS))

These locations can then be viewed with mapview in one line:

m <- mapview(RehabLocations, map.type="OpenStreetMap.HOT", color='red', col.regions='red', cex=10)
mapshot(m, "map1.html")

Interactive version of this map

3. Combine demographics and spatial data

Join the demographics and shape tables of postcode boundaries, retaining Victoria only. Use postcode boundaries as the reference data frame so that sf data frame structure is retained. The right_join uses postcodes in the right hand argument (basicDemographicsVIC) to determine which rows to keep in the output.

basicDemographicsVIC <- right_join(postcodeboundariesAUS,
                                   basicDemographicsVIC, 
                                   by=c("POA_CODE" = "POA_CODE_2016"))

4. Compute distance to each service centre from each postcode

There are 698 postcodes which we now want to reduce to only those within a zone around the rehab locations. In this example we use a 10km straight-line distance as a simple approach to producing a set of postcodes of interest. Distances are calculated to centroids of each postcode polygon. (Running this code produces a warning that st_centroid does not give correct results for longitude/latitude data, but results are nevertheless good enough for our purposes here.)

dist_to_loc <- function (geometry, location){
    units::set_units(st_distance(st_centroid (geometry), location)[,1], km)
}
dist_range <- units::set_units(10, km)

basicDemographicsVIC <- mutate(basicDemographicsVIC,
       DirectDistanceToDandenong = dist_to_loc(geometry,RehabLocations["DandenongHospital", ]),
       DirectDistanceToCasey     = dist_to_loc(geometry,RehabLocations["CaseyHospital", ]),
       DirectDistanceToKingston  = dist_to_loc(geometry,RehabLocations["KingstonHospital", ]),
       DirectDistanceToNearest   = pmin(DirectDistanceToDandenong,
                                        DirectDistanceToCasey,
                                        DirectDistanceToKingston)
    )

basicDemographicsRehab <- filter(basicDemographicsVIC,
                                 DirectDistanceToNearest < dist_range) %>%
        mutate(Postcode = as.numeric(POA_CODE16)) %>%
        select(-starts_with("POA_"))

That reduces the data down to 43 nearby postcodes, with the last 2 lines converting all prior postcode columns (of which there were several all beginning with “POA”) to a single numeric column named “Postcode”.

m <- mapview (basicDemographicsRehab, map.type="OpenStreetMap.HOT", alpha.regions=0.5)
mapshot(m, "map3.html")

Interactive version of this map

5. Randomly sample addresses in postcodes

Case loads for rehabilitation centres will be estimated based on a set of random addresses. The addresses are generated by sampling a geocoded database, PSMA, to produce a specified number of unique addresses per postcode. The number of addresses selected will depend on the subsequent processing steps, with numbers being reduced if queries to web services are involved.

addressesPerPostcode <- 1000

We define a function, samplePCode, to sample a single postcode and apply it to every postcode using the map function. Sampling syntax is due to the use of data.table inside PSMA. The last st_as_sf() command converts the points labelled “LONGITUDE” and “LATITUDE” into sf::POINT objects. The results are combined in a single table.

library(PSMA)
samplePCode <- function(pcode, number) {
  d <- fetch_postcodes(pcode)
  return(d[, .SD[sample(.N, min(number, .N))], by=.(POSTCODE)])
}

randomaddresses <- map(basicDemographicsRehab$Postcode,
                       samplePCode,
                       number=addressesPerPostcode) %>%
            bind_rows() %>%
            sf::st_as_sf(coords = c("LONGITUDE", "LATITUDE"),
                         crs=st_crs(basicDemographicsRehab),
                         agr = "constant")
head(randomaddresses)
#> Simple feature collection with 6 features and 13 fields
#> Attribute-geometry relationship: 13 constant, 0 aggregate, 0 identity
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 145.0403 ymin: -37.87883 xmax: 145.0672 ymax: -37.86408
#> Geodetic CRS:  GDA94
#>   POSTCODE ADDRESS_DETAIL_INTRNL_ID STREET_LOCALITY_INTRNL_ID BUILDING_NAME
#> 1     3145                 13118594                    548166          <NA>
#> 2     3145                 12102427                    491910          <NA>
#> 3     3145                 12346979                    476239          <NA>
#> 4     3145                 11151167                    510963          <NA>
#> 5     3145                 10697450                    617399          <NA>
#> 6     3145                 11913976                    566839          <NA>
#>   LOT_NUMBER FLAT_NUMBER NUMBER_FIRST STREET_NAME STREET_TYPE_CODE lat_int
#> 1       <NA>           6          126    TOORONGA             ROAD     -37
#> 2       <NA>          NA            8     CHAUCER           AVENUE     -37
#> 3       <NA>          NA           59     NIRVANA           AVENUE     -37
#> 4       <NA>          NA           23      BELSON           STREET     -37
#> 5       <NA>          NA           11     ORVILLE           STREET     -37
#> 6       <NA>          NA          442    WAVERLEY             ROAD     -37
#>    lat_rem lon_int lon_rem                   geometry
#> 1 -8640784     145  402797 POINT (145.0403 -37.86408)
#> 2 -8788298     145  603879 POINT (145.0604 -37.87883)
#> 3 -8788018     145  641629  POINT (145.0642 -37.8788)
#> 4 -8659908     145  452869 POINT (145.0453 -37.86599)
#> 5 -8762428     145  608869 POINT (145.0609 -37.87624)
#> 6 -8784328     145  672369 POINT (145.0672 -37.87843)

6. Display sample addresses and postcodes

Note that there are 42000 random addresses. Plotting this many points can be quite slow using mapview, so if you want to view the results, you might need to be patient. (Much faster plotting can be achieved with an API key via mapdeck.)

m <- mapview(randomaddresses, map.type="OpenStreetMap.HOT", cex = 1, color = "blue")
if (!file.exists("map2.html")) {
  mapshot(m, "map2.html")
}

Interactive version of this map

7. Create a street network database

Road distance and travel time from each address to each hospital can be computed using a database of the street network within the bounding polygon defined by basicDemographicsRehab. Street network data can be obtained from OpenStreetMap using the dodgr package, which calls the osmdata package to do the downloading.

Use of a carefully selected polygon of interest will, in many cases, dramatically reduce the download volume compared to a simple rectangular bounding box. Here we use the polygon defined by our nearby postcodes, which first need to be re-projected onto the CRS of OpenStreetMap data. st_union merges all of the polygons to form the single enclosing polygon, and the final command extracts the coordinates in a form required for the dodgr query.

bounding_polygon <- sf::st_transform(basicDemographicsRehab,
                                     sf::st_crs(4326)) %>%
    sf::st_union () #%>%
    #sf::st_coordinates ()
#bounding_polygon <- bounding_polygon [, 1:2]

We now need to create the street network enclosed within the polygon. It is possible to directly retrieve small street networks with dodgr_streetnet, but larger networks tend to result in intermittent errors. The simplest way to create larger street networks is to first retrieve regional OpenStreemap databases and query those, as follows (downloading and converting to a geopackage file to avoid problems with large data sizes):

osmdb <- here::here("data", "australia-latest.osm.pbf")
osmgpkg <- here::here("data", "australia-latest.gpkg")
if (!file.exists(osmdb)) {
  origtimeout <- options(timeout=600)
  download.file("https://download.geofabrik.de/australia-oceania/australia-latest.osm.pbf", destfile = osmdb)
  p <- osmextract::oe_vectortranslate(file_path=osmdb )
  options(timeout=origtimeout)
}

Geopackages are sqlite databases that can be queried as follows:

# list the tables
aus_tables <- sf::read_sf(osmgpkg, query="SELECT * from gpkg_contents")
print(aus_tables)
#> # A tibble: 1 × 10
#>   table_name data_type identifier description last_change         min_x min_y
#>   <chr>      <chr>     <chr>      <chr>       <dttm>              <dbl> <dbl>
#> 1 lines      features  lines      ""          2022-04-05 21:13:15  67.1 -58.4
#> # … with 3 more variables: max_x <dbl>, max_y <dbl>, srs_id <dbl>

# wkt is a spatialfilter defining which lines we read.
wkt <- st_as_text(st_geometry(bounding_polygon))
dandenong_streets <- read_sf(osmgpkg, wkt_filter = wkt, query="select * from lines where highway!='NULL'")

dandenong_streets now contains the roads within the bounding polygon. It is quite a large network and will take time to process.

8. Estimation of travel time

Travel time is estimated using distance along the stret network. The dodgr package needs to decompose the sf-formatted street network, which consists of long, connected road segments, into individual edges. This is done with the weight_streetnet() function, which modifies the distance of each edge to reflect typical travel conditions for a nominated mode of transport.

library (dodgr)
net <- weight_streetnet (dandenong_streets, wt_profile = "motorcar")
format (nrow (net), big.mark = ",")
#> [1] "403,502"

This command has decomposed the 93,884 streets into 403,502 distinct segments. The resultant network has a d_weighted column which preferentially weights the distances for the nominated mode of tranport. Those parts of the network which are unsuitable for vehicular transport have values of .Machine$double.xmax = r .Machine$double.xmax. Because we want to align our random points to the routable component of the network, these need to be removed.

net <- net [which (net$d_weighted < .Machine$double.xmax), ]
nrow (net)
#> [1] 403502

This reduces the number of edges in the network to 403,502. We can now use the net object to calculate the distances, along with simple numeric coordinates of our routing points, projected on to the same CRS as OpenStreetMap (OSM), which is 4326:

fromCoords <- st_coordinates (st_transform (randomaddresses, crs = 4326))
toCoords <- st_coordinates (st_transform (RehabLocations, crs = 4326))

Although not necessary, distance calculation is quicker if we map these from and to points precisely on to the network itself. OSM assigns unique identifiers to every single object, and so our routing coordinates can be converted to OSM identifiers of the nearest street nodes. The nodes themselves are obtained with the dodgr_vertices() function.

nodes <- dodgr_vertices (net)
fromIDX <- match_pts_to_graph (nodes, fromCoords, connected = TRUE)
from <- unique (nodes$id [fromIDX])
to <- nodes$id [match_pts_to_graph (nodes, toCoords, connected = TRUE)]

## Store graph node for each address 
randomaddresses <- mutate(randomaddresses, NodeIDX=fromIDX, GraphNodeID=nodes$id[fromIDX])

The matrices of from and to coordinates have now been converted to simple vectors of OSM identifiers. Calculating the pair-wise distances between all of those coordinates is as simple as,

d <- dodgr_dists (net, from = from, to = to)

And that takes only around 1.1 seconds to calculate distances between (3 rehab centres times 20,000 random addresses = ) 60,000 pairs of points. Travel times may then be presumed directly proportional to those distances.

9. Address-based catchment basins

First assign each point to its nearest hospital according to the street network distances returned from dodgr_dists. Note that points on the outer periphery of the network may not necessarily be connected to the main part of the network, as we’ll see below. The following code assigns each source address to the nearest destination.

DestNames <- c(rownames(RehabLocations), "Disconnected")
DestNumber <- as.numeric (apply(d, MARGIN=1, which.min))
DestNumber [is.na (DestNumber)] <- 4 # the disconnected points
BestDestination <- DestNames[DestNumber]
table (BestDestination)
#> BestDestination
#>     CaseyHospital DandenongHospital      Disconnected  KingstonHospital 
#>              4462              9492                 6             10966

And there are 6 points that are not connected. The allocation of points, including these disconnected ones, can be inspected on a map with the following code, start by setting up a data.frame of fromCoords.

fromCoords <- nodes [match (from, nodes$id), ]
fromCoords$DestNumber <- DestNumber
fromCoords$Destination <- BestDestination

The results can be viewed with mapview, first requiring these points to be converted to sf form, where coords = 2:3 simply specifies the longitude and latitude columns, and the select command filters the data down to just the geometrical points and the DestNumber, so the latter will be automatically used to colour the mapview points.

fromCoords_sf <- st_as_sf (fromCoords, coords = 2:3, crs = 4326) %>%
    select (c (DestNumber, geometry))
m <- mapview (fromCoords_sf, map.type="OpenStreetMap.HOT", cex=1, color=DestNumber)
if (!file.exists("map4.html")) {
  mapshot(m, "map4.html")
}

Interactive version of this map

This map (in its interactive form) clearly reveals that the 6 destinations that are disconnected from the street network all lie in the periphery, and can be simply discarded.

10. Polygon catchment basins

As a final step, we’ll convert those clusters of points into enclosing polygons, using a Voronoi tesselation. sf::st_voronoi doesn’t return the polygons in the same order as the original points, requiring a manual re-sorting in order to use this to match voronoi polygons to points for each catchment.

g <- st_multipoint(as.matrix(fromCoords[,c("x", "y")]))
v <- st_voronoi(x=g) # results in geometry collection objects
v <- st_collection_extract(v) # converts to polygons
fromCoords_sf <- st_as_sf(fromCoords, coords=c("x", "y"))
vorder <- unlist(st_intersects(fromCoords_sf, v))
v <- v[vorder] # polygons in same order as points
v <- st_sf (DestNumber = fromCoords$DestNumber,
            Destination = fromCoords$Destination,
            geometry = v,
            crs = 4326)

We then combine the Voronoi polygons associated with each rehabilitation centre to produce larger polgons defining the each catchment region.

bounding_polygon <- sf::st_transform(basicDemographicsRehab,
                                     sf::st_crs(4326)) %>%
  sf::st_union () 
v <- lapply (1:3, function (i) {
                 v [v$DestNumber == i, ] %>%
                     st_intersection (bounding_polygon) %>%
                     st_union() })
#> Warning: attribute variables are assumed to be spatially constant throughout all
#> geometries

#> Warning: attribute variables are assumed to be spatially constant throughout all
#> geometries

#> Warning: attribute variables are assumed to be spatially constant throughout all
#> geometries
v <- st_sf (DestNumber = 1:3,
            Destination = DestNames [1:3],
            geometry = do.call (c, v))

Then plot with mapview, with easy addition of rehab centres:

m <- mapview (v, map.type="OpenStreetMap.HOT", col.regions=v$DestNumber, alpha.regions=0.4) %>%
    leafem::addFeatures (data = RehabLocations,color='blue', col.regions='blue', cex=10, opacity=1, fillOpacity=1)
mapshot(m, "map5.html")

Interactive version of this map

11. Estimate caseload per centre

Finally, we use a per postcode breakdown of proportion of addresses going to each centre, so that we can compute the number of cases going to each centre.

In step 8 above we recorded the node id of each address. We now join the destination to the random address information based on the node id, allowing us to produce per postcode summaries, and thus per rehabilitation centre estimates.

randomaddresses <- left_join(randomaddresses, fromCoords, by=c("GraphNodeID"="id"))
postcodes <- st_set_geometry(randomaddresses, NULL) %>% group_by(POSTCODE, Destination) %>% summarise(n=length(DestNumber))
head (postcodes)
#> `summarise()` has grouped output by 'POSTCODE'. You can override using the
#> `.groups` argument.
POSTCODE Destination n
3145 Disconnected 2
3145 KingstonHospital 998
3148 KingstonHospital 1000
3156 CaseyHospital 2
3156 DandenongHospital 998
3162 KingstonHospital 1000

This table provides the breakdown for each postcode of cases going to each rehab centre. We simply need to allocate all of these to each centre with the following code, which converts the final estimated total cases to each centre into relative proportions.

postcodes %>%
    filter (Destination != "Disconnected") %>%
    group_by (Destination) %>%
    summarise (total = sum (n)) %>%
    mutate (percent = 100 * total / sum (total))
Destination total percent
CaseyHospital 6419 15.29
DandenongHospital 14222 33.87
KingstonHospital 21349 50.84

Those results reflect random samples from each postcode, and so do not reflect possible demograhic differences in stroke rates between postcodes. That can be derived using the following table of stroke incidence per 100,000:

Age Incidence
0-14 0
15-24 5
25-34 30
35-44 44
45-54 111
55-64 299
65-74 747
75-84 1928
85+ 3976

We have the demographic profile of each postcode in basicDemographicsRehab, for which we now need to regroup some of the columns (0-4 + 5-14, and 15-19 + 20-24). This then gives the total population for that postcode for each demographic group, from which we can work out the expected stroke incidence. The following code also removes previous demographic columns (the select line).

basicDemographicsRehab <- basicDemographicsRehab %>%
        select(-starts_with("POA_"))
s <- 1 / 100000 # rate per 100,000
basicDemographicsRehab <- basicDemographicsRehab %>%
    mutate (stroke_cases = s * ((Age_15_19_yr_P + Age_20_24_yr_P) * 5 +
            Age_25_34_yr_P * 30 +
            Age_35_44_yr_P * 44 +
            Age_45_54_yr_P * 111 +
            Age_55_64_yr_P * 299 +
            Age_65_74_yr_P * 747 +
            Age_75_84_yr_P * 1928 +
            Age_85ov_P * 3976)) %>%
    select (-c (contains ("_yr_"), contains ("85ov")))

The per postcode estimate of stroke cases is then joined to our simulation data.

basicDemographicsRehab <- rename (basicDemographicsRehab, POSTCODE = Postcode)
postcodes <- left_join (postcodes, basicDemographicsRehab, by = "POSTCODE") %>%
    select (POSTCODE, DestNumber, Destination, stroke_cases)
postcodes
#> Note: Using an external vector in selections is ambiguous.
#> ℹ Use `all_of(DestNumber)` instead of `DestNumber` to silence this message.
#> ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
#> This message is displayed once per session.
POSTCODE n AREA_SQKM Destination stroke_cases
3145 2 8.98 Disconnected 66.22
3145 998 8.98 KingstonHospital 66.22
3148 1000 3.00 KingstonHospital 21.98
3156 2 53.51 CaseyHospital 103.44
3156 998 53.51 DandenongHospital 103.44
3162 1000 4.74 KingstonHospital 69.51
3163 1000 7.21 KingstonHospital 88.12
3165 1000 9.04 KingstonHospital 93.19
3166 1000 8.47 KingstonHospital 72.56
3167 1000 6.65 KingstonHospital 31.68

The number of random addresses with valid destinations is then included in our postcodes data set.

postcodesamples <- filter(postcodes, Destination != "Disconnected") %>% 
  group_by(POSTCODE) %>% 
  summarise(totalsamples=sum(n))
postcodes <- left_join(postcodes, postcodesamples, by="POSTCODE")

Finally the proportion of cases from a postcode attending a rehabilitation center can be computed by dividing the number of random addresses attending a center by the total number of random addresses (usually 1000). The number of cases from a postcode attending a center is therefore the estimated stroke case count for the postcode multiplied by that proportion. The total loading can be computed by adding the contributions from all postcodes.

postcodes %>%
    filter (Destination != "Disconnected") %>%
    group_by (Destination) %>%
    summarise (total = sum (stroke_cases * n/totalsamples)) %>%
    mutate (percent = 100 * total / sum (total))
Destination total percent
CaseyHospital 302 11.91
DandenongHospital 884 34.92
KingstonHospital 1345 53.16