Create simplified regions for Streamlit app#

The stroke demographics app shows a map of the demographic data at the LSOA level. It is useful to show which LSOAs can be grouped together, for example into stroke unit catchment areas or into ambulance service areas.

The app uses simplified LSOA shapes to speed up the calculations. This means that the LSOAs in the app have different shapes from those that were used to create the region files available from the Office for National Statistics geoportal and similar services.

In this notebook we will recreate the region boundary shapes from the simplified LSOA shapes.

Notebook setup#

from dataclasses import dataclass
import geopandas as gpd
import os
import pandas as pd
from importlib_resources import files
from shapely.validation import make_valid  # for fixing dodgy polygons

# The stroke-maps package from our other stroke work contains data
# linking different region types.
import stroke_maps
# Define file paths
@dataclass(frozen=True)
class Paths:
    '''
    Immutable collection of the paths used in this notebook.

    Directory names and file names are stored separately and are
    joined with `os.path.join` at the point of use. Every entry is an
    annotated dataclass field, so the values appear in the repr and
    can be overridden when creating an instance, e.g.
    `Paths(data='/other/dir')`.
    '''

    # Top-level data directory:
    data: str = './data'
    # Input table of LSOA-level data (inside `data`):
    collated: str = 'collated_data_amb.csv'

    # Subdirectory of `data` for shape files. The simplified LSOA
    # shapes are read from here and the merged region outlines are
    # written here.
    shapefiles: str = 'shapefiles'
    # Simplified LSOA boundaries (inside `shapefiles`):
    lsoa_shp: str = 'LSOA_V3_reduced_simplified.geojson'


paths = Paths()

Load data#

First, load the tabular data that says which region each LSOA belongs to, and keep only the region columns:

# Names of the columns that assign each LSOA to a region:
cols_regions = [
    'closest_ivt_unit',
    'closest_mt_unit',
    'closest_mt_transfer',
    'la_district_name_2019',
    'rural_urban_2011',
    'ambulance_service',
    'local_authority_district_22',
    'LAD22NM',
    'country'
]

# Read the collated table with LSOA names as the index and discard
# everything except the region columns:
df_lsoa = pd.read_csv(
    os.path.join(paths.data, paths.collated),
    index_col='LSOA',
)[cols_regions]

# Show the first few LSOAs, one per column:
df_lsoa.head().T
LSOA Welwyn Hatfield 010F Welwyn Hatfield 012A Welwyn Hatfield 002F Welwyn Hatfield 002E Welwyn Hatfield 010A
closest_ivt_unit SG14AB SG14AB SG14AB SG14AB SG14AB
closest_mt_unit NW12BU NW12BU NW12BU NW12BU NW12BU
closest_mt_transfer CB20QQ CB20QQ CB20QQ CB20QQ CB20QQ
la_district_name_2019 Welwyn Hatfield Welwyn Hatfield Welwyn Hatfield Welwyn Hatfield Welwyn Hatfield
rural_urban_2011 Urban city and town Urban city and town Urban city and town Urban city and town Urban city and town
ambulance_service East of England East of England East of England East of England East of England
local_authority_district_22 Welwyn Hatfield Welwyn Hatfield Welwyn Hatfield Welwyn Hatfield Welwyn Hatfield
LAD22NM Welwyn Hatfield Welwyn Hatfield Welwyn Hatfield Welwyn Hatfield Welwyn Hatfield
country England England England England England

Then load the LSOA shape files:

# Read the simplified LSOA boundaries:
gdf_lsoa = gpd.read_file(
    os.path.join(paths.data, paths.shapefiles, paths.lsoa_shp))

# Repair invalid polygons. Missing geometries (None) are passed
# through untouched.
gdf_lsoa['geometry'] = [
    g if g is None else make_valid(g)
    for g in gdf_lsoa['geometry'].values
]

gdf_lsoa.head()
LSOA11NM LSOA11CD geometry
0 City of London 001A E01000001 POLYGON ((-0.09470 51.52060, -0.09730 51.52160...
1 City of London 001B E01000002 POLYGON ((-0.08810 51.51940, -0.09270 51.52140...
2 City of London 001C E01000003 POLYGON ((-0.09450 51.52200, -0.09680 51.52330...
3 City of London 001E E01000005 POLYGON ((-0.07590 51.51590, -0.07730 51.51740...
4 Barking and Dagenham 016A E01000006 POLYGON ((0.09330 51.53790, 0.08650 51.54180, ...

And a bonus file that links LSOA to more region types:

# This file is shipped inside the `stroke-maps` package, so locate it
# through the package resources rather than a path on disk:
path_to_file = files('stroke_maps.data').joinpath('regions_lsoa_ew.csv')

# Index by LSOA name, and call the index 'LSOA' as in df_lsoa:
df_lsoa_regions = pd.read_csv(path_to_file, index_col='lsoa')
df_lsoa_regions = df_lsoa_regions.rename_axis('LSOA')

df_lsoa_regions.head()
lsoa_code region region_code region_type
LSOA
Halton 007A E01012367 NHS Cheshire and Merseyside ICB - 01F E38000068 SICBL
Halton 003A E01012368 NHS Cheshire and Merseyside ICB - 01F E38000068 SICBL
Halton 005A E01012369 NHS Cheshire and Merseyside ICB - 01F E38000068 SICBL
Halton 007B E01012370 NHS Cheshire and Merseyside ICB - 01F E38000068 SICBL
Halton 016A E01012371 NHS Cheshire and Merseyside ICB - 01F E38000068 SICBL

And a second bonus file that links those region types to yet more region types:

(These two files are stored separately to save disk space)

# Second `stroke-maps` data file: links each SICBL to other regions.
dir_stroke_maps_data = files('stroke_maps.data')
path_to_file = dir_stroke_maps_data.joinpath('regions_ew.csv')
df_regions = pd.read_csv(path_to_file)

df_regions.head()
region region_code region_type short_code country icb icb_code isdn
0 Aneurin Bevan University Health Board W11000028 LHB AB Wales NaN NaN NaN
1 NHS Bath and North East Somerset, Swindon and ... E38000231 SICBL BA England NHS Bath and North East Somerset, Swindon and ... E54000040 Gloucester, BSW, BNSSG and Somerset
2 Betsi Cadwaladr University Health Board W11000023 LHB BC Wales NaN NaN NaN
3 NHS Bedfordshire, Luton and Milton Keynes ICB ... E38000249 SICBL BD England NHS Bedfordshire, Luton and Milton Keynes Inte... E54000024 East of England (South)
4 NHS Black Country ICB - D2P2L E38000259 SICBL BL England NHS Black Country Integrated Care Board E54000062 North Midlands

Finally a file to link LSOA to MSOA:

# LSOA-to-MSOA lookup table, indexed by LSOA name:
path_to_msoa = os.path.join(paths.data, 'lsoa_2021', 'lsoa_to_msoa.csv')
df_msoa = pd.read_csv(path_to_msoa, index_col='lsoa11nm')

# Call the index 'LSOA' so that it matches df_lsoa:
df_msoa = df_msoa.rename_axis('LSOA')

df_msoa.head()
lsoa11cd msoa11cd ladcd msoa11nm country
LSOA
City of London 001A E01000001 E02000001 E09000001 City of London 001 E
City of London 001B E01000002 E02000001 E09000001 City of London 001 E
City of London 001C E01000003 E02000001 E09000001 City of London 001 E
City of London 001E E01000005 E02000001 E09000001 City of London 001 E
Barking and Dagenham 016A E01000006 E02000017 E09000002 Barking and Dagenham 016 E

Merge region shapes#

For each column of region data, merge the shapes of all LSOAs that belong to each region.

Save a copy of the merged shapes.

def merge_lsoas_into_region(gdf, col):
    '''
    Merge LSOA shapes that share a value in one region column.

    Inputs
    ------
    gdf - GeoDataFrame. One row per LSOA. Must contain a 'geometry'
          column and the column named by `col`. It is not modified.
    col - str. Name of the column whose values define the regions.

    Returns
    -------
    GeoDataFrame with one row per distinct value of `col` and two
    columns: `col` and the merged 'geometry'.

    NOTE(review): `dissolve` is called with its default settings,
    which are understood to leave out rows where `col` is missing -
    confirm against the geopandas documentation.
    '''
    # Selecting columns with a list already creates a new object, so
    # there is no need to copy the whole GeoDataFrame first.
    gdf_region = gdf[['geometry', col]]
    # Dissolve by value, then move the region values out of the index
    # and back into a normal column:
    return gdf_region.dissolve(by=col).reset_index()
# List the columns available for merging.
# NOTE(review): the listing includes region columns that were not present
# when gdf_lsoa was loaded from file; the step that joins the region tables
# onto gdf_lsoa is not visible in this part of the notebook - confirm it
# has been run before this cell.
gdf_lsoa.columns
Index(['LSOA11NM', 'LSOA11CD', 'geometry', 'closest_ivt_unit',
       'closest_mt_unit', 'closest_mt_transfer', 'la_district_name_2019',
       'rural_urban_2011', 'ambulance_service', 'local_authority_district_22',
       'LAD22NM', 'country', 'msoa11cd', 'lsoa_code', 'region', 'region_code',
       'region_type', 'short_code', 'icb', 'icb_code', 'isdn', 'lhb',
       'icb_lhb', 'england_wales'],
      dtype='object')
# Region columns to build outlines for:
cols_to_merge = [
    'closest_ivt_unit',
    'closest_mt_unit',
    'closest_mt_transfer',
    'rural_urban_2011',
    'ambulance_service',
    'LAD22NM',
    'country',
    'region_code',
    'icb_code',
    'isdn',
    'lhb',
    'icb_lhb',
    'msoa11cd',
    'england_wales'
]

# Write one file per region column, named after that column:
for col in cols_to_merge:
    path_to_outline = os.path.join(
        paths.data, paths.shapefiles, f'outline_{col}.geojson')
    gdf_merged = merge_lsoas_into_region(gdf_lsoa, col)
    gdf_merged.to_file(path_to_outline)