{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create region-averaged data\n",
    "\n",
    "> This is a copy of notebook 3 (\"Create maps\") with the mapping elements removed.\n",
    "\n",
    "Average the data for all LSOAs in a given region to find new values for e.g. population weighted mean IMD score per IVT catchment, or mean transfer time by ambulance trust.\n",
    "\n",
    "Most of this notebook involves:\n",
    "+ Load the data for all LSOAs\n",
    "+ Pick out only the LSOAs in a given region\n",
    "+ Average or sum the data for those LSOAs\n",
    "\n",
    "Notes:\n",
    "* Change from Notebook 3: ethnicity now looks at \"white British\" vs. \"other than white British\" groups rather than \"white\" vs \"other than white\"."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1 Set up"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.1 Import libraries and define file paths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dataclasses import dataclass\n",
    "import geopandas as gpd\n",
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd\n",
    "from pandas.api.types import is_numeric_dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define file paths\n",
    "@dataclass(frozen=True)\n",
    "class Paths:\n",
    "    '''Singleton object for storing paths to data and database.'''\n",
    "\n",
    "    data = './data'\n",
    "    collated = 'collated_data_amb.csv'\n",
    "\n",
    "    hospitals = 'hospitals'\n",
    "    stroke_hospitals = 'stroke_hospitals_2022.csv'\n",
    "\n",
    "    shapefiles = 'shapefiles'\n",
    "    lsoa_shp = ('Lower_layer_super_output_areas_(E+W)_2011_Boundaries_' +\n",
    "                '(Generalised_Clipped)_V2.zip')\n",
    "\n",
    "\n",
    "paths = Paths()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.2 Load data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**LSOA information** - load `collated_data.csv` produced by `01_combine_demographic_data.ipynb` which has information on each LSOA."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>LSOA</th>\n",
       "      <th>admissions</th>\n",
       "      <th>closest_ivt_unit</th>\n",
       "      <th>closest_ivt_unit_time</th>\n",
       "      <th>closest_mt_unit</th>\n",
       "      <th>closest_mt_unit_time</th>\n",
       "      <th>closest_mt_transfer</th>\n",
       "      <th>closest_mt_transfer_time</th>\n",
       "      <th>total_mt_time</th>\n",
       "      <th>ivt_rate</th>\n",
       "      <th>...</th>\n",
       "      <th>lsoa_code</th>\n",
       "      <th>region</th>\n",
       "      <th>region_code</th>\n",
       "      <th>region_type</th>\n",
       "      <th>short_code</th>\n",
       "      <th>icb</th>\n",
       "      <th>icb_code</th>\n",
       "      <th>isdn</th>\n",
       "      <th>lhb</th>\n",
       "      <th>icb_lhb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Welwyn Hatfield 010F</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>SG14AB</td>\n",
       "      <td>18.7</td>\n",
       "      <td>NW12BU</td>\n",
       "      <td>36.9</td>\n",
       "      <td>CB20QQ</td>\n",
       "      <td>39.1</td>\n",
       "      <td>57.8</td>\n",
       "      <td>6.8</td>\n",
       "      <td>...</td>\n",
       "      <td>E01033311</td>\n",
       "      <td>NHS Hertfordshire and West Essex ICB - 06K</td>\n",
       "      <td>E38000049</td>\n",
       "      <td>SICBL</td>\n",
       "      <td>HE1</td>\n",
       "      <td>NHS Hertfordshire and West Essex Integrated Ca...</td>\n",
       "      <td>E54000025</td>\n",
       "      <td>East of England (South)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NHS Hertfordshire and West Essex Integrated Ca...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Welwyn Hatfield 012A</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>SG14AB</td>\n",
       "      <td>19.8</td>\n",
       "      <td>NW12BU</td>\n",
       "      <td>36.9</td>\n",
       "      <td>CB20QQ</td>\n",
       "      <td>39.1</td>\n",
       "      <td>58.9</td>\n",
       "      <td>6.8</td>\n",
       "      <td>...</td>\n",
       "      <td>E01023920</td>\n",
       "      <td>NHS Hertfordshire and West Essex ICB - 06K</td>\n",
       "      <td>E38000049</td>\n",
       "      <td>SICBL</td>\n",
       "      <td>HE1</td>\n",
       "      <td>NHS Hertfordshire and West Essex Integrated Ca...</td>\n",
       "      <td>E54000025</td>\n",
       "      <td>East of England (South)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NHS Hertfordshire and West Essex Integrated Ca...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 128 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                   LSOA  admissions closest_ivt_unit  closest_ivt_unit_time  \\\n",
       "0  Welwyn Hatfield 010F    0.666667           SG14AB                   18.7   \n",
       "1  Welwyn Hatfield 012A    4.000000           SG14AB                   19.8   \n",
       "\n",
       "  closest_mt_unit  closest_mt_unit_time closest_mt_transfer  \\\n",
       "0          NW12BU                  36.9              CB20QQ   \n",
       "1          NW12BU                  36.9              CB20QQ   \n",
       "\n",
       "   closest_mt_transfer_time  total_mt_time  ivt_rate  ...  lsoa_code  \\\n",
       "0                      39.1           57.8       6.8  ...  E01033311   \n",
       "1                      39.1           58.9       6.8  ...  E01023920   \n",
       "\n",
       "                                       region  region_code  region_type  \\\n",
       "0  NHS Hertfordshire and West Essex ICB - 06K    E38000049        SICBL   \n",
       "1  NHS Hertfordshire and West Essex ICB - 06K    E38000049        SICBL   \n",
       "\n",
       "   short_code                                                icb   icb_code  \\\n",
       "0         HE1  NHS Hertfordshire and West Essex Integrated Ca...  E54000025   \n",
       "1         HE1  NHS Hertfordshire and West Essex Integrated Ca...  E54000025   \n",
       "\n",
       "                      isdn  lhb  \\\n",
       "0  East of England (South)  NaN   \n",
       "1  East of England (South)  NaN   \n",
       "\n",
       "                                             icb_lhb  \n",
       "0  NHS Hertfordshire and West Essex Integrated Ca...  \n",
       "1  NHS Hertfordshire and West Essex Integrated Ca...  \n",
       "\n",
       "[2 rows x 128 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lsoa = pd.read_csv(os.path.join(paths.data, paths.collated))\n",
    "\n",
    "# Load in extra region information:\n",
    "df_lsoa_regions = pd.read_csv('data/lsoa_2021/lsoa_regions.csv', index_col=0)\n",
    "# Remove any repeated columns:\n",
    "cols_to_keep = [c for c in df_lsoa_regions if c not in df_lsoa.columns]\n",
    "# Merge with region information:\n",
    "df_lsoa = pd.merge(df_lsoa, df_lsoa_regions[cols_to_keep],\n",
    "                   left_on='LSOA', right_on='LSOA11NM', how='left')\n",
    "\n",
    "df_lsoa.head(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Check the contents of this dataframe:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LSOA\n",
      "admissions\n",
      "closest_ivt_unit\n",
      "closest_ivt_unit_time\n",
      "closest_mt_unit\n",
      "closest_mt_unit_time\n",
      "closest_mt_transfer\n",
      "closest_mt_transfer_time\n",
      "total_mt_time\n",
      "ivt_rate\n",
      "imd_2019_score\n",
      "la_district_name_2019\n",
      "income_domain_score\n",
      "income_domain_rank\n",
      "idaci_score\n",
      "idaci_rank\n",
      "idaopi_score\n",
      "idaopi_rank\n",
      "ethnic_group_all_categories_ethnic_group\n",
      "ethnic_group_white_total\n",
      "ethnic_group_white_english_welsh_scottish_northern_irish_british\n",
      "ethnic_group_white_irish\n",
      "ethnic_group_white_gypsy_or_irish_traveller\n",
      "ethnic_group_white_other_white\n",
      "ethnic_group_mixed_multiple_ethnic_group_total\n",
      "ethnic_group_mixed_multiple_ethnic_group_white_and_black_caribbean\n",
      "ethnic_group_mixed_multiple_ethnic_group_white_and_black_african\n",
      "ethnic_group_mixed_multiple_ethnic_group_white_and_asian\n",
      "ethnic_group_mixed_multiple_ethnic_group_other_mixed\n",
      "ethnic_group_asian_asian_british_total\n",
      "ethnic_group_asian_asian_british_indian\n",
      "ethnic_group_asian_asian_british_pakistani\n",
      "ethnic_group_asian_asian_british_bangladeshi\n",
      "ethnic_group_asian_asian_british_chinese\n",
      "ethnic_group_asian_asian_british_other_asian\n",
      "ethnic_group_black_african_caribbean_black_british_total\n",
      "ethnic_group_black_african_caribbean_black_british_african\n",
      "ethnic_group_black_african_caribbean_black_british_caribbean\n",
      "ethnic_group_black_african_caribbean_black_british_other_black\n",
      "ethnic_group_other_ethnic_group_total\n",
      "ethnic_group_other_ethnic_group_arab\n",
      "ethnic_group_other_ethnic_group_any_other_ethnic_group\n",
      "all_categories_general_health\n",
      "very_good_or_good_health\n",
      "fair_health\n",
      "bad_or_very_bad_health\n",
      "all_categories_long_term_health_problem_or_disability\n",
      "day_to_day_activities_limited_a_lot\n",
      "day_to_day_activities_limited_a_little\n",
      "day_to_day_activities_not_limited\n",
      "rural_urban_2011\n",
      "population_all\n",
      "age_band_all_0\n",
      "age_band_all_5\n",
      "age_band_all_10\n",
      "age_band_all_15\n",
      "age_band_all_20\n",
      "age_band_all_25\n",
      "age_band_all_30\n",
      "age_band_all_35\n",
      "age_band_all_40\n",
      "age_band_all_45\n",
      "age_band_all_50\n",
      "age_band_all_55\n",
      "age_band_all_60\n",
      "age_band_all_65\n",
      "age_band_all_70\n",
      "age_band_all_75\n",
      "age_band_all_80\n",
      "age_band_all_85\n",
      "age_band_all_90\n",
      "population_females\n",
      "age_band_females_0\n",
      "age_band_females_5\n",
      "age_band_females_10\n",
      "age_band_females_15\n",
      "age_band_females_20\n",
      "age_band_females_25\n",
      "age_band_females_30\n",
      "age_band_females_35\n",
      "age_band_females_40\n",
      "age_band_females_45\n",
      "age_band_females_50\n",
      "age_band_females_55\n",
      "age_band_females_60\n",
      "age_band_females_65\n",
      "age_band_females_70\n",
      "age_band_females_75\n",
      "age_band_females_80\n",
      "age_band_females_85\n",
      "age_band_females_90\n",
      "population_males\n",
      "age_band_males_0\n",
      "age_band_males_5\n",
      "age_band_males_10\n",
      "age_band_males_15\n",
      "age_band_males_20\n",
      "age_band_males_25\n",
      "age_band_males_30\n",
      "age_band_males_35\n",
      "age_band_males_40\n",
      "age_band_males_45\n",
      "age_band_males_50\n",
      "age_band_males_55\n",
      "age_band_males_60\n",
      "age_band_males_65\n",
      "age_band_males_70\n",
      "age_band_males_75\n",
      "age_band_males_80\n",
      "age_band_males_85\n",
      "age_band_males_90\n",
      "ambulance_service\n",
      "local_authority_district_22\n",
      "LAD22NM\n",
      "country\n",
      "LSOA11NM\n",
      "LSOA11CD\n",
      "msoa11cd\n",
      "lsoa_code\n",
      "region\n",
      "region_code\n",
      "region_type\n",
      "short_code\n",
      "icb\n",
      "icb_code\n",
      "isdn\n",
      "lhb\n",
      "icb_lhb\n"
     ]
    }
   ],
   "source": [
    "for c in df_lsoa.columns:\n",
    "    print(c)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Hospital data** - load data on each hospital, with aim of getting locations of hospitals that provide IVT."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Postcode</th>\n",
       "      <th>Hospital_name</th>\n",
       "      <th>Use_IVT</th>\n",
       "      <th>Use_MT</th>\n",
       "      <th>Use_MSU</th>\n",
       "      <th>Country</th>\n",
       "      <th>Strategic Clinical Network</th>\n",
       "      <th>Health Board / Trust</th>\n",
       "      <th>Stroke Team</th>\n",
       "      <th>SSNAP name</th>\n",
       "      <th>...</th>\n",
       "      <th>ivt_rate</th>\n",
       "      <th>Easting</th>\n",
       "      <th>Northing</th>\n",
       "      <th>long</th>\n",
       "      <th>lat</th>\n",
       "      <th>Neuroscience</th>\n",
       "      <th>30 England Thrombectomy Example</th>\n",
       "      <th>hospital_city</th>\n",
       "      <th>Notes</th>\n",
       "      <th>geometry</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>RM70AG</td>\n",
       "      <td>RM70AG</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>England</td>\n",
       "      <td>London SCN</td>\n",
       "      <td>Barking</td>\n",
       "      <td>Havering and Redbridge University Hospitals N...</td>\n",
       "      <td>Queens Hospital Romford HASU</td>\n",
       "      <td>...</td>\n",
       "      <td>11.9</td>\n",
       "      <td>551118</td>\n",
       "      <td>187780</td>\n",
       "      <td>0.179030640661934</td>\n",
       "      <td>51.5686465521504</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Romford</td>\n",
       "      <td></td>\n",
       "      <td>POINT (19932.617 6722504.230)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>E11BB</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>England</td>\n",
       "      <td>London SCN</td>\n",
       "      <td>Barts Health NHS Trust</td>\n",
       "      <td>The Royal London Hospital</td>\n",
       "      <td>Royal London Hospital HASU</td>\n",
       "      <td>...</td>\n",
       "      <td>13.4</td>\n",
       "      <td>534829</td>\n",
       "      <td>181798</td>\n",
       "      <td>-0.0581329916047372</td>\n",
       "      <td>51.5190178361295</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Royal London</td>\n",
       "      <td></td>\n",
       "      <td>POINT (-6468.377 6713620.832)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  Postcode Hospital_name Use_IVT Use_MT Use_MSU  Country  \\\n",
       "0   RM70AG        RM70AG       1      1       1  England   \n",
       "1    E11BB         E11BB       1      1       1  England   \n",
       "\n",
       "  Strategic Clinical Network    Health Board / Trust  \\\n",
       "0                 London SCN                 Barking   \n",
       "1                 London SCN  Barts Health NHS Trust   \n",
       "\n",
       "                                         Stroke Team  \\\n",
       "0   Havering and Redbridge University Hospitals N...   \n",
       "1                          The Royal London Hospital   \n",
       "\n",
       "                     SSNAP name  ... ivt_rate Easting Northing  \\\n",
       "0  Queens Hospital Romford HASU  ...     11.9  551118   187780   \n",
       "1    Royal London Hospital HASU  ...     13.4  534829   181798   \n",
       "\n",
       "                  long               lat Neuroscience  \\\n",
       "0    0.179030640661934  51.5686465521504            1   \n",
       "1  -0.0581329916047372  51.5190178361295            1   \n",
       "\n",
       "  30 England Thrombectomy Example hospital_city Notes  \\\n",
       "0                               0       Romford         \n",
       "1                               1  Royal London         \n",
       "\n",
       "                        geometry  \n",
       "0  POINT (19932.617 6722504.230)  \n",
       "1  POINT (-6468.377 6713620.832)  \n",
       "\n",
       "[2 rows x 22 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read in data on each hospital\n",
    "gdf_units = gpd.read_file(\n",
    "    os.path.join(paths.data, paths.hospitals, paths.stroke_hospitals))\n",
    "\n",
    "# Combine get geometry from Easting and Northing columns\n",
    "gdf_units[\"geometry\"] = gpd.points_from_xy(\n",
    "        gdf_units.Easting, gdf_units.Northing)\n",
    "gdf_units = gdf_units.set_crs(epsg=27700)\n",
    "\n",
    "# Restrict to the units delivering thrombolysis\n",
    "mask = gdf_units['Use_IVT'] == '1'\n",
    "gdf_units = gdf_units[mask]\n",
    "\n",
    "# Convert crs to epsg 3857\n",
    "gdf_units = gdf_units.to_crs(epsg=3857)\n",
    "\n",
    "# Preview dataframe\n",
    "gdf_units.head(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "__Area data__ - Import geojson of all LSOA to calculate area of each region.\n",
    "\n",
    "(use the proper Office for National Statistics file, not the simplified one):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>LSOA11CD</th>\n",
       "      <th>LSOA</th>\n",
       "      <th>LSOA11NMW</th>\n",
       "      <th>geometry</th>\n",
       "      <th>polygon_area_km2</th>\n",
       "      <th>closest_ivt_unit</th>\n",
       "      <th>closest_mt_unit</th>\n",
       "      <th>closest_mt_transfer</th>\n",
       "      <th>la_district_name_2019</th>\n",
       "      <th>rural_urban_2011</th>\n",
       "      <th>...</th>\n",
       "      <th>lsoa_code</th>\n",
       "      <th>region</th>\n",
       "      <th>region_code</th>\n",
       "      <th>region_type</th>\n",
       "      <th>short_code</th>\n",
       "      <th>icb</th>\n",
       "      <th>icb_code</th>\n",
       "      <th>isdn</th>\n",
       "      <th>lhb</th>\n",
       "      <th>icb_lhb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>E01000001</td>\n",
       "      <td>City of London 001A</td>\n",
       "      <td>City of London 001A</td>\n",
       "      <td>POLYGON ((532105.092 182011.230, 532162.491 18...</td>\n",
       "      <td>0.343907</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>City of London</td>\n",
       "      <td>Urban major conurbation</td>\n",
       "      <td>...</td>\n",
       "      <td>E01000001</td>\n",
       "      <td>NHS North East London ICB - A3A8R</td>\n",
       "      <td>E38000255</td>\n",
       "      <td>SICBL</td>\n",
       "      <td>NEL</td>\n",
       "      <td>NHS North East London Integrated Care Board</td>\n",
       "      <td>E54000029</td>\n",
       "      <td>London</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NHS North East London Integrated Care Board</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>E01000002</td>\n",
       "      <td>City of London 001B</td>\n",
       "      <td>City of London 001B</td>\n",
       "      <td>POLYGON ((532746.813 181786.891, 532671.688 18...</td>\n",
       "      <td>0.583474</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>City of London</td>\n",
       "      <td>Urban major conurbation</td>\n",
       "      <td>...</td>\n",
       "      <td>E01000002</td>\n",
       "      <td>NHS North East London ICB - A3A8R</td>\n",
       "      <td>E38000255</td>\n",
       "      <td>SICBL</td>\n",
       "      <td>NEL</td>\n",
       "      <td>NHS North East London Integrated Care Board</td>\n",
       "      <td>E54000029</td>\n",
       "      <td>London</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NHS North East London Integrated Care Board</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>E01000003</td>\n",
       "      <td>City of London 001C</td>\n",
       "      <td>City of London 001C</td>\n",
       "      <td>POLYGON ((532135.145 182198.119, 532158.250 18...</td>\n",
       "      <td>0.147840</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>City of London</td>\n",
       "      <td>Urban major conurbation</td>\n",
       "      <td>...</td>\n",
       "      <td>E01000003</td>\n",
       "      <td>NHS North East London ICB - A3A8R</td>\n",
       "      <td>E38000255</td>\n",
       "      <td>SICBL</td>\n",
       "      <td>NEL</td>\n",
       "      <td>NHS North East London Integrated Care Board</td>\n",
       "      <td>E54000029</td>\n",
       "      <td>London</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NHS North East London Integrated Care Board</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>E01000005</td>\n",
       "      <td>City of London 001E</td>\n",
       "      <td>City of London 001E</td>\n",
       "      <td>POLYGON ((533807.946 180767.770, 533649.063 18...</td>\n",
       "      <td>0.491918</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>E11BB</td>\n",
       "      <td>City of London</td>\n",
       "      <td>Urban major conurbation</td>\n",
       "      <td>...</td>\n",
       "      <td>E01000005</td>\n",
       "      <td>NHS North East London ICB - A3A8R</td>\n",
       "      <td>E38000255</td>\n",
       "      <td>SICBL</td>\n",
       "      <td>NEL</td>\n",
       "      <td>NHS North East London Integrated Care Board</td>\n",
       "      <td>E54000029</td>\n",
       "      <td>London</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NHS North East London Integrated Care Board</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>E01000006</td>\n",
       "      <td>Barking and Dagenham 016A</td>\n",
       "      <td>Barking and Dagenham 016A</td>\n",
       "      <td>POLYGON ((545122.049 184314.931, 545271.917 18...</td>\n",
       "      <td>0.372257</td>\n",
       "      <td>RM70AG</td>\n",
       "      <td>RM70AG</td>\n",
       "      <td>RM70AG</td>\n",
       "      <td>Barking and Dagenham</td>\n",
       "      <td>Urban major conurbation</td>\n",
       "      <td>...</td>\n",
       "      <td>E01000006</td>\n",
       "      <td>NHS North East London ICB - A3A8R</td>\n",
       "      <td>E38000255</td>\n",
       "      <td>SICBL</td>\n",
       "      <td>NEL</td>\n",
       "      <td>NHS North East London Integrated Care Board</td>\n",
       "      <td>E54000029</td>\n",
       "      <td>London</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NHS North East London Integrated Care Board</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 25 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    LSOA11CD                       LSOA                  LSOA11NMW  \\\n",
       "0  E01000001        City of London 001A        City of London 001A   \n",
       "1  E01000002        City of London 001B        City of London 001B   \n",
       "2  E01000003        City of London 001C        City of London 001C   \n",
       "3  E01000005        City of London 001E        City of London 001E   \n",
       "4  E01000006  Barking and Dagenham 016A  Barking and Dagenham 016A   \n",
       "\n",
       "                                            geometry  polygon_area_km2  \\\n",
       "0  POLYGON ((532105.092 182011.230, 532162.491 18...          0.343907   \n",
       "1  POLYGON ((532746.813 181786.891, 532671.688 18...          0.583474   \n",
       "2  POLYGON ((532135.145 182198.119, 532158.250 18...          0.147840   \n",
       "3  POLYGON ((533807.946 180767.770, 533649.063 18...          0.491918   \n",
       "4  POLYGON ((545122.049 184314.931, 545271.917 18...          0.372257   \n",
       "\n",
       "  closest_ivt_unit closest_mt_unit closest_mt_transfer la_district_name_2019  \\\n",
       "0            E11BB           E11BB               E11BB        City of London   \n",
       "1            E11BB           E11BB               E11BB        City of London   \n",
       "2            E11BB           E11BB               E11BB        City of London   \n",
       "3            E11BB           E11BB               E11BB        City of London   \n",
       "4           RM70AG          RM70AG              RM70AG  Barking and Dagenham   \n",
       "\n",
       "          rural_urban_2011  ...  lsoa_code                             region  \\\n",
       "0  Urban major conurbation  ...  E01000001  NHS North East London ICB - A3A8R   \n",
       "1  Urban major conurbation  ...  E01000002  NHS North East London ICB - A3A8R   \n",
       "2  Urban major conurbation  ...  E01000003  NHS North East London ICB - A3A8R   \n",
       "3  Urban major conurbation  ...  E01000005  NHS North East London ICB - A3A8R   \n",
       "4  Urban major conurbation  ...  E01000006  NHS North East London ICB - A3A8R   \n",
       "\n",
       "  region_code region_type short_code  \\\n",
       "0   E38000255       SICBL        NEL   \n",
       "1   E38000255       SICBL        NEL   \n",
       "2   E38000255       SICBL        NEL   \n",
       "3   E38000255       SICBL        NEL   \n",
       "4   E38000255       SICBL        NEL   \n",
       "\n",
       "                                           icb   icb_code    isdn  lhb  \\\n",
       "0  NHS North East London Integrated Care Board  E54000029  London  NaN   \n",
       "1  NHS North East London Integrated Care Board  E54000029  London  NaN   \n",
       "2  NHS North East London Integrated Care Board  E54000029  London  NaN   \n",
       "3  NHS North East London Integrated Care Board  E54000029  London  NaN   \n",
       "4  NHS North East London Integrated Care Board  E54000029  London  NaN   \n",
       "\n",
       "                                       icb_lhb  \n",
       "0  NHS North East London Integrated Care Board  \n",
       "1  NHS North East London Integrated Care Board  \n",
       "2  NHS North East London Integrated Care Board  \n",
       "3  NHS North East London Integrated Care Board  \n",
       "4  NHS North East London Integrated Care Board  \n",
       "\n",
       "[5 rows x 25 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gdf_lsoa = gpd.read_file(os.path.join(\n",
    "    paths.data, paths.shapefiles, paths.lsoa_shp),\n",
    "    crs='EPSG:27700')\n",
    "\n",
    "# Change the projection to a Cartesian system (EPSG:3857) and get area in\n",
    "# square kilometers (through /10**6)\n",
    "gdf_lsoa['polygon_area_km2'] = (gdf_lsoa.to_crs('EPSG:3857').area / 10**6)\n",
    "\n",
    "# Merge with region information:\n",
    "df_lsoa_regions = pd.read_csv('data/lsoa_2021/lsoa_regions.csv', index_col=0)\n",
    "\n",
    "gdf_lsoa = pd.merge(gdf_lsoa, df_lsoa_regions.drop('LSOA11NM', axis='columns'),\n",
    "                    on='LSOA11CD', how='left')\n",
    "\n",
    "# Rename LSOA column to match df_lsoa:\n",
    "gdf_lsoa = gdf_lsoa.rename(columns={'LSOA11NM': 'LSOA'})\n",
    "\n",
    "gdf_lsoa.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.3 Define functions for calculating weighted mean and proportions\n",
    "\n",
    "Group data by specified geography (e.g. closest IVT unit) and calculate the weighted average of specified columns - [see stackoverflow tutorial here](https://stackoverflow.com/questions/31521027/groupby-weighted-average-and-sum-in-pandas-dataframe)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def weighted_av(df, group_by, col, new_col, weight='population_all'):\n",
    "    '''\n",
    "    Groupby specified column and then find weighted average of another column\n",
    "    Returns normal mean and weighted mean\n",
    "    Inputs:\n",
    "    - df - dataframe, contains data and columns to do calculation\n",
    "    - group_by - string, column to group by\n",
    "    - col - string, column to find average of\n",
    "    - new_col - string, name of column which will contain weighted mean\n",
    "    - weight - string, name of column to weight by, default 'population_all'\n",
    "    '''\n",
    "    res = (df\n",
    "           .groupby(group_by)\n",
    "           .apply(lambda x: pd.Series([np.mean(x[col]),\n",
    "                                       np.average(x[col], weights=x[weight])],\n",
    "                                      index=['mean', new_col])))\n",
    "    return (res)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Group data by specific geography (e.g. ambulance trust) and calculate proportion."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_proportion(subgroup_col, overall_col, proportion_col, group_col, df):\n",
    "    '''\n",
    "    Groupby a particular geography, and find a proportion from two provided\n",
    "    columns\n",
    "    Inputs:\n",
    "    subgroup_col - string, numerator, has population counts for a subgroup\n",
    "    overall_col - string, denominator, has population count overall\n",
    "    proportion_col - string, name for the column produced\n",
    "    group_col - string, column to groupby when calculate proportions\n",
    "    df - dataframe, with columns used above\n",
    "    '''\n",
    "    # Sum the columns for each group\n",
    "    res = df.groupby(group_col).agg({\n",
    "        subgroup_col: 'sum',\n",
    "        overall_col: 'sum'\n",
    "    })\n",
    "\n",
    "    # Find the proportions for each group\n",
    "    res[proportion_col] = res[subgroup_col] / res[overall_col]\n",
    "\n",
    "    return res"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find counts of people in each category, then calculate proportion."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_count_and_proportion(df_lsoa, area, col, count, prop):\n",
    "    '''\n",
    "    Finds count of people in each category for each area (based on LSOA info\n",
    "    and counts), and then calculates proportion in particular category.\n",
    "    Requires col to contain 'True' 'False' where 'True' is the proportion you\n",
    "    want to find (so calculates 'True' / 'True'+'False')\n",
    "    Inputs:\n",
    "    - df_lsoa - dataframe with information on each LSOA\n",
    "    - area - string, column to group areas by\n",
    "    - col - string, column with data of interest\n",
    "    - count - string, column with population count\n",
    "    - prop - string, name of new column with proportion\n",
    "    '''\n",
    "    # Find count for each area of people in each category\n",
    "    counts = (df_lsoa.groupby([area, col])[count].sum())\n",
    "\n",
    "    # Reformat dataframe\n",
    "    counts = counts.reset_index(name='n')\n",
    "    counts = counts.pivot(\n",
    "        index=area,\n",
    "        columns=col,\n",
    "        values='n').reset_index().rename_axis(None, axis=1)\n",
    "\n",
    "    # Set NaN to 0\n",
    "    counts = counts.replace(np.nan, 0)\n",
    "\n",
    "    # Find proportion\n",
    "    counts[prop] = counts['True'] / (counts['True'] + counts['False'])\n",
    "\n",
    "    # Set index to the area unit\n",
    "    counts = counts.set_index(area)\n",
    "\n",
    "    # Rename 'True' and 'False' columns to make their meanings clearer\n",
    "    # when the data is merged into another dataframe:\n",
    "    s = '_'.join(prop.split('_')[1:])\n",
    "    counts = counts.rename(columns={'True': f'{s}_True', 'False': f'{s}_False'})\n",
    "\n",
    "    return (counts)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Calculate new LSOA-level data\n",
    "\n",
    "Must create the following columns in df_lsoa:\n",
    "+ `age_65_plus_count`\n",
    "+ `ethnic_group_other_than_white_british`\n",
    "+ `long_term_health_count`\n",
    "+ `rural`\n",
    "+ `ivt_within_30`\n",
    "+ `closest_hospital_is_mt`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add count of \"other than white British\"\n",
    "df_lsoa['ethnic_group_other_than_white_british'] = (\n",
    "    df_lsoa['ethnic_group_all_categories_ethnic_group'] -\n",
    "    df_lsoa['ethnic_group_white_english_welsh_scottish_northern_irish_british']\n",
    ")\n",
    "\n",
    "# Add column with count of long-term health problem (limited a little or alot)\n",
    "df_lsoa['long_term_health_count'] = (\n",
    "    df_lsoa['all_categories_long_term_health_problem_or_disability'] -\n",
    "    df_lsoa['day_to_day_activities_not_limited']\n",
    ")\n",
    "\n",
    "# Extract if is rural or if is urban (each classification starts with that)\n",
    "df_lsoa['ruc_overall'] = df_lsoa['rural_urban_2011'].str[:5]\n",
    "# Add column where True if rural\n",
    "df_lsoa['rural'] = (df_lsoa['ruc_overall'] == 'Rural').map({\n",
    "    True: 'True', False: 'False'})\n",
    "\n",
    "# Find count of people age 65 plus\n",
    "df_lsoa['age_65_plus_count'] = df_lsoa[[\n",
    "    'age_band_all_65', 'age_band_all_70', 'age_band_all_75',\n",
    "    'age_band_all_80', 'age_band_all_85', 'age_band_all_90']].sum(axis=1)\n",
    "\n",
    "# Create column indicating if IVT unit is within 30 minutes\n",
    "df_lsoa['ivt_within_30'] = (df_lsoa['closest_ivt_unit_time'] < 30).map(\n",
    "    {True: 'True', False: 'False'})\n",
    "\n",
    "# Add column with whether closest hospital offers thrombectomy\n",
    "df_lsoa['closest_hospital_is_mt'] = (\n",
    "    df_lsoa['closest_mt_transfer_time'] == 0).map({True: 'True',\n",
    "                                                   False: 'False'})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Big function for region-level data creation\n",
    "\n",
    "Create one big function that does all of the calculations needed for averaging data across any given region."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def big_data_creation(df_lsoa, gdf_lsoa, group_by):\n",
    "    df_lsoa = df_lsoa.copy()  # just in case\n",
    "    gdf_lsoa = gdf_lsoa.copy()  # just in case\n",
    "    # Find all of the possible options for this region type:\n",
    "    index_values = list(set(df_lsoa[group_by]))\n",
    "    try:\n",
    "        index_values = sorted(index_values)\n",
    "    except TypeError:\n",
    "        pass\n",
    "    \n",
    "    # Empty dataframe to place the results into:\n",
    "    df_results = pd.DataFrame(index=index_values)\n",
    "\n",
    "    # Find population of each region:\n",
    "    pop = df_lsoa[[group_by, 'population_all']].groupby(group_by)['population_all'].sum()\n",
    "    # Find area of each region:\n",
    "    gdf_lsoa = gdf_lsoa[[group_by, 'polygon_area_km2']].groupby(group_by).sum()\n",
    "    gdf_lsoa = pd.merge(gdf_lsoa, pop, left_index=True, right_index=True)\n",
    "    # Divide population by area to get population density\n",
    "    gdf_lsoa['population_density'] = (\n",
    "        gdf_lsoa['population_all'] / gdf_lsoa['polygon_area_km2'])\n",
    "    \n",
    "    df_results = pd.merge(df_results, gdf_lsoa.drop('population_all', axis='columns'),\n",
    "                          left_index=True, right_index=True, how='left')\n",
    "    \n",
    "    # --- Weighted averages ---\n",
    "    # Pre-requisites:\n",
    "    # Must have already calculated the following columns in df_lsoa:\n",
    "    # 'age_65_plus_count'\n",
    "    \n",
    "    cols_for_weighted_av = [\n",
    "        # Population-weighted mean income domain score:\n",
    "        dict(col='income_domain_score',\n",
    "             new_col='income_domain_weighted_mean'),\n",
    "        # Population-weighted mean IMD score:\n",
    "        dict(col='imd_2019_score', new_col='imd_weighted_mean'),\n",
    "        # Average travel time to IVT unit weighted by number of\n",
    "        # over 65 year olds in catchment:\n",
    "        dict(col='closest_ivt_unit_time', new_col='weighted_ivt_time',\n",
    "             weight='age_65_plus_count'),\n",
    "        # Population-weighted mean time to MT unit:\n",
    "        dict(col='total_mt_time', new_col='mt_time_weighted_mean'),\n",
    "        # Population-weighted mean time to IVT unit:\n",
    "        dict(col='closest_ivt_unit_time',\n",
    "             new_col='ivt_time_weighted_mean'),\n",
    "        # Population-weighted mean transfer time between IVT and MT unit:\n",
    "        dict(col='closest_mt_transfer_time',\n",
    "             new_col='mt_transfer_time_weighted_mean'),   \n",
    "    ]\n",
    "    \n",
    "    for kwargs in cols_for_weighted_av:\n",
    "        # Calculate results.\n",
    "        # 'weighted_av()' function returns a df with index named group_by and\n",
    "        # columns named 'mean' and new_col.\n",
    "        df_w = weighted_av(\n",
    "            df_lsoa,\n",
    "            group_by=group_by,\n",
    "            **kwargs\n",
    "        )\n",
    "        # Store in big results df.\n",
    "        # Discard 'mean', keep new_col.\n",
    "        df_results = pd.merge(df_results, df_w[kwargs['new_col']],\n",
    "                              left_index=True, right_index=True, how='left')\n",
    "    \n",
    "    # --- Find proportions ---\n",
    "    # Pre-requisites:\n",
    "    # Must have already calculated the following columns in df_lsoa:\n",
    "    # 'ethnic_group_other_than_white_british'\n",
    "    # 'long_term_health_count'\n",
    "    # 'age_65_plus_count'\n",
    "    cols_for_proportion = [\n",
    "        # Bad or very bad general health\n",
    "        dict(subgroup_col='ethnic_group_other_than_white_british',\n",
    "             overall_col='ethnic_group_all_categories_ethnic_group',\n",
    "             proportion_col='ethnic_minority_proportion',\n",
    "             df=df_lsoa),\n",
    "        # General health - proportion with bad/very bad health\n",
    "        dict(subgroup_col='bad_or_very_bad_health',\n",
    "             overall_col='all_categories_general_health',\n",
    "             proportion_col='bad_health_proportion',\n",
    "             df=df_lsoa),\n",
    "        # Long-term health problem or disability\n",
    "        dict(subgroup_col='long_term_health_count',\n",
    "             overall_col='all_categories_long_term_health_problem_or_disability',\n",
    "             proportion_col='long_term_health_proportion',\n",
    "             df=df_lsoa),\n",
    "        # Age - proportion aged 65+\n",
    "        dict(subgroup_col='age_65_plus_count',\n",
    "             overall_col='population_all',\n",
    "             proportion_col='age_65_plus_proportion',\n",
    "             df=df_lsoa),\n",
    "    ]\n",
    "    # 'find_proportion()' function.\n",
    "    # Notebook 3 stored the proportion_col column, discarded the rest.\n",
    "    # Here, keep all resulting columns. First is number of True,\n",
    "    # second is total number (True + False), third is proportion.\n",
    "    for kwargs in cols_for_proportion:\n",
    "        # Calculate results.\n",
    "        df_p = find_proportion(group_col=group_by, **kwargs)\n",
    "        \n",
    "        # Store in big results df.\n",
    "        df_results = pd.merge(df_results, df_p,\n",
    "                              left_index=True, right_index=True, how='left')\n",
    "    \n",
    "    # --- Find count and proportions ---\n",
    "    # Pre-requisites:\n",
    "    # Must have already calculated the following columns in df_lsoa:\n",
    "    # 'rural'\n",
    "    # 'ivt_within_30'\n",
    "    # 'closest_hospital_is_mt'\n",
    "    cols_for_count_and_proportion = [\n",
    "        # Rural/urban - proportion of people living in rural areas\n",
    "        dict(col='rural', count='population_all', prop='proportion_rural'),\n",
    "        # Proportion of 65+ year olds living within 30 minutes of their closest IVT unit\n",
    "        dict(col='ivt_within_30', count='age_65_plus_count',\n",
    "             prop='proportion_over_65_within_30'),\n",
    "        # Proportion of patients whose nearest hospital is an MT unit\n",
    "        dict(col='closest_hospital_is_mt', count='population_all',\n",
    "             prop='proportion_closest_is_mt'),\n",
    "    ]\n",
    "    # 'find_count_and_proportion()' function.\n",
    "    for kwargs in cols_for_count_and_proportion:\n",
    "        # Calculate results.\n",
    "        df_c = find_count_and_proportion(df_lsoa=df_lsoa, area=group_by, **kwargs)\n",
    "        \n",
    "        # Store in big results df.\n",
    "        df_results = pd.merge(df_results, df_c,\n",
    "                              left_index=True, right_index=True, how='left')\n",
    "    return df_results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the big data collection function for each different region type:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "region_types = [\n",
    "    'LSOA',\n",
    "    'closest_ivt_unit',\n",
    "    'closest_mt_unit',\n",
    "    'closest_mt_transfer',\n",
    "    'ambulance_service',\n",
    "    # 'LAD22NM',\n",
    "    'icb_code',\n",
    "    'isdn',\n",
    "    'lhb',\n",
    "    'icb_lhb'\n",
    "]\n",
    "\n",
    "for region_type in region_types:\n",
    "    df_results = big_data_creation(df_lsoa, gdf_lsoa, region_type)\n",
    "\n",
    "    if region_type == 'closest_ivt_unit':\n",
    "        # Do bonus calculations.\n",
    "        # Extract rate for each unit, with unit as index\n",
    "        ivt_rate = (df_lsoa[['closest_ivt_unit', 'ivt_rate']]\n",
    "                    .drop_duplicates()\n",
    "                    .set_index('closest_ivt_unit')['ivt_rate'])\n",
    "        df_results = pd.merge(df_results, ivt_rate,\n",
    "                              left_index=True, right_index=True, how='left')\n",
    "\n",
    "        # Get admissions to each unit from 2021/22\n",
    "        admissions_2122 = (\n",
    "            gdf_units[['Postcode', 'Admissions 21/22']]\n",
    "               .set_index('Postcode')\n",
    "               .rename(columns={'Admissions 21/22': 'admissions_2122'})\n",
    "               .astype(int)\n",
    "        )\n",
    "        df_results = pd.merge(df_results, admissions_2122,\n",
    "                              left_index=True, right_index=True, how='left')\n",
    "    else:\n",
    "        # Don't do anything extra.\n",
    "        pass\n",
    "\n",
    "    # Make sure data is not overly-precise before saving:\n",
    "    cols_to_round = df_results.select_dtypes(include=['float']).columns\n",
    "    df_results[cols_to_round] = df_results[cols_to_round].round(4)\n",
    "\n",
    "    # Set index name explicitly to make sure something gets saved:\n",
    "    df_results.index.name = region_type\n",
    "    \n",
    "    # Save to file:\n",
    "    df_results.to_csv(f'data/collated_data_by_region/collated_data_regional_{region_type}.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}