Extract hospital performance for pathway model#

Aims#

  • Extract and save hospital performance for pathway simulation model

  • Create breakdowns by weekend/weekday/day/night

Import libraries#

import numpy as np
import pandas as pd

Load data#

  • Load data

  • Restrict data to fields necessary for pathway extraction

  • Remove patients whose stroke onset occurred in hospital

# Read the raw extract from disk
data_loaded = pd.read_csv(
    '../data/2019-11-04-HQIP303-Exeter_MA.csv',
    low_memory=False,
)

# Length (in years) of the period covered by the data; admission counts are
# divided by this to give admissions per year
data_years = 3.0

# Columns required for the pathway extraction
used_fields = [
    'StrokeTeam', 'MoreEqual80y', 'S1Gender',
    'S1OnsetInHospital', 'S1OnsetToArrival_min',
    'S1AdmissionHour', 'S1AdmissionDay', 'S1OnsetTimeType',
    'S2BrainImagingTime_min', 'S2StrokeType',
    'S2Thrombolysis', 'S2ThrombolysisTime_min',
]

# Keep only the required columns, then drop patients whose stroke onset was
# recorded as being in hospital
data_loaded = data_loaded.loc[:, used_fields]
mask = data_loaded['S1OnsetInHospital'].eq('No')
data_loaded = data_loaded.loc[mask]

Extract hospital performance#

def analyse_by_team(input_data, years=None):
    """
    Summarise pathway performance for each stroke team.

    The data are grouped by `StrokeTeam`. Within each team the patients are
    filtered step by step, so every proportion after `thrombolysis_rate` is
    relative to the patients that passed all of the preceding filters.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Patient-level data. The columns read are `StrokeTeam`,
        `MoreEqual80y`, `S1OnsetTimeType`, `S1OnsetToArrival_min`,
        `S2BrainImagingTime_min`, `S2Thrombolysis` and
        `S2ThrombolysisTime_min`. The frame is not modified.
    years : float, optional
        Number of years the data covers; admission counts are divided by this
        to give admissions per year. When omitted, the module-level
        `data_years` value is used.

    Returns
    -------
    pandas.DataFrame
        One row per stroke team with the columns:

        * `stroke_team` - the team identifier (group key).
        * `thrombolysis_rate` - proportion of all the team's patients with
          `S2Thrombolysis == 'Yes'`.
        * `admissions` - number of patients divided by `years`.
        * `80_plus` - proportion with `MoreEqual80y == 'Yes'` among patients
          with known onset arriving within 240 min of onset.
        * `onset_known` - proportion whose `S1OnsetTimeType` is 'Precise' or
          'Best estimate'.
        * `known_arrival_within_4hrs` - of those, the proportion with
          `S1OnsetToArrival_min <= 240`.
        * `onset_arrival_mins_mu` / `onset_arrival_mins_sigma` - mean and
          standard deviation of the natural log of onset-to-arrival time.
        * `scan_within_4_hrs` - proportion with
          `S2BrainImagingTime_min <= 240`.
        * `arrival_scan_arrival_mins_mu` / `arrival_scan_arrival_mins_sigma`
          - mean and standard deviation of the natural log of
          `S2BrainImagingTime_min`.
        * `onset_scan_4_hrs` - proportion whose onset-to-arrival plus
          imaging time is `<= 240`.
        * `eligable` - proportion of the remaining patients who were
          thrombolysed (spelling kept because it is the saved column name).
        * `scan_needle_mins_mu` / `scan_needle_mins_sigma` - mean and
          standard deviation of the natural log of
          `S2ThrombolysisTime_min - S2BrainImagingTime_min`.
    """
    if years is None:
        # Fall back to the notebook-level constant
        years = data_years

    # Output column order (kept identical to the saved CSV layout)
    columns = [
        'stroke_team',
        'thrombolysis_rate',
        'admissions',
        '80_plus',
        'onset_known',
        'known_arrival_within_4hrs',
        'onset_arrival_mins_mu',
        'onset_arrival_mins_sigma',
        'scan_within_4_hrs',
        'arrival_scan_arrival_mins_mu',
        'arrival_scan_arrival_mins_sigma',
        'onset_scan_4_hrs',
        'eligable',
        'scan_needle_mins_mu',
        'scan_needle_mins_sigma',
    ]

    records = []

    # Each group is (team identifier, DataFrame of that team's patients)
    for team, group_df in input_data.groupby('StrokeTeam'):
        record = {
            'stroke_team': team,
            # Raw count here; converted to a per-year figure after the loop
            'admissions': group_df.shape[0],
            'thrombolysis_rate': (group_df['S2Thrombolysis'] == 'Yes').mean(),
        }

        # Record onset known proportion and remove rest
        mask = group_df['S1OnsetTimeType'].isin(['Precise', 'Best estimate'])
        record['onset_known'] = mask.mean()
        group_df = group_df[mask]

        # Record onset <4 hours and remove rest
        mask = group_df['S1OnsetToArrival_min'] <= 240
        record['known_arrival_within_4hrs'] = mask.mean()
        group_df = group_df[mask]

        # Proportion 80+ (of those arriving within 4 hours)
        record['80_plus'] = (group_df['MoreEqual80y'] == 'Yes').mean()

        # Log mean/sd of onset to arrival
        ln_onset_to_arrival = np.log(group_df['S1OnsetToArrival_min'])
        record['onset_arrival_mins_mu'] = ln_onset_to_arrival.mean()
        record['onset_arrival_mins_sigma'] = ln_onset_to_arrival.std()

        # Record scan within 4 hours of arrival (and remove the rest)
        mask = group_df['S2BrainImagingTime_min'] <= 240
        record['scan_within_4_hrs'] = mask.mean()
        group_df = group_df[mask]

        # Log mean/sd of arrival to scan
        ln_arrival_to_scan = np.log(group_df['S2BrainImagingTime_min'])
        record['arrival_scan_arrival_mins_mu'] = ln_arrival_to_scan.mean()
        record['arrival_scan_arrival_mins_sigma'] = ln_arrival_to_scan.std()

        # Record onset to scan in 4 hours and remove rest
        mask = (group_df['S1OnsetToArrival_min'] +
                group_df['S2BrainImagingTime_min']) <= 240
        record['onset_scan_4_hrs'] = mask.mean()
        group_df = group_df[mask]

        # Thrombolysis given (to remaining patients)
        record['eligable'] = (group_df['S2Thrombolysis'] == 'Yes').mean()

        # Scan to needle: only patients with a positive thrombolysis time
        thrombolysed = group_df[group_df['S2ThrombolysisTime_min'] > 0]
        scan_to_needle = (thrombolysed['S2ThrombolysisTime_min'] -
                          thrombolysed['S2BrainImagingTime_min'])
        # Replace zero scan-to-needle times with 1 so that log() is finite
        scan_to_needle[scan_to_needle == 0] = 1
        ln_scan_to_needle = np.log(scan_to_needle)
        record['scan_needle_mins_mu'] = ln_scan_to_needle.mean()
        record['scan_needle_mins_sigma'] = ln_scan_to_needle.std()

        records.append(record)

    df = pd.DataFrame(records, columns=columns)
    # Convert admission counts to admissions per year
    df['admissions'] = df['admissions'] / years

    return df
    
# Per-team performance across all (out-of-hospital onset) admissions
df_all = analyse_by_team(data_loaded)

# Keep hospitals with at least 100 admissions per year and at least 3.3333
# thrombolysed patients per year (about 10 in total over three years)
admissions = df_all['admissions']
thrombolysed = df_all['thrombolysis_rate'] * admissions
mask = (thrombolysed >= 3.3333) & (admissions >= 100)
df_all = df_all.loc[mask]

# Write the filtered table to disk
df_all.to_csv(
    'hosp_performance_output/hospital_performance.csv', index=False)

# Show data for the first five hospitals (one column per hospital)
df_all.head().T
2 4 6 7 9
stroke_team AGNOF1041H AKCGO9726K AOBTM3098N APXEE8191H ATDID5461S
thrombolysis_rate 0.154839 0.158892 0.085885 0.098634 0.090689
admissions 671.666667 1143.333333 500.666667 439.333333 275.666667
80_plus 0.425459 0.395658 0.48547 0.515679 0.533546
onset_known 0.635236 0.970845 0.619174 0.716237 0.573156
known_arrival_within_4hrs 0.68125 0.428829 0.629032 0.608051 0.660338
onset_arrival_mins_mu 4.576874 4.625486 4.603918 4.590357 4.427826
onset_arrival_mins_sigma 0.557598 0.597451 0.584882 0.496452 0.591373
scan_within_4_hrs 0.965596 0.955882 0.935043 0.966899 0.878594
arrival_scan_arrival_mins_mu 1.6657 2.834183 3.471419 3.31293 4.12569
arrival_scan_arrival_mins_sigma 1.497966 0.999719 1.254744 0.714465 0.549301
onset_scan_4_hrs 0.935867 0.908425 0.846435 0.904505 0.865455
eligable 0.388325 0.419355 0.267819 0.258964 0.315126
scan_needle_mins_mu 3.669602 2.904479 3.694918 3.585094 3.497262
scan_needle_mins_sigma 0.664462 0.874818 0.518929 0.751204 0.608126

Limit full data to units with at least 300 admissions (100 per year) and about 10 thrombolysed patients over the data period#

# Teams that passed the admissions/thrombolysis filter applied to df_all
units_with_300_admissions = df_all['stroke_team'].unique().tolist()

# Patient-level data for those teams only
mask = data_loaded['StrokeTeam'].isin(units_with_300_admissions)
data_restricted = data_loaded.loc[mask]

Produce results for day/night and weekday/weekend#

# Admission-hour bins counted as "day" (09:00-17:59) and the days counted as
# weekdays; anything else is treated as night / weekend respectively
day_time_values = ['09:00 to 11:59', '12:00 to 14:59', '15:00 to 17:59']
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']

# Add boolean flag columns `day_time` and `mon_fri`
data_restricted = data_restricted.assign(
    day_time=data_restricted['S1AdmissionHour'].isin(day_time_values))
values = data_restricted['S1AdmissionDay'].isin(weekdays)
data_restricted = data_restricted.assign(mon_fri=values)

Weekday

# Monday-Friday admissions, any time of day
mask = data_restricted['mon_fri']
df = analyse_by_team(data_restricted.loc[mask])
df.to_csv(
    'hosp_performance_output/hospital_performance_weekday.csv',
    index=False,
)

Weekday day

# Monday-Friday admissions within the daytime bins
mask = data_restricted['mon_fri'] & data_restricted['day_time']
df = analyse_by_team(data_restricted.loc[mask])
df.to_csv(
    'hosp_performance_output/hospital_performance_weekday_day.csv',
    index=False,
)

Weekday night

# Weekday night: Monday-Friday admissions outside the daytime bins.
# The negation is written explicitly with `~` because `&` binds more tightly
# than `==`: the unparenthesised `a == False & b` is evaluated as
# `a == (False & b)`, which ignored the weekday flag and so also selected
# weekend nights.
mask = (~data_restricted['day_time']) & data_restricted['mon_fri']
df = data_restricted[mask]
df = analyse_by_team(df)
df.to_csv(
    'hosp_performance_output/hospital_performance_weekday_night.csv', index=False)

Weekend

# Saturday/Sunday admissions (everything not flagged Monday-Friday),
# any time of day
mask = ~data_restricted['mon_fri']
df = analyse_by_team(data_restricted.loc[mask])
df.to_csv(
    'hosp_performance_output/hospital_performance_weekend.csv',
    index=False,
)

Weekend day

# Weekend day: admissions within the daytime bins that are not Monday-Friday.
# The negation is written explicitly with `~` because `&` binds more tightly
# than `==`: the unparenthesised `a & b == False` is evaluated as
# `(a & b) == False`, which selected every admission that was not a weekday
# daytime one (including all nights and weekday nights).
mask = data_restricted['day_time'] & (~data_restricted['mon_fri'])
df = data_restricted[mask]
df = analyse_by_team(df)
df.to_csv(
    'hosp_performance_output/hospital_performance_weekend_day.csv', index=False)

Weekend night

# Weekend night: admissions that are neither in the daytime bins nor
# Monday-Friday (De Morgan form of "not day AND not weekday")
mask = ~(data_restricted['day_time'] | data_restricted['mon_fri'])
df = analyse_by_team(data_restricted.loc[mask])
df.to_csv(
    'hosp_performance_output/hospital_performance_weekend_night.csv',
    index=False,
)