Pathway code#

The following is the code for pathway simulation.

# import required modules
import numpy as np
import pandas as pd
from math import sqrt
from scipy import stats 
    
# Model    
def model_ssnap_pathway_scenarios(hospital_performance, calibration=1.0):
    """
    Model of stroke pathway.
    
    Each scenario mimics 100 years of a stroke pathway. Patient times through 
    the pathway are sampled from distributions passed to the model using NumPy.
    
    Array columns:
     0: Patient aged 80+
     1: Allowable onset to needle time (may depend on age)
     2: Onset time known (boolean)
     3: Onset to arrival is less than 4 hours (boolean)
     4: Onset known and onset to arrival is less than 4 hours (boolean)
     5: Onset to arrival minutes
     6: Arrival to scan is less than 4 hours
     7: Arrival to scan minutes
     8: Minutes left to thrombolyse
     9: Onset time known and time left to thrombolyse
    10: Proportion ischaemic stroke (if they are filtered at this stage)
    11: Assign eligible for thrombolysis (for those scanned within 4 hrs of onset)
    12: Thrombolysis planned (scanned within time and eligible)
    13: Scan to needle time
    14: Clip onset to thrombolysis time to maximum allowed onset-to-thrombolysis
    15: Set baseline probability of good outcome based on age group
    16: Convert baseline probability good outcome to odds
    17: Calculate odds ratio of good outcome based on time to thrombolysis
    18: Patient odds of good outcome if given thrombolysis
    19: Patient probability of good outcome if given thrombolysis
    20: Clip patient probability of good outcome to minimum of zero
    21: Individual patient good outcome if given thrombolysis (boolean)*
    21: Individual patient good outcome if not given thrombolysis (boolean)*
    
    *Net population outcome is calculated here by summing probabilities of good
    outcome for all patients, rather than using individual outcomes. These columns
    are added for potential future use.
   
    """

    # Set up allowed time for thrombolysis (for under 80 and 80+)
    allowed_onset_to_needle = (270, 270)
    # Add allowed over-run 
    allowed_overrun_for_slow_scan_to_needle = 15
    # Set proportion of good outcomes for under 80 and 80+)
    good_outcome_base = (0.3499, 0.1318)

    # Set general model parameters
    scenario_counter = 0
    trials = 100
    
    # Set up dataframes
    
    results_columns = [
        'Baseline_good_outcomes_(median)',
        'Baseline_good_outcomes_per_1000_patients_(low_5%)',
        'Baseline_good_outcomes_per_1000_patients_(high_95%)',
        'Baseline_good_outcomes_per_1000_patients_(mean)',
        'Baseline_good_outcomes_per_1000_patients_(stdev)',
        'Baseline_good_outcomes_per_1000_patients_(95ci)',
        'Percent_Thrombolysis_(median%)',
        'Percent_Thrombolysis_(low_5%)',
        'Percent_Thrombolysis_(high_95%)',
        'Percent_Thrombolysis_(mean)',
        'Percent_Thrombolysis_(stdev)',
        'Percent_Thrombolysis_(95ci)',
        'Additional_good_outcomes_per_1000_patients_(median)',
        'Additional_good_outcomes_per_1000_patients_(low_5%)',
        'Additional_good_outcomes_per_1000_patients_(high_95%)',
        'Additional_good_outcomes_per_1000_patients_(mean)',
        'Additional_good_outcomes_per_1000_patients_(stdev)',
        'Additional_good_outcomes_per_1000_patients_(95ci)',
        'Onset_to_needle_(mean)']
    
    results_df = pd.DataFrame(columns=results_columns)
    
    # trial dataframe is set up each scenario, but define column names here
    # Rx = proportion given thrombolysis
    trial_columns = ['Baseline_good_outcomes',
                     'Rx',
                     'Additional_good_outcomes',
                     'onset_to_needle']
   
   
    # Iterate through hospitals
    for hospital in hospital_performance.iterrows():
        scenario_counter += 1
        print(f'Scenario {scenario_counter}', end='\r' )
    
        # Get data for one hospital
        hospital_name = hospital[0]
        hospital_data = hospital[1]
        run_data = hospital_data
    
        # Set up trial results dataframe
        trial_df = pd.DataFrame(columns=trial_columns)
    
        for trial in range(trials):
            # %Set up numpy table
            patient_array = []
            patients_per_run = int(run_data['admissions'])
            patient_array = np.zeros((patients_per_run, 23))
    
            patient_array[:, 0] = \
                np.random.binomial(1, run_data['80_plus'], patients_per_run)
    
            # Assign allowable onset to needle (for under 80 and 80+)
            patient_array[patient_array[:, 0] == 0, 1] = \
                allowed_onset_to_needle[0]
            patient_array[patient_array[:, 0] == 1, 1] = \
                allowed_onset_to_needle[1]
    
            # Assign onset time known
            patient_array[:, 2] = (np.random.binomial(
                1, run_data['onset_known'], patients_per_run) == 1)
    
            # Assign onset to arrival is less than 4 hours
            patient_array[:, 3] = (
                np.random.binomial(1, run_data['known_arrival_within_4hrs'], 
                patients_per_run))
    
            # Onset known and is within 4 hours
            patient_array[:, 4] = patient_array[:, 2] * patient_array[:, 3]
    
            # Assign onset to arrival time (natural log normal distribution) 
            mu = run_data['onset_arrival_mins_mu']
            sigma = run_data['onset_arrival_mins_sigma'] 
            patient_array[:, 5] = np.random.lognormal(
                mu, sigma, patients_per_run)
    
            # Assign arrival to scan is less than 4 hours
            patient_array[:, 6] = (
                np.random.binomial(1, run_data['scan_within_4_hrs'],
                patients_per_run))
    
            # Assign arrival to scan time (natural log normal distribution) 
            mu = run_data['arrival_scan_arrival_mins_mu']
            sigma = run_data['arrival_scan_arrival_mins_sigma']
            patient_array[:, 7] = np.random.lognormal(
                mu, sigma, patients_per_run)
    
            # Minutes left to thrombolyse after scan
            patient_array[:, 8] = patient_array[:, 1] - \
                    (patient_array[:, 5] + patient_array[:, 7])
    
            # Onset time known, scan in 4 hours and 15 min ime left to thrombolyse
            # (1 to proceed, 0 not to proceed)
            patient_array[:, 9] = (patient_array[:, 6] * patient_array[:, 4] * 
                (patient_array[:, 8] >= 15))
            
            # Ischaemic_stroke 
            # This is not used here - dealt with in 'eligble'. Set to 1.
            prop_ischaemic = 1 # run_data['ischaemic_stroke']
            patient_array[:, 10] = np.random.binomial(
                1, prop_ischaemic, patients_per_run)
    
            # Eligable for thrombolysis (proportion of ischaemic patients  
            # eligable for thrombolysis when scanned within 4 hrs )
            patient_array[:, 11] = (
                np.random.binomial(1, run_data['eligable'], patients_per_run))
    
            # Thrombolysis planned (checks this is within thrombolysys time, & 
            # patient considerd eligable for thrombolysis if scanned in time
            patient_array[:, 12] = (patient_array[:, 9] * patient_array[:, 10] *
                patient_array[:, 11])

            # scan to needle
            mu = run_data['scan_needle_mins_mu']
            sigma = run_data['scan_needle_mins_sigma']
            patient_array[:, 13] = np.random.lognormal(
                mu, sigma, patients_per_run)
    
            # Onset to needle 
            patient_array[:, 14] = \
                patient_array[:, 5] + patient_array[:, 7] + patient_array[:, 13]
            
            # Clip to 4.5 hrs + given allowance max
            patient_array[:, 14] = np.clip(patient_array[:, 14], 0, 270 + 
                allowed_overrun_for_slow_scan_to_needle)
    
            # Set baseline probability good outcome (based on age group)
            patient_array[patient_array[:, 0] == 0, 15] = good_outcome_base[0]
            patient_array[patient_array[:, 0] == 1, 15] = good_outcome_base[1]
    
            # Convert baseline probability to baseline odds
            patient_array[:, 16] = (patient_array[:, 15] /
                (1 - patient_array[:, 15]))
    
            # Calculate odds ratio based on time to treatment
            patient_array[:, 17] = 10 ** (0.326956 + 
                (-0.00086211 * patient_array[:, 14]))
    
            # Adjust odds of good outcome
            patient_array[:, 18] = patient_array[:, 16] * patient_array[:, 17]
    
            # Convert odds back to probability
            patient_array[:, 19] = (patient_array[:, 18] / 
                (1 + patient_array[:, 18]))
    
            # Improved probability of good outcome (calc changed probability 
            # then multiply by whether thrombolysis given)
            x = ((patient_array[:, 19] - patient_array[:, 15]) * 
                    patient_array[:, 12])
            
            y = np.zeros(patients_per_run)
            
            # remove any negative probabilities calculated
            # (can occur if long treatment windows set)
            patient_array[:, 20] = np.amax([x, y], axis=0)
    
            # Individual good ouctome due to thrombolysis 
            # This is not currently used in the analysis
            patient_array[:, 21] = np.random.binomial(
                1, patient_array[:, 20], patients_per_run)
    
            # Individual outcomes if no treatment given 
            patient_array[:, 22] = np.random.binomial(
                1, patient_array[:, 15], patients_per_run)
    
            # Calculate overall thrombolysis rate
            thrmobolysis_percent = patient_array[:, 12].mean() * 100
    
            # Baseline good outcomes per 1000 patients
            baseline_good_outcomes_per_1000_patients = (
                (patient_array[:, 22].sum() / patients_per_run) * 1000)
    
            # Calculate overall expected extra good outcomes
            additional_good_outcomes_per_1000_patients = (
                ((patient_array[:, 20].sum() / patients_per_run) * 1000))
            
            # Extract times for thrombolysis
            thrombolysis_results = pd.DataFrame()
            mask = patient_array[:,12] == 1
            thrombolysis_results['onset_to_arrival'] = patient_array[:,5]
            thrombolysis_results['arrival_to_scan'] = patient_array[:,7]
            thrombolysis_results['scan_to_needle'] = patient_array[:,13]
            thrombolysis_results['onset_to_needle'] = patient_array[:,14]
            
            onset_to_needle = \
                    thrombolysis_results['onset_to_needle'][mask].mean()
            
        
            # Save scenario results to dataframe
            result = [baseline_good_outcomes_per_1000_patients, 
                      thrmobolysis_percent,
                      additional_good_outcomes_per_1000_patients,
                      onset_to_needle]
            trial_df.loc[trial] = result
            
    
        trial_result = ([
            trial_df['Baseline_good_outcomes'].median(),
            trial_df['Baseline_good_outcomes'].quantile(0.05),
            trial_df['Baseline_good_outcomes'].quantile(0.95),
            trial_df['Baseline_good_outcomes'].mean(),
            trial_df['Baseline_good_outcomes'].std(),
            (trial_df['Baseline_good_outcomes'].mean() - 
                stats.norm.interval(0.95, loc=trial_df['Baseline_good_outcomes'].mean(),
                scale=trial_df['Baseline_good_outcomes'].std() / sqrt(trials))[0]),
            trial_df['Rx'].median(),
            trial_df['Rx'].quantile(0.05),
            trial_df['Rx'].quantile(0.95),
            trial_df['Rx'].mean(),
            trial_df['Rx'].std(),
            (trial_df['Rx'].mean() - stats.norm.interval(
                0.95, loc=trial_df['Rx'].mean(),
                scale=trial_df['Rx'].std() / sqrt(trials))[0]),
            trial_df['Additional_good_outcomes'].median(),
            trial_df['Additional_good_outcomes'].quantile(0.05),
            trial_df['Additional_good_outcomes'].quantile(0.95),
            trial_df['Additional_good_outcomes'].mean(),
            trial_df['Additional_good_outcomes'].std(),
            (trial_df['Additional_good_outcomes'].mean() - 
                stats.norm.interval(0.95, loc=trial_df['Additional_good_outcomes'].mean(),
                scale=trial_df['Additional_good_outcomes'].std() / sqrt(trials))[0]),
            trial_df['onset_to_needle'].mean()
            ])
        # add scenario results to results dataframe
        results_df.loc[hospital_name] = trial_result

    # Apply calibration
    results_df['calibration'] = calibration
    for col in list(results_df):
        if 'Percent_Thrombolysis' in col or 'Additional_good_outcomes' in col:
            results_df[col] *= calibration
    
    # round all results to 2 decimal places and return    
    results_df = results_df.round(2)
    return (results_df)