2020
2121warnings .filterwarnings ("ignore" )
2222
23+ rng = np .random .default_rng (42 )
24+
25+ def bootstrap_corr (x , y , method = "Pearson" , confidence_interval = 0.95 , n_bootstraps = 2000 , seed = 42 , verbose = True ):
26+ x = np .asarray (x )
27+ y = np .asarray (y )
28+
29+ corr_func = pearsonr
30+ if method == "Spearman" :
31+ corr_func = spearmanr
32+
33+ r_observed , p_value = corr_func (x , y )
34+ n_samples = len (x )
35+
36+ inds = np .arange (n_samples )
37+ boot_corrs = []
38+ for _ in range (n_bootstraps ):
39+ resampled_inds = rng .choice (inds , size = n_samples , replace = True )
40+ rx = x [resampled_inds ]
41+ ry = y [resampled_inds ]
42+ boot_r , _ = corr_func (rx , ry )
43+ boot_corrs .append (boot_r )
44+ alpha = (1.0 - confidence_interval )/ 2.0
45+ ci_low = np .nanpercentile (boot_corrs , 100 * alpha )
46+ ci_high = np .nanpercentile (boot_corrs , 100 * (1.0 - alpha ))
47+
48+ if verbose :
49+ print (f'Number of samples: { n_samples } ' )
50+ print (f'{ method } Correlation: { r_observed :.3g} | 95% CI: [{ ci_low :.2f} , { ci_high :.2f} ] | p-value: { p_value :.3g} ' )
51+
52+ return r_observed , p_value , ci_low , ci_high
53+
2354def run_all_analyses ():
2455 """
2556 Run all analysis functions
@@ -29,6 +60,8 @@ def run_all_analyses():
2960
3061 df = io .load_image_analysis_extracted_features ()
3162
63+ print ("Loaded image analysis extracted features and started running analyses..." )
64+
3265 plot_area_at_glass_all_data (df , FIGS_DIR , OUT_TYPE )
3366 plot_area_at_glass_h2b (df , FIGS_DIR , OUT_TYPE )
3467 plot_migration_timing_all_data (df , FIGS_DIR , OUT_TYPE )
@@ -611,10 +644,8 @@ def plot_gene_expression_experiments(df, figs_dir, out_type):
611644 migration = df_g ['Migration Onset Time (Footprint Area Based)' ][[cond in val for val in df_g ['Experimental Condition' ].values ]].dropna ()
612645 metric = df_g ['gene_metric' ][[cond in val for val in df_g ['Experimental Condition' ].values ]].dropna ()
613646
614- pearson , p_pvalue = pearsonr (migration , metric )
615- spearman , s_pvalue = spearmanr (migration , metric )
616- print (f'Pearson Correlation: { pearson :.3g} | p-value: { p_pvalue :.3g} ' )
617- print (f'Spearman Correlation: { spearman :.3g} | p-value: { s_pvalue :.3g} ' )
647+ pearson , p_pvalue , _ , _ = bootstrap_corr (migration , metric , "Pearson" )
648+ spearman , s_pvalue , _ , _ = bootstrap_corr (migration , metric , "Spearman" )
618649
619650 X = df_g ['Migration Onset Time (Footprint Area Based)' ][[cond in val for val in df_g ['Experimental Condition' ].values ]].dropna ()
620651 Y = df_g ['gene_metric' ][[cond in val for val in df_g ['Experimental Condition' ].values ]].dropna ()
@@ -638,11 +669,9 @@ def plot_gene_expression_experiments(df, figs_dir, out_type):
638669 migration = df_g ['Migration Onset Time (Footprint Area Based)' ]
639670 metric = df_g ['gene_metric' ]
640671
641- pearson , p_pvalue = pearsonr (migration , metric )
642- spearman , s_pvalue = spearmanr (migration , metric )
643- print (f'Pearson Correlation: { pearson :.3g} | p-value: { p_pvalue :.3g} ' )
644- print (f'Spearman Correlation: { spearman :.3g} | p-value: { s_pvalue :.3g} ' )
645-
672+ pearson , p_pvalue , _ , _ = bootstrap_corr (migration , metric , "Pearson" )
673+ spearman , s_pvalue , _ , _ = bootstrap_corr (migration , metric , "Spearman" )
674+
646675 X = df_g ['Migration Onset Time (Footprint Area Based)' ]
647676 Y = df_g ['gene_metric' ]
648677
@@ -719,6 +748,8 @@ def plot_collagenase_analysis(df, figs_dir, out_type):
719748 X = df_gene ['Collagenease concentration (ug/mL)' ]
720749 Y = df_gene ['Migration Onset Time (Footprint Area Based)' ]
721750
751+ _ = bootstrap_corr (X , Y , "Spearman" )
752+
722753 # It's important to add a constant (intercept) to the model
723754 X = sm .add_constant (X )
724755
@@ -747,7 +778,6 @@ def plot_collagenase_analysis(df, figs_dir, out_type):
747778 else :
748779 print ("\n Conclusion: The p-value for the slope is not less than 0.05, so we cannot conclude there is a significant linear relationship." )
749780
750-
751781def plot_mmp_inhibitor_migration (df , figs_dir , out_type ):
752782 """
753783 Parameters
@@ -1015,8 +1045,8 @@ def plot_inside_outside_migration_timing(df, figs_dir, out_type):
10151045 X = dfio_scatter ['Migration Onset Time (Footprint Area Based)' ]
10161046 Y = dfio_scatter ['Migration Onset Time (Inside/Outside Basement Membrane Based)' ]
10171047
1018- p_results = pearsonr (X .values , Y .values )
1019- r_results = spearmanr (X .values , Y .values )
1048+ p_results = bootstrap_corr (X .values , Y .values , "Pearson" )
1049+ r_results = bootstrap_corr (X .values , Y .values , "Spearman" )
10201050 print ('n: {0:d}' .format (n_movies_io ))
10211051 print ('Pearson Correlation: {0:.3g} | p-Value: {1:.3g}' .format (p_results .statistic , p_results .pvalue ))
10221052 print ('Spearman Correlation: {0:.3g} | p-Value: {1:.3g}' .format (r_results .statistic , r_results .pvalue ))
0 commit comments