@@ -599,21 +599,31 @@ def main():
599599 required_files = [in_overall_fp , in_avg_fp , in_zscores_fp , in_freq_fp , in_sess_fp ]
600600 missing = [f for f in required_files if not os .path .exists (f )]
601601 if missing :
602- print (f"[SKIP] Missing required files:" )
602+ print (f"[SKIP] Missing required core files:" )
603603 for f in missing :
604604 print (f" - { f } " )
605- print (f" Run zscore_calculation.py (and clustering.py for cluster graphs) first" )
605+
606+ print (f" Run transition_edges.py first" )
606607 continue
607608
608609 print (f"[INFO] Loading data..." )
609- overall_df = pd .read_csv (in_overall_fp , low_memory = False )
610- avg_df = pd .read_csv (in_avg_fp , low_memory = False ) # raw unfiltered
611- zscores_df = pd .read_csv (in_zscores_fp , low_memory = False )
612- zfilt_df = pd .read_csv (in_zfilt_fp , low_memory = False ) if os .path .exists (in_zfilt_fp ) else pd .DataFrame ()
613-
614- for df in [overall_df , avg_df , zscores_df ]:
615- required_cols = {"team_number" , "from" , "to" , "count" }
616- missing_cols = required_cols - set (df .columns )
610+ overall_df = pd .read_csv (in_overall_fp , low_memory = False )
611+ avg_df = pd .read_csv (in_avg_fp , low_memory = False )
612+
613+ dfs_to_normalize = [overall_df , avg_df ]
614+
615+ has_clusters = os .path .exists (in_cluster_fp ) and os .path .exists (in_zfilt_fp )
616+ if has_clusters :
617+ zfilt_df = pd .read_csv (in_zfilt_fp , low_memory = False )
618+ dfs_to_normalize .append (zfilt_df )
619+ else :
620+ zfilt_df = pd .DataFrame ()
621+
622+ # normalize team_number to string
623+ for df in dfs_to_normalize :
624+ required = {"team_number" , "from" , "to" , "count" }
625+ missing_cols = required - set (df .columns )
626+
617627 if missing_cols :
618628 print (f"[ERROR] Missing columns: { missing_cols } " )
619629 break
0 commit comments