Fix UnboundLocalError error in onerun_SEIR#585
Conversation
| return out_df | ||
| return None |
There was a problem hiding this comment.
is returning None the right answer here? if the prior logical branch isn't visited, are there any side effects here? i guess the timer's, but that seems like a weird use case.
feels more like we should always be creating and returning the df, irrespective of whether it gets stored to disk
|
echoing @pearsonca wouldn't something like this be better? def onerun_SEIR(
sim_id2write: int,
modinf: ModelInfo,
load_ID: bool = False,
sim_id2load: int = None,
config=None,
) -> pd.DataFrame:
npi = None
if modinf.npi_config_seir:
npi = build_npi_SEIR(
modinf=modinf, load_ID=load_ID, sim_id2load=sim_id2load, config=config
)
with Timer("onerun_SEIR.compartments"):
(
unique_strings,
transition_array,
proportion_array,
proportion_info,
) = modinf.compartments.get_transition_array()
with Timer("onerun_SEIR.seeding"):
if load_ID:
initial_conditions = modinf.initial_conditions.get_from_file(
sim_id2load, modinf=modinf
)
else:
initial_conditions = modinf.initial_conditions.get_from_config(
sim_id2write, modinf=modinf
)
seeding_data, seeding_amounts = modinf.get_seeding_data(
sim_id=sim_id2load if load_ID else sim_id2write
)
with Timer("onerun_SEIR.parameters"):
if load_ID:
p_draw = modinf.parameters.parameters_load(
param_df=modinf.read_simID(ftype="spar", sim_id=sim_id2load),
n_days=modinf.n_days,
nsubpops=modinf.nsubpops,
)
else:
p_draw = modinf.parameters.parameters_quick_draw(
n_days=modinf.n_days, nsubpops=modinf.nsubpops
)
parameters = modinf.parameters.parameters_reduce(p_draw, npi)
log_debug_parameters(p_draw, "Parameters without seir_modifiers")
log_debug_parameters(parameters, "Parameters with seir_modifiers")
parsed_parameters = modinf.compartments.parse_parameters(
parameters, modinf.parameters.pnames, unique_strings
)
log_debug_parameters(parsed_parameters, "Unique Parameters used by transitions")
with Timer("onerun_SEIR.compute"):
states = steps_SEIR(
modinf,
parsed_parameters,
transition_array,
proportion_array,
proportion_info,
initial_conditions,
seeding_data,
seeding_amounts,
)
with Timer("onerun_SEIR.postprocess"):
if modinf.write_csv or modinf.write_parquet:
write_spar_snpi(sim_id2write, modinf, p_draw, npi)
out_df = write_seir(sim_id2write, modinf, states)
else:
out_df = states
return out_df |
|
looking at the internals of more broadly: this is another case of the general problem throughout flepimop of entangling calculation and IO. I think somewhere it might be a config parsing error if at least one of write_csv, write_parquet aren't true (might also be a parsing error if both are). |
|
strong agree @pearsonca
strong agree, I'll add it to my list of things to try and disentangle |
Yeah, that's a good point, should be a quick edit to the commit. |
af32caa to
4a99977
Compare
|
so tracking back the code @TimothyWillard @pearsonca the function write_spar_snpi in the seir module is just a wrapper on this function from model info: def write_simID(
self,
ftype: str,
sim_id: int,
df: pd.DataFrame,
input: bool = False,
extension_override: str = "",
):
fname = self.get_filename(
ftype=ftype,
sim_id=sim_id,
input=input,
extension_override=extension_override,
)
# create the directory if it does exists:
os.makedirs(os.path.dirname(fname), exist_ok=True)
# print(f"Writing {fname}")
write_df(
fname=fname,
df=df,
)
return fnamedef write_seir(sim_id, modinf: ModelInfo, states):
# print_disk_diagnosis()
out_df = states2Df(modinf, states)
modinf.write_simID(ftype="seir", sim_id=sim_id, df=out_df)
return out_dfthus, if I'm falling the logic correctly as written the edit will save the data frame to disk even if both flags are set to false, we just want to call states2Df as you do (function below for context, lives in seir module): def states2Df(modinf: ModelInfo, states):
# Tidyup data for R, to save it:
#
# Write output to .snpi.*, .spar.*, and .seir.* files
# states is # both are [ndays x ncompartments x nspatial_nodes ] -> this is important here
# add line of zero to diff, so we get the real cumulative.
# states_diff = np.zeros((states_cumu.shape[0] + 1, *states_cumu.shape[1:]))
# states_diff[1:, :, :] = states_cumu
# states_diff = np.diff(states_diff, axis=0)
ts_index = pd.MultiIndex.from_product(
[
pd.date_range(modinf.ti, modinf.tf, freq="D"),
modinf.compartments.compartments["name"],
],
names=["date", "mc_name"],
)
# prevalence data, we use multi.index dataframe, sparring us the array manipulation we use to do
prev_df = pd.DataFrame(
data=states["prevalence"]
.to_numpy()
.reshape(modinf.n_days * modinf.compartments.get_ncomp(), modinf.nsubpops),
index=ts_index,
columns=modinf.subpop_struct.subpop_names,
).reset_index()
prev_df = pd.merge(
left=modinf.compartments.get_compartments_explicitDF(),
right=prev_df,
how="right",
on="mc_name",
)
prev_df.insert(loc=0, column="mc_value_type", value="prevalence")
ts_index = pd.MultiIndex.from_product(
[
pd.date_range(modinf.ti, modinf.tf, freq="D"),
modinf.compartments.compartments["name"],
],
names=["date", "mc_name"],
)
incid_df = pd.DataFrame(
data=states["incidence"]
.to_numpy()
.reshape(modinf.n_days * modinf.compartments.get_ncomp(), modinf.nsubpops),
index=ts_index,
columns=modinf.subpop_struct.subpop_names,
).reset_index()
incid_df = pd.merge(
left=modinf.compartments.get_compartments_explicitDF(),
right=incid_df,
how="right",
on="mc_name",
)
incid_df.insert(loc=0, column="mc_value_type", value="incidence")
out_df = pd.concat((incid_df, prev_df), axis=0).set_index("date")
out_df["date"] = out_df.index
return out_df
and then just return |
jc-macdonald
left a comment
There was a problem hiding this comment.
looks all good to me
Could reach an `UnboundedLocalError` if `modinf.write_csv` and `modinf.write_parquet` were both `False` as `out_df` would not be set. Solution is to move the call to `states2DF` from `write_seir` to `onerun_SEIR` and return the `out_df` regardless of the `modinf.write_*` attributes.
4a99977 to
f9c0cfb
Compare
Follow up to #585 to account for the arguments of `gempyor.seir.write_seir` taking the out dataframe instead of the states, leaving it to the user to call `gempyor.seir.states2DF`.
Follow up to #585 to account for the arguments of `gempyor.seir.write_seir` taking the out dataframe instead of the states, leaving it to the user to call `gempyor.seir.states2DF`.
Follow up to #585 to account for the arguments of `gempyor.seir.write_seir` taking the out dataframe instead of the states, leaving it to the user to call `gempyor.seir.states2DF`.
Follow up to #585 to account for the arguments of `gempyor.seir.write_seir` taking the out dataframe instead of the states, leaving it to the user to call `gempyor.seir.states2DF`.
Could reach an
UnboundedLocalErrorifmodinf.write_csvandmodinf.write_parquetwere bothFalseasout_dfwould not be set. Solution is to just returnNonein this case.