Skip to content

Commit 436561f

Browse files
committed
Skip correlation calculation in ctx if it already exists in the adj
input file
1 parent ea9e6de commit 436561f

File tree

1 file changed

+12
-8
lines changed

1 file changed

+12
-8
lines changed

src/pyscenic/utils.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -261,14 +261,18 @@ def iter_modules(adjc, context):
261261
# Relationship between TF and its target, i.e. activator or repressor, is derived using the original expression
262262
# profiles. The Pearson product-moment correlation coefficient is used to derive this information.
263263

264-
# Add correlation column and create two disjoint sets of adjacencies.
265-
LOGGER.info("Calculating Pearson correlations.")
266-
# test for genes present in the adjacencies but not present in the expression matrix:
267-
unique_adj_genes = set(adjacencies[COLUMN_NAME_TF]).union(set(adjacencies[COLUMN_NAME_TARGET])) - set(ex_mtx.columns)
268-
assert len(unique_adj_genes)==0, f"Found {len(unique_adj_genes)} genes present in the network (adjacencies) output, but missing from the expression matrix. Is this a different gene expression matrix?"
269-
LOGGER.warn(f"Note on correlation calculation: the default behaviour for calculating the correlations has changed after pySCENIC verion 0.9.16. Previously, the default was to calculate the correlation between a TF and target gene using only cells with non-zero expression values (mask_dropouts=True). The current default is now to use all cells to match the behavior of the R verision of SCENIC. The original settings can be retained by setting 'rho_mask_dropouts=True' in the modules_from_adjacencies function, or '--mask_dropouts' from the CLI.\n\tDropout masking is currently set to [{rho_mask_dropouts}].")
270-
adjacencies = add_correlation(adjacencies, ex_mtx,
271-
rho_threshold=rho_threshold, mask_dropouts=rho_mask_dropouts)
264+
if not {'regulation', 'rho'}.issubset(adjacencies.columns):
265+
# Add correlation column and create two disjoint sets of adjacencies.
266+
LOGGER.info("Calculating Pearson correlations.")
267+
# test for genes present in the adjacencies but not present in the expression matrix:
268+
unique_adj_genes = set(adjacencies[COLUMN_NAME_TF]).union(set(adjacencies[COLUMN_NAME_TARGET])) - set(ex_mtx.columns)
269+
assert len(unique_adj_genes)==0, f"Found {len(unique_adj_genes)} genes present in the network (adjacencies) output, but missing from the expression matrix. Is this a different gene expression matrix?"
270+
LOGGER.warn(f"Note on correlation calculation: the default behaviour for calculating the correlations has changed after pySCENIC verion 0.9.16. Previously, the default was to calculate the correlation between a TF and target gene using only cells with non-zero expression values (mask_dropouts=True). The current default is now to use all cells to match the behavior of the R verision of SCENIC. The original settings can be retained by setting 'rho_mask_dropouts=True' in the modules_from_adjacencies function, or '--mask_dropouts' from the CLI.\n\tDropout masking is currently set to [{rho_mask_dropouts}].")
271+
adjacencies = add_correlation(adjacencies, ex_mtx,
272+
rho_threshold=rho_threshold, mask_dropouts=rho_mask_dropouts)
273+
else:
274+
LOGGER.info("Using existing Pearson correlations from the adjacencies file.")
275+
272276
activating_modules = adjacencies[adjacencies[COLUMN_NAME_REGULATION] > 0.0]
273277
if keep_only_activating:
274278
modules_iter = iter_modules(activating_modules, frozenset([ACTIVATING_MODULE]))

0 commit comments

Comments
 (0)