Skip to content

Commit c32ca68

Browse files
authored
added generate_tree function to Kinome object (#51)
* added generate_tree function to Kinome object * moved generate_tree function to utils.py; created generate_tree wrappers in enrichment results * improved type checking for NaN values in MEA * change named path to output_path
1 parent f82f8d8 commit c32ca68

File tree

6 files changed

+5588
-6
lines changed

6 files changed

+5588
-6
lines changed

CHANGELOG.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,48 @@
11
# Changelog
22

3+
## Unreleased
4+
5+
Added
6+
7+
- New feature for generating a kinome tree using the same logic as the website. The original kinome tree SVG is sourced from [CORAL](http://phanstiel-lab.med.unc.edu/CORAL/)
8+
9+
## [1.5.0] - 2025-06-27
10+
11+
Added
12+
13+
- New feature for scoring all the possible phosphosites in an entire protein
14+
15+
## [1.4.1] - 2025-06-19
16+
17+
Added
18+
19+
- Function to retrieve label mapping
20+
21+
Changed
22+
23+
- Label retrieval method in enrichment modules
24+
25+
## [1.4.0] - 2025-06-18
26+
27+
Added
28+
29+
- New feature for displaying different label types in volcanoes: gene name, protein name, matrix name, or curated display name
30+
31+
## [1.3.0] - 2025-05-13
32+
33+
Added
34+
35+
- New feature for running MEA with custom kinase-substrate sets
36+
- The kinase-substrate sets used in the MEA to the results
37+
38+
Changed
39+
40+
- Kinase name in custom sets to uppercase for consistency
41+
42+
Fixed
43+
44+
- Description misspellings
45+
346
## [1.2.0] - 2025-04-15
447

548
Added

src/kinase_library/databases/kinase_data/base_tree.svg

Lines changed: 5286 additions & 0 deletions
Loading

src/kinase_library/enrichment/binary_enrichment.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -615,4 +615,40 @@ def plot_volcano(self, sig_lff=0, sig_pval=0.1, fg_percent_thresh=0, fg_percent_
615615
symmetric_xaxis=symmetric_xaxis, grid=grid, max_window=max_window,
616616
title=title, xlabel=xlabel, ylabel=ylabel,
617617
plot=plot, save_fig=save_fig, return_fig=return_fig,
618-
ax=ax, **plot_kwargs)
618+
ax=ax, **plot_kwargs)
619+
620+
def generate_tree(self, output_path, sort_by: str = 'fisher_pval', sort_direction: str = 'ascending', filter_top: int = None, **kwargs):
    """
    Generate a colored kinome tree from the enrichment results.

    The enrichment results are sorted by ``sort_by``, optionally truncated
    to the first ``filter_top`` rows, and handed to ``utils.generate_tree``.
    Node colors are always based on the 'log2_freq_factor' column, using the
    fixed thresholds high=3.0, middle=0.0 and low=-3.0.

    Parameters
    ----------
    output_path : str
        Destination path for the generated kinome tree image.
    sort_by : str, optional
        Column name to sort the DataFrame by before generating the tree. Default is 'fisher_pval'.
    sort_direction : str, optional
        Direction to sort the DataFrame, either 'ascending' or 'descending'. Default is 'ascending'.
    filter_top : int, optional
        If provided, only the top N rows (after sorting) will be included in the tree.
        Must be a non-negative integer. Default is None (no filtering).
    **kwargs : optional
        Additional keyword arguments to be passed to the `utils.generate_tree` function.

    Returns
    -------
    Whatever `utils.generate_tree` returns.

    Raises
    ------
    ValueError
        If `sort_by` is not a column of the enrichment results, if
        `sort_direction` is not 'ascending' or 'descending', or if
        `filter_top` is not None and not a non-negative integer.
    """
    from numbers import Integral

    results = self.enrichment_results

    # Check if the sort_by column exists in the enrichment results
    if sort_by not in results.columns:
        raise ValueError(f"Column '{sort_by}' not found in enrichment results. Available columns: {results.columns.tolist()}")

    # Check if the sort_direction is valid
    if sort_direction not in ('ascending', 'descending'):
        raise ValueError("sort_direction must be either 'ascending' or 'descending'.")

    # DataFrame.head(-N) means "everything except the last N rows", which is
    # not "top N"; reject negative and non-integer values explicitly.
    if filter_top is not None:
        if isinstance(filter_top, bool) or not isinstance(filter_top, Integral) or filter_top < 0:
            raise ValueError("filter_top must be a non-negative integer or None.")

    # Sort the DataFrame based on the specified column and direction
    df = results.sort_values(by=sort_by, ascending=(sort_direction == 'ascending'))

    if filter_top is not None:
        df = df.head(filter_top)

    # This kinome tree coloring will always be based on 'log2_freq_factor'
    color_thresholds = {"high": 3.0, "middle": 0.0, "low": -3.0}
    return utils.generate_tree(df, output_path, "log2_freq_factor", color_thresholds, **kwargs)

src/kinase_library/enrichment/differential_phosphorylation.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import matplotlib.pyplot as plt
1010
import warnings
1111

12-
from ..utils import _global_vars, exceptions
12+
from ..utils import _global_vars, exceptions, utils
1313
from ..modules import data, enrichment
1414
from ..objects import phosphoproteomics as pps
1515
from ..enrichment import binary_enrichment as be
@@ -899,4 +899,40 @@ def plot_down_up_comb_volcanos(self, sig_lff=0, sig_pval=0.1, adj_pval=True, kin
899899
plt.close(fig)
900900

901901
if return_fig:
902-
return fig
902+
return fig
903+
904+
def generate_tree(self, output_path, sort_by: str = 'most_sig_log2_freq_factor', sort_direction: str = 'ascending', filter_top: int = None, **kwargs):
    """
    Generate a colored kinome tree from the combined enrichment results.

    The combined enrichment results are sorted by ``sort_by``, optionally
    truncated to the first ``filter_top`` rows, and handed to
    ``utils.generate_tree``. Node colors are always based on the
    'most_sig_log2_freq_factor' column, using the fixed thresholds
    high=3.0, middle=0.0 and low=-3.0.

    Parameters
    ----------
    output_path : str
        Destination path for the generated kinome tree image.
    sort_by : str, optional
        Column name to sort the DataFrame by before generating the tree. Default is 'most_sig_log2_freq_factor'.
    sort_direction : str, optional
        Direction to sort the DataFrame, either 'ascending' or 'descending'. Default is 'ascending'.
    filter_top : int, optional
        If provided, only the top N rows (after sorting) will be included in the tree.
        Must be a non-negative integer. Default is None (no filtering).
    **kwargs : optional
        Additional keyword arguments to be passed to the `utils.generate_tree` function.

    Returns
    -------
    Whatever `utils.generate_tree` returns.

    Raises
    ------
    ValueError
        If `sort_by` is not a column of the combined enrichment results, if
        `sort_direction` is not 'ascending' or 'descending', or if
        `filter_top` is not None and not a non-negative integer.
    """
    from numbers import Integral

    results = self.combined_enrichment_results

    # Check if the sort_by column exists in the combined enrichment results
    if sort_by not in results.columns:
        raise ValueError(f"Column '{sort_by}' not found in combined enrichment results. Available columns: {results.columns.tolist()}")

    # Check if the sort_direction is valid
    if sort_direction not in ('ascending', 'descending'):
        raise ValueError("sort_direction must be either 'ascending' or 'descending'.")

    # DataFrame.head(-N) means "everything except the last N rows", which is
    # not "top N"; reject negative and non-integer values explicitly.
    if filter_top is not None:
        if isinstance(filter_top, bool) or not isinstance(filter_top, Integral) or filter_top < 0:
            raise ValueError("filter_top must be a non-negative integer or None.")

    # Sort the DataFrame based on the specified column and direction
    df = results.sort_values(by=sort_by, ascending=(sort_direction == 'ascending'))

    if filter_top is not None:
        df = df.head(filter_top)

    # This kinome tree coloring will always be based on 'most_sig_log2_freq_factor'
    color_thresholds = {"high": 3.0, "middle": 0.0, "low": -3.0}
    return utils.generate_tree(df, output_path, "most_sig_log2_freq_factor", color_thresholds, **kwargs)

src/kinase_library/enrichment/mea.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import pandas as pd
99
import gseapy as gp
1010

11-
from ..utils import _global_vars, exceptions
11+
from ..utils import _global_vars, exceptions, utils
1212
from ..modules import data, enrichment
1313
from ..objects import phosphoproteomics as pps
1414
from ..logger import logger
@@ -578,4 +578,46 @@ def plot_volcano(self, sig_es=0, sig_pval=0.1, adj_pval=True, kinases=None,
578578
symmetric_xaxis=symmetric_xaxis, grid=grid, max_window=max_window,
579579
title=title, xlabel=xlabel, ylabel=ylabel,
580580
plot=plot, save_fig=save_fig, return_fig=return_fig,
581-
ax=ax, **plot_kwargs)
581+
ax=ax, **plot_kwargs)
582+
583+
def generate_tree(self, output_path, sort_by: str = 'p-value', sort_direction: str = 'ascending', filter_top: int = None, **kwargs):
    """
    Generate a colored kinome tree from the enrichment results.

    The enrichment results are sorted by ``sort_by``, optionally truncated
    to the first ``filter_top`` rows, and handed to ``utils.generate_tree``.
    Node colors are always based on the 'NES' column; the high and low
    thresholds are the maximum and minimum numeric NES among the selected
    rows, and the middle threshold is 0.0.

    Parameters
    ----------
    output_path : str
        Destination path for the generated kinome tree image.
    sort_by : str, optional
        Column name to sort the DataFrame by before generating the tree. Default is 'p-value'.
    sort_direction : str, optional
        Direction to sort the DataFrame, either 'ascending' or 'descending'. Default is 'ascending'.
    filter_top : int, optional
        If provided, only the top N rows (after sorting) will be included in the tree.
        Must be a non-negative integer. Default is None (no filtering).
    **kwargs : optional
        Additional keyword arguments to be passed to the `utils.generate_tree` function.

    Returns
    -------
    Whatever `utils.generate_tree` returns.

    Raises
    ------
    ValueError
        If `sort_by` is not a column of the enrichment results, if
        `sort_direction` is not 'ascending' or 'descending', or if
        `filter_top` is not None and not a non-negative integer.
    """
    from numbers import Integral

    results = self.enrichment_results

    # Check if the sort_by column exists in the enrichment results
    if sort_by not in results.columns:
        raise ValueError(f"Column '{sort_by}' not found in enrichment results. Available columns: {results.columns.tolist()}")

    # Check if the sort_direction is valid
    if sort_direction not in ('ascending', 'descending'):
        raise ValueError("sort_direction must be either 'ascending' or 'descending'.")

    # DataFrame.head(-N) means "everything except the last N rows", which is
    # not "top N"; reject negative and non-integer values explicitly.
    if filter_top is not None:
        if isinstance(filter_top, bool) or not isinstance(filter_top, Integral) or filter_top < 0:
            raise ValueError("filter_top must be a non-negative integer or None.")

    # Sort the DataFrame based on the specified column and direction
    df = results.sort_values(by=sort_by, ascending=(sort_direction == 'ascending'))

    if filter_top is not None:
        df = df.head(filter_top)

    # Compute max and min NES from the *coerced* numeric values. Filtering
    # the raw column by coercibility would keep entries such as the string
    # "1.5", and max()/min() would then compare str with float.
    numeric_nes = pd.to_numeric(df['NES'], errors='coerce').dropna()
    max_nes = numeric_nes.max()
    min_nes = numeric_nes.min()

    # This kinome tree coloring will always be based on 'NES'
    color_thresholds = {"high": max_nes, "middle": 0.0, "low": min_nes}
    return utils.generate_tree(df, output_path, "NES", color_thresholds, **kwargs)

src/kinase_library/utils/utils.py

Lines changed: 140 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
##################################
55
"""
66
import re
7+
import os
78
import string
89
import numpy as np
910
import pandas as pd
@@ -607,4 +608,142 @@ def list_series_to_df(subs_list, col_name=None):
607608
if isinstance(subs_list, pd.Series):
608609
return(subs_list.to_frame(name=col_name))
609610
if isinstance(subs_list, list):
610-
return(pd.Series(subs_list).to_frame(name=col_name))
611+
return(pd.Series(subs_list).to_frame(name=col_name))
612+
613+
def generate_tree(
    kinase_matrix: pd.DataFrame,
    output_path: str,
    color_column: str,
    color_thresholds: dict,
    node_size: int = 5,
    branch_color: str = "#663636",
    low_color: str = "#999acf",
    mid_color: str = "#c8c8c8",
    high_color: str = "#fa6464",
):
    """
    Generic function to generate a colored kinome tree. See DiffPhosEnrichmentResults.generate_tree(), MeaEnrichmentResults.generate_tree(), and EnrichmentResults.generate_tree() for specific implementations.

    The bundled base tree SVG is loaded, every circle whose kinase appears in
    the index of `kinase_matrix` is made opaque, and circles with a numeric,
    non-missing value in `color_column` are filled with a color interpolated
    between `low_color`, `mid_color` and `high_color`. The result is written
    to `output_path`.

    Parameters
    ----------
    kinase_matrix : pd.DataFrame
        DataFrame containing kinases as indices and numerical columns to color the nodes. e.g. the output of kl.Substrate('PSVEPPLsQETFSDL').predict()
    output_path : str
        Path to save the tree image. Must end with '.svg'. Missing parent
        directories are created.
    color_column : str
        Column name in the kinase matrix to use for coloring the nodes.
    color_thresholds : dict
        Dictionary containing the color thresholds for low, middle, and high values. e.g. { "high": 3.0, "middle": 0.0, "low": -3.0 }.
    node_size : int
        Size of the nodes (SVG circles). Default is 5.
    branch_color : str
        Hex color for the tree branches. Default is "#663636".
    low_color : str
        Hex color for the low end of the heatmap. Default is "#999acf".
    mid_color : str
        Hex color for the midpoint of the heatmap. Default is "#c8c8c8".
    high_color : str
        Hex color for the high end of the heatmap. Default is "#fa6464".

    Raises
    ------
    ValueError
        If `output_path` is not a string ending with '.svg', if
        `color_column` is not a column of `kinase_matrix`, or if
        `color_thresholds` lacks one of the 'high', 'middle', 'low' keys.
    Exception
        If creating the output directory or writing the SVG file fails.
    """
    import xml.etree.ElementTree as ET

    svg_namespace = 'http://www.w3.org/2000/svg'
    ns = {'svg': svg_namespace}

    def hex_to_rgb(hex_color):
        """Convert hex string like '#FF0000' to (255, 0, 0)"""
        digits = hex_color.lstrip('#')
        return tuple(int(digits[i:i+2], 16) for i in (0, 2, 4))

    def interpolate_color(value, upper_hex, lower_hex, upper_value, lower_value):
        """Linearly blend two hex colors according to where `value` (clamped) lies between the bounds."""
        upper_rgb = hex_to_rgb(upper_hex)
        lower_rgb = hex_to_rgb(lower_hex)

        clamped = max(min(value, upper_value), lower_value)
        span = upper_value - lower_value
        # A zero-width range would divide by zero; fall back to the lower color.
        fraction = (clamped - lower_value) / span if span != 0 else 0

        blended = (
            round(upper * fraction + lower * (1.0 - fraction))
            for upper, lower in zip(upper_rgb, lower_rgb)
        )
        return '#{:02X}{:02X}{:02X}'.format(*blended)

    def heatmap_color(value):
        """Pick the low→mid or mid→high gradient depending on which side of the midpoint `value` falls."""
        midpoint = color_thresholds["middle"]
        if value < midpoint:
            return interpolate_color(value, mid_color, low_color, midpoint, color_thresholds["low"])
        return interpolate_color(value, high_color, mid_color, color_thresholds["high"], midpoint)

    # Check if output_path is valid
    if not isinstance(output_path, str) or not output_path.endswith('.svg'):
        raise ValueError("Output path must be a valid string ending with '.svg'.")

    if kinase_matrix.get(color_column, None) is None:
        raise ValueError(f"Column '{color_column}' not found in the kinase matrix. Please provide a valid column name.")

    # Check if color thresholds are valid
    if not all(key in color_thresholds for key in ["high", "middle", "low"]):
        raise ValueError("Color thresholds must contain 'high', 'middle', and 'low' keys.")

    kinases = kinase_matrix.index

    # Create map for quick access to kinase names (UniProt ID -> matrix name)
    kinome_info = data.get_kinome_info()
    kinase_uniprot_mapping = dict(zip(kinome_info['UNIPROT_ID'], kinome_info['MATRIX_NAME']))

    # Load the base SVG
    current_dir = os.path.dirname(__file__)
    svg_path = os.path.abspath(os.path.join(current_dir, "../databases/kinase_data/base_tree.svg"))
    tree = ET.parse(svg_path)
    root = tree.getroot()

    # Set the branch color if not default
    if branch_color != "#663636":
        for line in root.findall('.//svg:path', namespaces=ns):
            line.set('fill', branch_color)

    numeric_types = (int, float, np.integer, np.floating)

    # Highlight every circle whose kinase is present in the matrix, and color
    # those that carry a usable numeric value.
    for circle in root.findall('.//svg:circle', namespaces=ns):
        css_class = circle.get('class')
        if not css_class:
            # Circles without a class attribute cannot be mapped to a kinase.
            continue

        # The UniProt ID is taken as the last '_'-separated token of the class.
        uniprot_id = css_class.split("_")[-1]
        kinase = kinase_uniprot_mapping.get(uniprot_id, None)
        if kinase is None or kinase not in kinases:
            continue

        circle.set('opacity', '1')

        val = kinase_matrix.at[kinase, color_column]
        # Type check first: pd.notna() on a non-scalar would yield an array
        # whose truth value is ambiguous.
        if isinstance(val, numeric_types) and pd.notna(val):
            circle.set('fill', heatmap_color(val))
            circle.set('stroke', "gray")
            circle.set('stroke-width', "0.5px")
            circle.set('r', str(node_size))

    # Serialize SVG elements without an auto-generated 'ns0:' prefix, which
    # some SVG consumers do not recognise.
    # NOTE: register_namespace updates ElementTree's process-wide registry.
    ET.register_namespace('', svg_namespace)

    output_dir = os.path.dirname(output_path)
    try:
        # A bare filename has an empty dirname, and os.makedirs('') raises.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        tree.write(output_path)
    except Exception as e:
        raise Exception(f"Error saving SVG file: {e}") from e

0 commit comments

Comments
 (0)