Skip to content

Instantly share code, notes, and snippets.

@dBenedek
Created June 27, 2022 09:27
Show Gist options
  • Save dBenedek/9210816bbdb14c857f73ce5571c5b76c to your computer and use it in GitHub Desktop.
Save dBenedek/9210816bbdb14c857f73ce5571c5b76c to your computer and use it in GitHub Desktop.
[Create heatmap] Create heatmap with the pheatmap package #pheatmap #heatmap #plot
library("tidyverse")
library("pheatmap")
library("fgsea")
# Read the KEGG metabolic gene sets from the GMT file. In this section only the
# names of the returned object are used.
kegg_gs <- gmtPathways("/home/rstudio/san1/BD/datasets/kegg_metabolic_human_20211026.gmt")

# Lookup table with one row per gene-set name, split by regex into:
#   category - the name with a leading "<digits> " prefix stripped and
#              everything from " - <digits>" onwards stripped
#   pathway  - the text that follows "- <digits> ", with a trailing
#              " [PATH..." suffix stripped
# Rows are sorted by category.
pathways <- data.frame(name = names(kegg_gs)) %>%
  mutate(
    category = str_remove(as.character(name), "^\\d+\\s"),
    category = str_remove(category, "\\s-\\s\\d+.+"),
    pathway = str_extract(name, "-\\s\\d+\\s.+"),
    pathway = str_remove(pathway, "-\\s\\d+\\s"),
    pathway = str_remove(pathway, "\\s\\[PATH.*")
  ) %>%
  arrange(category)
# Build the heatmap matrix from `gsva_pathways` (per the original note: a GSVA
# matrix with N samples and M pathways; it is defined outside this section).
# Steps:
#   1. scale() standardises each column, then t() flips the matrix so the
#      former columns become rows.
#   2. The row names are moved into a `pathway` column and shortened to the
#      text between "- <digits> " and " [PATH".
#   3. The shortened names are restored as row names.
data <- t(scale(gsva_pathways)) %>%
  as.data.frame() %>%
  rownames_to_column("pathway") %>%
  mutate(
    pathway = str_extract(pathway, "-\\s\\d+\\s.+\\[PATH"),
    pathway = str_remove(pathway, "-\\s\\d+\\s"),
    pathway = str_remove(pathway, "\\s\\[PATH")
  ) %>%
  column_to_rownames("pathway")
# Column (sample) annotation: the cluster assignment from `km.res_dktk` joined
# with the predominant Keck subtype from `clin_dat_dktk` (both objects are
# defined outside this section). Rows are sorted by cluster, both annotation
# columns are turned into factors with fixed level order, and the sample IDs
# end up as row names.
col_annot <- as.data.frame(km.res_dktk$cluster) %>%
  # as.data.frame() names the single column after the expression it was given.
  dplyr::rename(cluster = `km.res_dktk$cluster`) %>%
  rownames_to_column("sample") %>%
  left_join(
    dplyr::select(
      clin_dat_dktk,
      sample,
      keck_subtype = keck_predominant_subtype
    ),
    by = "sample"
  ) %>%
  arrange(cluster) %>%
  mutate(
    cluster = factor(cluster, levels = c("1", "2")),
    keck_subtype = factor(keck_subtype, levels = c("BA", "CL", "IMS"))
  ) %>%
  column_to_rownames("sample")
# Colour look-up for the heatmap annotations: one named character vector per
# annotation column, mapping each level to the colour used to draw it.
aka <- list(
  cluster = c(`1` = "#E7B800", `2` = "#2E9FDF"),
  keck_subtype = c(BA = "pink", CL = "darkblue", IMS = "red"),
  category = c(
    `Amino acid metabolism` = "#EF5350",
    `Biosynthesis of other secondary metabolites` = "#F48FB1",
    `Carbohydrate metabolism` = "#AB47BC",
    `Energy metabolism` = "#5C6BC0",
    `Glycan biosynthesis and metabolism` = "#29B6F6",
    `Lipid metabolism` = "#26A69A",
    `Metabolism of cofactors and vitamins` = "#9CCC65",
    `Metabolism of other amino acids` = "#FFEE58",
    `Metabolism of terpenoids and polyketides` = "#FFA726",
    `Not included in regular maps` = "#8D6E63",
    `Nucleotide metabolism` = "#78909C",
    `Xenobiotics biodegradation and metabolism` = "#ECEFF1"
  )
)
# Plot the heatmap of `data` with sample (column) and pathway (row)
# annotations. Clustering is switched off on both axes, so rows and columns are
# drawn in the order in which they are supplied.
pheatmap(
  # pheatmap matches annotations to the matrix by name but does not reorder the
  # matrix itself. Put the columns into the cluster-sorted order of `col_annot`
  # so that the gaps computed below fall between the cluster groups.
  # NOTE(review): this assumes every row name of `col_annot` is a column of
  # `data`; confirm that both come from the same sample set.
  data[, rownames(col_annot), drop = FALSE],
  main = "Title",
  annotation_col = col_annot,
  # Row annotation: pathway names as row names, keeping only `category`.
  annotation_row = pathways %>%
    column_to_rownames("pathway") %>%
    dplyr::select(-name),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  show_colnames = FALSE,
  annotation_colors = aka,
  # Cumulative cluster sizes give the index of the last column of each cluster;
  # dropping the final value avoids a gap after the last group.
  gaps_col = head(as.numeric(cumsum(table(col_annot$cluster))), -1)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment