dBenedek · June 27, 2022 09:27
diff --git a/pheatmap_heatmap.R b/pheatmap_heatmap.R
 library("tidyverse")
 library("pheatmap")
 library("fgsea")


 # Load gene set data:
 # Read the KEGG metabolic gene sets from the GMT file, then build a lookup
 # table with one row per gene-set name plus two labels derived from that name.
 kegg_gs <- gmtPathways("/home/rstudio/san1/BD/datasets/kegg_metabolic_human_20211026.gmt")
 pathways <- data.frame(name = names(kegg_gs)) %>%
   mutate(
     # category: strip the leading numeric code, then everything from
     # " - <digits>" onwards.
     category = as.character(name) %>%
       str_replace("^\\d+\\s", "") %>%
       str_replace("\\s-\\s\\d+.+", ""),
     # pathway: take the part starting at "- <digits> ", then remove that
     # prefix and the trailing " [PATH..." tag.
     pathway = name %>%
       str_extract("-\\s\\d+\\s.+") %>%
       str_replace("-\\s\\d+\\s", "") %>%
       str_replace("\\s\\[PATH.*", "")
   ) %>%
   arrange(category)


 # Prepare data (here it is a GSVA matrix with N samples and M pathways):
 # scale() standardises each column of gsva_pathways, t() transposes the result,
 # and the row names are then shortened to the bare pathway label.
 data <- gsva_pathways %>%
   scale() %>%
   t() %>%
   as.data.frame() %>%
   rownames_to_column(var = "pathway") %>%
   mutate(
     # Keep "- <digits> <label> [PATH", then peel off the numeric prefix and
     # the " [PATH" suffix so only <label> is left.
     pathway = pathway %>%
       str_extract("-\\s\\d+\\s.+\\[PATH") %>%
       str_replace("-\\s\\d+\\s", "") %>%
       str_replace("\\s\\[PATH", "")
   ) %>%
   column_to_rownames(var = "pathway")


 # Prepare column annotation (some clinical data and clustering results together):
 # Start from the cluster assignment in km.res_dktk, move its names into a
 # `sample` column, attach the subtype from clin_dat_dktk by sample, sort by
 # cluster, and finally turn `sample` back into row names.
 col_annot <- as.data.frame(km.res_dktk$cluster) %>%
   dplyr::rename(cluster = "km.res_dktk$cluster") %>%
   rownames_to_column(var = "sample") %>%
   left_join(
     # Only the join key and the subtype are needed; the subtype is renamed
     # while selecting.
     dplyr::select(clin_dat_dktk, sample, keck_subtype = keck_predominant_subtype),
     by = "sample"
   ) %>%
   arrange(cluster) %>%
   mutate(
     # Fixed level sets so the factors line up with the annotation colours.
     cluster = factor(cluster, levels = c("1", "2")),
     keck_subtype = factor(keck_subtype, levels = c("BA", "CL", "IMS"))
   ) %>%
   column_to_rownames(var = "sample")



 # Annotation colors:
 # One named colour vector per annotation track; the element names are the
 # values that appear in the corresponding annotation column.
 aka <- list(
   cluster = c(`1` = "#E7B800", `2` = "#2E9FDF"),
   keck_subtype = c(BA = "pink", CL = "darkblue", IMS = "red"),
   category = c(
     "Amino acid metabolism" = "#EF5350",
     "Biosynthesis of other secondary metabolites" = "#F48FB1",
     "Carbohydrate metabolism" = "#AB47BC",
     "Energy metabolism" = "#5C6BC0",
     "Glycan biosynthesis and metabolism" = "#29B6F6",
     "Lipid metabolism" = "#26A69A",
     "Metabolism of cofactors and vitamins" = "#9CCC65",
     "Metabolism of other amino acids" = "#FFEE58",
     "Metabolism of terpenoids and polyketides" = "#FFA726",
     "Not included in regular maps" = "#8D6E63",
     "Nucleotide metabolism" = "#78909C",
     "Xenobiotics biodegradation and metabolism" = "#ECEFF1"
   )
 )


 # Plot heatmap:
 # With column clustering switched off, pheatmap draws the columns in the order
 # of the matrix it receives and only looks annotations up by name, so sorting
 # col_annot by cluster does not by itself group the samples. Put the annotated
 # samples first, in col_annot (cluster-sorted) order, so that gaps_col falls on
 # the cluster boundaries; samples without an annotation row, if any, are kept
 # at the end.
 annotated_samples <- intersect(rownames(col_annot), colnames(data))
 sample_order <- c(annotated_samples, setdiff(colnames(data), annotated_samples))
 pheatmap(data[, sample_order, drop = FALSE],
          main = "Title",
          annotation_col = col_annot,
          # Row annotation: pathway labels as row names, `category` as the
          # only remaining column.
          annotation_row = pathways %>%
            column_to_rownames("pathway") %>%
            dplyr::select(-name),
          # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
          cluster_rows = FALSE,
          cluster_cols = FALSE,
          show_colnames = FALSE,
          annotation_colors = aka,
          # Cumulative cluster sizes of the plotted samples, minus the last one,
          # give the column positions after which a gap is drawn.
          gaps_col = head(as.numeric(cumsum(table(col_annot[annotated_samples, "cluster"]))), -1)) # separates column groups by gap
	library("tidyverse")
	library("pheatmap")
	library("fgsea")


	# Load gene set data:
	# Read the KEGG metabolic gene sets from the GMT file, then build a lookup
	# table with one row per gene-set name plus two labels derived from that name.
	kegg_gs <- gmtPathways("/home/rstudio/san1/BD/datasets/kegg_metabolic_human_20211026.gmt")
	pathways <- data.frame(name = names(kegg_gs)) %>%
	  mutate(
	    # category: strip the leading numeric code, then everything from
	    # " - <digits>" onwards.
	    category = as.character(name) %>%
	      str_replace("^\\d+\\s", "") %>%
	      str_replace("\\s-\\s\\d+.+", ""),
	    # pathway: take the part starting at "- <digits> ", then remove that
	    # prefix and the trailing " [PATH..." tag.
	    pathway = name %>%
	      str_extract("-\\s\\d+\\s.+") %>%
	      str_replace("-\\s\\d+\\s", "") %>%
	      str_replace("\\s\\[PATH.*", "")
	  ) %>%
	  arrange(category)


	# Prepare data (here it is a GSVA matrix with N samples and M pathways):
	# scale() standardises each column of gsva_pathways, t() transposes the result,
	# and the row names are then shortened to the bare pathway label.
	data <- gsva_pathways %>%
	  scale() %>%
	  t() %>%
	  as.data.frame() %>%
	  rownames_to_column(var = "pathway") %>%
	  mutate(
	    # Keep "- <digits> <label> [PATH", then peel off the numeric prefix and
	    # the " [PATH" suffix so only <label> is left.
	    pathway = pathway %>%
	      str_extract("-\\s\\d+\\s.+\\[PATH") %>%
	      str_replace("-\\s\\d+\\s", "") %>%
	      str_replace("\\s\\[PATH", "")
	  ) %>%
	  column_to_rownames(var = "pathway")


	# Prepare column annotation (some clinical data and clustering results together):
	# Start from the cluster assignment in km.res_dktk, move its names into a
	# `sample` column, attach the subtype from clin_dat_dktk by sample, sort by
	# cluster, and finally turn `sample` back into row names.
	col_annot <- as.data.frame(km.res_dktk$cluster) %>%
	  dplyr::rename(cluster = "km.res_dktk$cluster") %>%
	  rownames_to_column(var = "sample") %>%
	  left_join(
	    # Only the join key and the subtype are needed; the subtype is renamed
	    # while selecting.
	    dplyr::select(clin_dat_dktk, sample, keck_subtype = keck_predominant_subtype),
	    by = "sample"
	  ) %>%
	  arrange(cluster) %>%
	  mutate(
	    # Fixed level sets so the factors line up with the annotation colours.
	    cluster = factor(cluster, levels = c("1", "2")),
	    keck_subtype = factor(keck_subtype, levels = c("BA", "CL", "IMS"))
	  ) %>%
	  column_to_rownames(var = "sample")



	# Annotation colors:
	# One named colour vector per annotation track; the element names are the
	# values that appear in the corresponding annotation column.
	aka <- list(
	  cluster = c(`1` = "#E7B800", `2` = "#2E9FDF"),
	  keck_subtype = c(BA = "pink", CL = "darkblue", IMS = "red"),
	  category = c(
	    "Amino acid metabolism" = "#EF5350",
	    "Biosynthesis of other secondary metabolites" = "#F48FB1",
	    "Carbohydrate metabolism" = "#AB47BC",
	    "Energy metabolism" = "#5C6BC0",
	    "Glycan biosynthesis and metabolism" = "#29B6F6",
	    "Lipid metabolism" = "#26A69A",
	    "Metabolism of cofactors and vitamins" = "#9CCC65",
	    "Metabolism of other amino acids" = "#FFEE58",
	    "Metabolism of terpenoids and polyketides" = "#FFA726",
	    "Not included in regular maps" = "#8D6E63",
	    "Nucleotide metabolism" = "#78909C",
	    "Xenobiotics biodegradation and metabolism" = "#ECEFF1"
	  )
	)


	# Plot heatmap:
	# With column clustering switched off, pheatmap draws the columns in the order
	# of the matrix it receives and only looks annotations up by name, so sorting
	# col_annot by cluster does not by itself group the samples. Put the annotated
	# samples first, in col_annot (cluster-sorted) order, so that gaps_col falls on
	# the cluster boundaries; samples without an annotation row, if any, are kept
	# at the end.
	annotated_samples <- intersect(rownames(col_annot), colnames(data))
	sample_order <- c(annotated_samples, setdiff(colnames(data), annotated_samples))
	pheatmap(data[, sample_order, drop = FALSE],
	         main = "Title",
	         annotation_col = col_annot,
	         # Row annotation: pathway labels as row names, `category` as the
	         # only remaining column.
	         annotation_row = pathways %>%
	           column_to_rownames("pathway") %>%
	           dplyr::select(-name),
	         # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
	         cluster_rows = FALSE,
	         cluster_cols = FALSE,
	         show_colnames = FALSE,
	         annotation_colors = aka,
	         # Cumulative cluster sizes of the plotted samples, minus the last one,
	         # give the column positions after which a gap is drawn.
	         gaps_col = head(as.numeric(cumsum(table(col_annot[annotated_samples, "cluster"]))), -1)) # separates column groups by gap