3 Cases
3.1 PBMC example
3.1.1 Marker gene heatmap
library(yulab.utils)
pload(dplyr)
pload(ggplot2)
pload(ggfun)
pload(ggtree)
pload(aplot)
pload(Seurat)
# celltype <- c("Naive CD4 T", "B", "NK")
# id <- pbmc@active.ident
# md = data.frame(cell=names(id), group=id)
# md <- md[md$group %in% celltype, ]
# md$group <- as.character(md$group)
# md <- lapply(split(md, md$group), function(x) x[1:30,]) |> rbindlist()
#
# pbmc <- pbmc[, md$cell]
pbmc <- qs::qread("data/pbmc-subset.qs")
m <- FindAllMarkers(pbmc, only.pos=TRUE)
top10 <- m %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 2) %>%
slice_head(n = 10)
pbmc <- ScaleData(pbmc, features = rownames(pbmc))
x <- pbmc[['RNA']]$scale.data[top10$gene,]
y <- mat2df(x) -> y
y$gene <- rownames(x)[y$row]
y$cell <- colnames(x)[y$col]
p <- ggplot(y, aes(cell, gene, fill = value)) +
geom_tile()+ scale_fill_viridis_c(name = "Gene Expression") +
theme_noxaxis() +
scale_y_discrete(position = "right") +
xlab(NULL) +
ylab(NULL)
gene_cls <- hclust(dist((x)))
cell_cls <- hclust(dist(t((x))))
gene_tree <- ggtree(gene_cls, branch.length = 'none')
cell_tree = ggtree(cell_cls, branch.length = 'none') + layout_dendrogram()
id <- pbmc@active.ident
md = data.frame(cell=names(id), group=id)
p_cell_type <- ggplot(md, aes(cell, y=1, fill = group)) +
geom_tile() +
scale_fill_brewer(palette="Set1", name = 'Cell Type') +
theme_nothing()
g <- p |> insert_left(gene_tree, width = .2) |>
insert_top(p_cell_type, height = .02) |>
insert_top(cell_tree, height = .1)
g
3.1.2 Degree of genes in the PPI network
pload(igraph)
pload(clusterProfiler)
ppi <- getPPI(top10$gene, output='igraph', taxID='9606')
deg <- stack(degree(ppi, v = V(ppi))) |> setNames(c("degree", "gene"))
p_ppi <- ggplot(deg, aes(1, gene, fill=degree, size=degree)) + geom_point(shape=21) +
scale_fill_gradientn(colors = hcl.colors(20, "RdYlGn")) + theme_nothing()
3.1.3 Genes in pathways
pload(tidytree)
pload(msigdbr)
node <- c(35, 34, 32)
genes <- lapply(node, function(n) offspring(gene_tree, n, tiponly=T)$label) |>
setNames(c("CD4", "NK", "B"))
pload(clusterProfiler)
gene_set <- msigdbr(species="human", category="C2")
names(gene_set)
## [1] "gs_cat" "gs_subcat"
## [3] "gs_name" "gene_symbol"
## [5] "entrez_gene" "ensembl_gene"
## [7] "human_gene_symbol" "human_entrez_gene"
## [9] "human_ensembl_gene" "gs_id"
## [11] "gs_pmid" "gs_geoid"
## [13] "gs_exact_source" "gs_url"
## [15] "gs_description"
cc <- compareCluster(genes, enricher, TERM2GENE=gene_set[, c("gs_name", "gene_symbol")])
res <- cc@compareClusterResult |>
group_by(Cluster) |>
slice_head(n = 5) |>
as.data.frame()
gene2path <- lapply(res$geneID, function(x) unlist(strsplit(x, split="/"))) |>
setNames(res$Description) |>
stack() |>
setNames(c("gene", "pathway"))
pcp <- ggplot(gene2path, aes(pathway, gene)) +
geom_point(size=5, shape=21, fill = "steelblue") +
theme_minimal() +
theme_noyaxis() +
theme(axis.text.x = element_text(angle=30, hjust=1)) +
xlab(NULL) +
ylab(NULL)
gg <- g |> insert_right(p_ppi, width=.05) |>
insert_right(pcp, width=.5)
gg
3.1.4 All in one
pp <- plot_list(cell_tree, p_cell_type, heights=c(1, .5))
pp <- plot_list(gene_tree, pp, widths=c(.3, 1))
pp <- plot_list(p, pp, p_ppi, pcp, widths=c(1, .8, .05, .6))
pp <- pp + patchwork::plot_annotation(tag_levels='a')
final_plot <- plot_list(pp, gg, ncol=1, tag_levels='A', heights=c(1, 1.2))
final_plot
3.2 Oncoplot example
pload(aplotExtra)
pload(RTCGA.mRNA)
pload(RTCGA)
laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
laml.clin <- system.file('extdata', 'tcga_laml_annot.tsv', package = 'maftools')
laml <- maftools::read.maf(maf = laml.maf, clinicalData = laml.clin)
## -Reading
## -Validating
## -Silent variants: 475
## -Summarizing
## -Processing clinical data
## -Finished in 0.260s elapsed (0.000s cpu)
onco <- oncoplot(maf = laml, genes = 20)
plot_tcga_expr <- function(mRNA, genes, name = "Gene Expression") {
d = expressionsTCGA(mRNA, extract.cols = genes)
dd = gather(d, gene, expression, -c(1,2))
ggplot(dd, aes(expression, gene, fill=stat(x))) +
ggridges::geom_density_ridges_gradient() +
scale_fill_viridis_c(option="C", name = name) +
theme_minimal() +
theme_noyaxis() +
xlab(NULL) +
ylab(NULL) +
theme(legend.position='bottom')
}
onco_genes <- aplotExtra:::get_oncoplot_genes(laml)
brca <- plot_tcga_expr(BRCA.mRNA, onco_genes, "Gene Expression in TCGA\nbreast cancer patients")
ov <- plot_tcga_expr(OV.mRNA, onco_genes, "Gene Expression in TCGA\novarian cancer patients")
op <- insert_right(onco, brca, width = .6) |>
insert_right(ov, width=.6)
op
3.3 funky heatmap
library(aplotExtra)
library(tidyverse)
data("mtcars")
d <- yulab.utils::scale_range(mtcars) |>
rownames_to_column("id") |>
arrange(desc(mpg))
g1 <- funky_text(d)
g2 <- funky_bar(d, 2) + scale_fill_gradient(low = "#CC4C02", high = "#FFFFE5")
g3 <- funky_bar(d, 3) + scale_fill_gradient(low = "steelblue", high = "firebrick")
g4 <- funky_point(d, 4:7) + scale_fill_gradient(low = "#CC4C02", high = "#FFFFE5")
g5 <- funky_point(d, 8:12) + scale_fill_gradient(low = "#08519C", high = "#F7FBFF")
#funky_heatmap(data=mtcars)
fp <- funky_heatmap(g1, g2, g4, g5, options=theme_stamp())
fp
3.4 Genomics track example
3.4.1 Setup packages and parameteres
pload(ChIPseeker)
pload(TxDb.Hsapiens.UCSC.hg19.knownGene)
pload(org.Hs.eg.db)
pload(IRanges)
pload(GenomicFeatures)
pload(gggenes)
pload(HiContacts)
pload(cowplot)
# This analysis uses processed genomic data from:
# 1. Hi-C data: GM12878 (GSM1551550) and K562 (GSM1551618)
# 2. ChIP-seq data for histone modifications:
# - H3K27ac (GM12878: GSM733771, K562: GSM733656)
# - H3K4me1 (GM12878: GSM733772, K562: GSM733692)
# - H3K4me3 (GM12878: GSM733708, K562: GSM733680)
# - H3K36me3 (GM12878: GSM733679, K562: GSM733714)
# Controls: GM12878 (GSM733742), K562 (GSM733780)
# Analysis parameters:
# - MACS2 peak calling: default parameters except:
# * genome size: -g 2.7e9 (human)
# - MACS2 bdgdiff: default parameters except:
# * d1: 6284864 (GM12878 sequencing depth) and d2: 18559220 (K562 sequencing depth)
# Load processed genomic data
epigenetics_data <- qs::qread("./data/epigenetics_data.qs")
# Global parameters and themes
plot_params <- list(
scale = "log10",
limits = c(0, 1.7),
chr = "chr6",
x_min = 43600000,
x_max = 44100000,
highlight_regions = list(
region1 = c(43738000, 43754500),
region2 = c(43900000, 44000000),
region3 = c(44060000, 44100000)
),
peak_color = c("peak_in_K562" = "#E05B5B", "peak_in_GM12878" = "#519CBA")
)
common_theme <- theme_minimal() +
theme(
axis.title.x = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
panel.border = element_blank(),
plot.title = element_text(hjust = 0.5)
)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
# Function to add highlight rectangles
addHighlights <- function(p, ymax, alpha=0.2) {
p +
annotate("rect",
xmin=plot_params$highlight_regions$region1[1],
xmax=plot_params$highlight_regions$region1[2],
ymin=0, ymax=ymax, fill="#c6c3c3", alpha=alpha) +
annotate("rect",
xmin=plot_params$highlight_regions$region2[1],
xmax=plot_params$highlight_regions$region2[2],
ymin=0, ymax=ymax, fill="#c6c3c3", alpha=alpha) +
annotate("rect",
xmin=plot_params$highlight_regions$region3[1],
xmax=plot_params$highlight_regions$region3[2],
ymin=0, ymax=ymax, fill="#c6c3c3", alpha=alpha)
}
# Generate gene plot
generateGenePlot <- function(txdb, chr, x_min, x_max, OrgDb, show_legend = FALSE) {
# Get genes in the region
win <- GRanges(seqnames = chr,
ranges = IRanges::IRanges(start = x_min, end = x_max))
all_genes <- GenomicFeatures::genes(txdb)
gene_df <- data.frame(subsetByOverlaps(x = all_genes, ranges = win, type = "any"))
gene_df$gene_id <- factor(gene_df$gene_id, levels = unique(gene_df$gene_id))
# Process gene data
gene_df <- gene_df[,c("seqnames", "gene_id", "start", "end", "strand")]
colnames(gene_df)[1] <- "chromosome"
gene_df$forward <- ifelse(gene_df$strand=="+", TRUE, FALSE)
# Convert gene IDs
changeid <- bitr(geneID = gene_df$gene_id,
fromType = "ENTREZID", toType = "SYMBOL",
OrgDb = OrgDb)
colnames(changeid)[1] <- c("gene_id")
gene_df <- merge(gene_df, changeid, all.x = TRUE)
gene_df <- gene_df[, c(2:ncol(gene_df))]
colnames(gene_df)[ncol(gene_df)] <- "gene_symbol"
gene_df$start <- pmax(gene_df$start, x_min)
gene_df$end <- pmin(gene_df$end, x_max)
p <- ggplot(gene_df, aes(xmin = start, xmax = end,
y = gene_symbol, forward=forward, fill = gene_symbol)) +
geom_gene_arrow() +
xlim(c(x_min, x_max)) +
theme_genes() +
scale_fill_brewer(palette = "Set3") +
labs(fill = "gene symbol") +
common_theme
if (!show_legend) {
p <- p + theme(legend.position = "none")
}
return(p)
}
# Generate HiC plot
generateHicPlot <- function(data, label, use_scores = NULL, show_legend = TRUE) {
if (is.null(use_scores)) {
p <- HiContacts::plotMatrix(data,
maxDistance = plot_params$x_max - plot_params$x_min,
caption = FALSE,
scale = plot_params$scale,
limits = plot_params$limits) +
xlim(plot_params$x_min, plot_params$x_max) +
labs(fill = 'hic score') +
ylab(paste0(label, " hic")) +
common_theme
} else {
p <- HiContacts::plotMatrix(data,
maxDistance = plot_params$x_max - plot_params$x_min,
use.scores = use_scores,
caption = FALSE,
cmap = HiContacts::bbrColors()) +
xlim(plot_params$x_min, plot_params$x_max) +
labs(fill = 'log2fc') +
ylab(paste0(label, " ")) +
common_theme
}
if (!show_legend) {
p <- p + theme(legend.position = "none")
}
return(p)
}
# Generate coverage plot
generateCoveragePlot <- function(data, label, ymax, colors = NULL,
add_facet = FALSE, show_legend = TRUE) {
p <- covplot(data,
weightCol = "V5",
chrs = plot_params$chr,
xlim = c(plot_params$x_min, plot_params$x_max),
ylab = label,
title = "") +
ylim(c(0, ymax)) +
common_theme
if (!is.null(colors)) {
p <- p + scale_color_manual(values = colors) +
scale_fill_manual(values = colors)
} else {
p <- p + scale_fill_brewer(palette = "Set2") +
scale_color_brewer(palette = "Set2")
}
if (add_facet) {
p <- p + facet_grid(.id ~ .) +
theme(strip.text.y = element_blank())
}
if (!show_legend) {
p <- p + theme(legend.position = "none")
}
return(p)
}
3.4.2 Visualization of comparative genomics data tracks
# Generate HiC comparision plot
combine_hic_p <- generateHicPlot(epigenetics_data[["div_hic"]],
"K562/GM12878 hic compare",
use_scores = "balanced.fc")
# Generate histone modification comparision plots
histone_plots <- list(
H3K27ac = list(data = epigenetics_data[["H3K27ac_peak"]], ymax = 100),
H3K36me3 = list(data = epigenetics_data[["H3K36me3_peak"]], ymax = 10),
H3K4me1 = list(data = epigenetics_data[["H3K4me1_peak"]], ymax = 30),
H3K4me3 = list(data = epigenetics_data[["H3K4me3_peak"]], ymax = 25)
) %>%
map2(names(.), function(x, name) {
p <- generateCoveragePlot(x$data, name, x$ymax,
plot_params$peak_color, add_facet = FALSE)
addHighlights(p, x$ymax)
})
# Generate gene track
gene_p <- generateGenePlot(txdb = txdb,
chr = plot_params$chr,
x_min = plot_params$x_min,
x_max = plot_params$x_max,
OrgDb = "org.Hs.eg.db")
gene_p_highlighted <- addHighlights(gene_p, ymax=9)
# Combine all plots using aplot
combined_p <- combine_hic_p %>%
insert_bottom(histone_plots$H3K27ac, height = .2) %>%
insert_bottom(histone_plots$H3K36me3, height = .2) %>%
insert_bottom(histone_plots$H3K4me1, height = .2) %>%
insert_bottom(histone_plots$H3K4me3, height = .2) %>%
insert_bottom(gene_p_highlighted, height = .5)
combined_p
3.4.3 Visualization of different genomics data tracks
# Generate HiC plots
GM12878_hic_p <- generateHicPlot(epigenetics_data[["GM12878_hic"]],
"GM12878", show_legend = FALSE)
K562_hic_p <- generateHicPlot(epigenetics_data[["K562_hic"]],
"K562", show_legend = FALSE)
# Generate ChIP-seq coverage plots
GM12878_cov_p <- generateCoveragePlot(epigenetics_data[["GM12878_peak"]],
"GM12878 ChIP-seq",
2000,
add_facet = TRUE,
show_legend = FALSE)
GM12878_cov_p <- addHighlights(GM12878_cov_p, ymax = 2000)
K562_cov_p <- generateCoveragePlot(epigenetics_data[["K562_peak"]],
"K562 ChIP-seq",
2000,
add_facet = TRUE,
show_legend = FALSE)
K562_cov_p <- addHighlights(K562_cov_p, ymax = 2000)
# Combine plots using aplot
GM12878_p <- (GM12878_hic_p + theme(legend.position = 'none')) %>%
insert_bottom(GM12878_cov_p) %>%
insert_bottom(gene_p_highlighted, height = .6)
K562_p <- (K562_hic_p + theme(legend.position = 'none')) %>%
insert_bottom(K562_cov_p) %>%
insert_bottom(gene_p_highlighted, height = .6)
# Add legends and create final view
split_p <- plot_list(
gglist = list(GM12878_p, K562_p,
plot_grid(get_legend(generateHicPlot(epigenetics_data[["K562_hic"]], "K562")),
get_legend(generateCoveragePlot(epigenetics_data[["K562_peak"]],
"K562 ChIP-seq",
2000,
add_facet = TRUE)),
nrow = 1)),
design = c("AAAAAABBBBBBCC")
)
split_p