25 SVP: Gene Set Activity in Spatial and Single-Cell Data
Not every downstream analysis begins with cluster markers or differential expression. In many settings, the real question is whether a predefined biological program, pathway, or cell-type signature is active in a given cell or spatial location.
The SVP ecosystem is designed for this use case. This chapter shows how gene set activity can be quantified and explored in both single-cell and spatial settings, using SingleCellExperiment-compatible workflows.
Because the required packages and example datasets are lightweight enough for CI, most chunks in this chapter are executed in the GitHub Actions bookdown workflow when the dependencies are available.
library(SpatialExperiment)
library(SingleCellExperiment)
library(scuttle)
library(SVP)
library(ggplot2)
library(ggsc)
library(sclet)25.1 runSGSA
runSGSA() calculates the activity of gene sets in spatial or single-cell data.
sceSubPbmc is a small SingleCellExperiment dataset derived from PBMC3k and contains 1304 genes across 800 randomly selected cells.
data(sceSubPbmc)
sceSubPbmc <- NormalizeData(sceSubPbmc)
# the using runMCA to perform MCA (Multiple Correspondence Analysis)
# this is refer to the CelliD, but we using the Eigen to speed up.
# You can view the help information of runMCA using ?runMCA.
sceSubPbmc <- runMCA(sceSubPbmc, assay.type = 'logcounts')CellCycle.Hs contains cell-cycle-related gene sets. The S and G2M signatures follow the Seurat convention, while the G1 signature is derived from the G1_PHASE human gene set in MSigDB after removing duplicated genes.
# Next, we calculate gene set activity scores.
# Here, we use the cell-cycle signatures, but the same workflow applies
# to KEGG pathways, GO terms, Hallmark collections, transcription factor
# target sets, and other curated gene sets.
data(CellCycle.Hs)
sceSubPbmc <- runSGSA(sceSubPbmc, gset.idx.list = CellCycle.Hs, gsvaExp.name = 'CellCycle')
# The result is an SVPE object, which extends SingleCellExperiment.
sceSubPbmc# You can retrieve the score matrix directly.
sceSubPbmc |> gsvaExp('CellCycle')
sceSubPbmc |> gsvaExp("CellCycle") |> assay() |> t() |> head()# The resulting scores can then be visualized or used in downstream testing.
sceSubPbmc <- sceSubPbmc |>
scater::runPCA(assay.type = 'logcounts', ntop = 600) |>
scater::runUMAP(dimred = 'PCA')
# withReducedDim = TRUE, the original reducetion results from original gene features
# will be add the colData in the sce.cellcycle.
sce.cellcycle <- sceSubPbmc |> gsvaExp('CellCycle', withReducedDim=TRUE)
sce.cellcycle25.2 runLISA
hpda_spe_cell_dec is the result of runSGSA with HPDA A sample from (doi:10.1038/s41587-019-0392-8). Each item in the matrix is the marker gene set activity in the spot. The marker gene set represents the cell type. We can use svp the investigate if a cell type cluters and if two cell types appear in the same region. Usage of local indicators of spatial association (LISA) to identify the hotspot in the spatial space
data(hpda_spe_cell_dec)
# r$> assay(hpda_spe_cell_dec)[1:5,1:5]
# 5 x 5 sparse Matrix of class "dgCMatrix"
# Spot1 Spot2 Spot3 Spot4 Spot5
# Acinar cells 3.283881e-04 6.931743e-06 4.820951e-05 1.882577e-03 9.269843e-04
# Cancer clone A 2.880388e-04 2.013973e-04 9.167013e-05 9.642225e-05 6.704812e-05
# Cancer clone B 1.569730e-04 2.744810e-04 2.151324e-04 1.502651e-05 4.283322e-06
# Ductal APOL1 high-hypoxic 3.132481e-05 3.902286e-04 4.240874e-05 3.140970e-06 1.464118e-04
# Ductal CRISP3 high-centroacinar like 9.546611e-05 5.025320e-03 2.112512e-03 1.140155e-03 3.873088e-03
svres <- runDetectSVG(hpda_spe_cell_dec, assay.type = 'affi.score',
method = 'moransi', action = 'only')
# r$> svres |> dplyr::arrange(rank) |> head()
# obs expect.moransi sd.moransi Z.moransi pvalue padj rank
# Cancer clone A 0.7086916 -0.00234192 0.02803920 25.35855 3.616015e-142 7.232030e-141 1
# Acinar cells 0.6958980 -0.00234192 0.02788076 25.04379 1.020141e-138 1.020141e-137 2
# Cancer clone B 0.6586104 -0.00234192 0.02803039 23.57985 3.102650e-123 2.068434e-122 3
# mDCs A 0.5824019 -0.00234192 0.02760394 21.18334 6.801396e-100 3.400698e-99 4
# Ductal CRISP3 high-centroacinar like 0.5605955 -0.00234192 0.02794187 20.14673 1.437344e-90 5.749375e-90 5
# Endothelial cells 0.5362475 -0.00234192 0.02757573 19.53128 2.976221e-85 9.920735e-85 6lisa.res12 <- hpda_spe_cell_dec |>
runLISA(
features = c(1, 2, 3),
assay.type = 'affi.score',
weight.method = "knn",
k = 10,
action = 'get',
)
lisa.res12
lisa.res12[['Acinar cells']] |> head()
lisa.res12[["Cancer clone A"]] |> head()
colData(hpda_spe_cell_dec)$`cluster.test.Cancer.A` <- lisa.res12[["Cancer clone A"]] |>
dplyr::pull(cluster.test)
colData(hpda_spe_cell_dec)$`cluster.test.Acinar` <- lisa.res12[["Acinar cells"]] |>
dplyr::pull(cluster.test)
colData(hpda_spe_cell_dec)$`cluster.test.Cancer.B` <- lisa.res12[["Cancer clone B"]] |>
dplyr::pull(cluster.test)p1 <- sc_spatial(hpda_spe_cell_dec,
features = rownames(hpda_spe_cell_dec),
mapping = aes(x=x, y=y, color=cluster.test.Cancer.A),
plot.pie = T,
pie.radius.scale = .8,
bg_circle_radius = 1.1,
color=NA,
linewidth=2
) + scale_color_manual(values=c("black", "white"))
p1f1 <- sc_spatial(hpda_spe_cell_dec, features="Cancer clone A",
mapping=aes(x = x, y = y),
pointsize=10) +
geom_scattermore2(mapping = aes(bg_color=cluster.test.Cancer.A,
subset=cluster.test.Cancer.A=="High"),
bg_line_width = .16,
gap_line_width = .14,
pointsize = 7) +
scale_bg_color_manual(values=c('black'))
f1f2 <- sc_spatial(hpda_spe_cell_dec, features="Acinar cells",
mapping=aes(x=x,y=y),
pointsize=10) +
geom_scattermore2(mapping = aes(bg_color=cluster.test.Acinar, subset=cluster.test.Acinar=="High"),
bg_line_width = .16,
gap_line_width = .14,
pointsize = 7) +
scale_bg_color_manual(values=c('black'))
f2f3 <- sc_spatial(hpda_spe_cell_dec, features="Cancer clone B",
mapping=aes(x=x,y=y),
pointsize=10) +
geom_scattermore2(mapping = aes(bg_color=cluster.test.Cancer.B, subset=cluster.test.Cancer.B=="High"),
bg_line_width = .18,
gap_line_width = .14,
pointsize = 8) +
scale_bg_color_manual(values=c('black'))
f325.3 runLOCALBV
explore the local bivariate relationship in the spatial space.
res1 <- hpda_spe_cell_dec |> runLOCALBV(
features1 = 'Cancer clone A',
features2 = 'Cancer clone B',
assay.type='affi.score'
)
res1
res1[['Cancer clone A_VS_Cancer clone B']] |> head()
# LocalLee Gi E.Gi Var.Gi Z.Gi Pr (z > E(Gi)) cluster.no.test cluster.test
# Spot1 0.2656572 0.0007993283 0.00234192 1.685012e-06 -1.1883644 0.8826551 Low NoSign
# Spot2 0.1928726 0.0006520670 0.00234192 3.060135e-06 -0.9660035 0.8329788 Low NoSign
# Spot3 0.2098906 0.0007134499 0.00234192 2.544445e-06 -1.0209003 0.8463492 Low NoSign
# Spot4 0.2609793 0.0007131499 0.00234192 2.545557e-06 -1.0208654 0.8463409 Low NoSign
# Spot5 0.1769749 0.0007300670 0.00234192 3.059710e-06 -0.9214789 0.8215998 Low NoSign
# Spot6 0.1736672 0.0004656579 0.00234192 2.175086e-06 -1.2721999 0.8983489 Low NoSign