25 SVP: Gene Set Activity in Spatial and Single-Cell Data

Not every downstream analysis begins with cluster markers or differential expression. In many settings, the real question is whether a predefined biological program, pathway, or cell-type signature is active in a given cell or spatial location.

The SVP ecosystem is designed for this use case. This chapter shows how gene set activity can be quantified and explored in both single-cell and spatial settings, using SingleCellExperiment-compatible workflows.

Because the required packages and example datasets are lightweight enough for CI, most chunks in this chapter are executed in the GitHub Actions bookdown workflow when the dependencies are available.

library(SpatialExperiment)
library(SingleCellExperiment)
library(scuttle)
library(SVP)
library(ggplot2)
library(ggsc)
library(sclet)

25.1 runSGSA

runSGSA() calculates the activity of gene sets in spatial or single-cell data.

sceSubPbmc is a small SingleCellExperiment dataset derived from PBMC3k and contains 1304 genes across 800 randomly selected cells.

data(sceSubPbmc)
sceSubPbmc <- NormalizeData(sceSubPbmc)
# the using runMCA to perform MCA (Multiple Correspondence Analysis)
# this is refer to the CelliD, but we using the Eigen to speed up.
# You can view the help information of runMCA using ?runMCA.
sceSubPbmc <- runMCA(sceSubPbmc, assay.type = 'logcounts')

CellCycle.Hs contains cell-cycle-related gene sets. The S and G2M signatures follow the Seurat convention, while the G1 signature is derived from the G1_PHASE human gene set in MSigDB after removing duplicated genes.

# Next, we calculate gene set activity scores.
# Here, we use the cell-cycle signatures, but the same workflow applies
# to KEGG pathways, GO terms, Hallmark collections, transcription factor
# target sets, and other curated gene sets.
data(CellCycle.Hs)
sceSubPbmc <- runSGSA(sceSubPbmc, gset.idx.list = CellCycle.Hs, gsvaExp.name = 'CellCycle')
# The result is an SVPE object, which extends SingleCellExperiment.
sceSubPbmc

# You can retrieve the score matrix directly.
sceSubPbmc |> gsvaExp('CellCycle') 
sceSubPbmc |> gsvaExp("CellCycle") |> assay() |> t() |> head()

# The resulting scores can then be visualized or used in downstream testing.
sceSubPbmc <- sceSubPbmc |> 
                scater::runPCA(assay.type = 'logcounts', ntop = 600) |>
                scater::runUMAP(dimred = 'PCA')
# withReducedDim = TRUE, the original reducetion results from original gene features
# will be add the colData in the sce.cellcycle.
sce.cellcycle <- sceSubPbmc |> gsvaExp('CellCycle', withReducedDim=TRUE)
sce.cellcycle

sce.cellcycle |> sc_violin(
                      features = rownames(sce.cellcycle), 
                      mapping = aes(x=seurat_annotations, fill = seurat_annotations)
                   ) + 
                   scale_x_discrete(guide=guide_axis(angle=-45))

sce.cellcycle |> sc_feature(features= "S", reduction='UMAP')

library(scran)
cellcycle.test.res <- sce.cellcycle |> findMarkers(
                group = sce.cellcycle$seurat_annotations, 
                test.type = 'wilcox', 
                assay.type = 'affi.score', 
                add.summary = TRUE
                )
cellcycle.test.res$B

25.2 runLISA

hpda_spe_cell_dec is the result of runSGSA with HPDA A sample from (doi:10.1038/s41587-019-0392-8). Each item in the matrix is the marker gene set activity in the spot. The marker gene set represents the cell type. We can use svp the investigate if a cell type cluters and if two cell types appear in the same region. Usage of local indicators of spatial association (LISA) to identify the hotspot in the spatial space

data(hpda_spe_cell_dec)

# r$> assay(hpda_spe_cell_dec)[1:5,1:5]
# 5 x 5 sparse Matrix of class "dgCMatrix"
#                                             Spot1        Spot2        Spot3        Spot4        Spot5
# Acinar cells                         3.283881e-04 6.931743e-06 4.820951e-05 1.882577e-03 9.269843e-04
# Cancer clone A                       2.880388e-04 2.013973e-04 9.167013e-05 9.642225e-05 6.704812e-05
# Cancer clone B                       1.569730e-04 2.744810e-04 2.151324e-04 1.502651e-05 4.283322e-06
# Ductal APOL1 high-hypoxic            3.132481e-05 3.902286e-04 4.240874e-05 3.140970e-06 1.464118e-04
# Ductal CRISP3 high-centroacinar like 9.546611e-05 5.025320e-03 2.112512e-03 1.140155e-03 3.873088e-03

svres <- runDetectSVG(hpda_spe_cell_dec, assay.type = 'affi.score', 
                      method = 'moransi', action = 'only') 

# r$> svres |> dplyr::arrange(rank) |> head()
#                                            obs expect.moransi sd.moransi Z.moransi        pvalue          padj rank
# Cancer clone A                       0.7086916    -0.00234192 0.02803920  25.35855 3.616015e-142 7.232030e-141    1
# Acinar cells                         0.6958980    -0.00234192 0.02788076  25.04379 1.020141e-138 1.020141e-137    2
# Cancer clone B                       0.6586104    -0.00234192 0.02803039  23.57985 3.102650e-123 2.068434e-122    3
# mDCs A                               0.5824019    -0.00234192 0.02760394  21.18334 6.801396e-100  3.400698e-99    4
# Ductal CRISP3 high-centroacinar like 0.5605955    -0.00234192 0.02794187  20.14673  1.437344e-90  5.749375e-90    5
# Endothelial cells                    0.5362475    -0.00234192 0.02757573  19.53128  2.976221e-85  9.920735e-85    6

lisa.res12 <- hpda_spe_cell_dec |>
   runLISA(
     features = c(1, 2, 3), 
     assay.type = 'affi.score',
     weight.method = "knn",
     k = 10,
     action = 'get',
   )
lisa.res12
lisa.res12[['Acinar cells']] |> head()
lisa.res12[["Cancer clone A"]] |> head()
colData(hpda_spe_cell_dec)$`cluster.test.Cancer.A` <- lisa.res12[["Cancer clone A"]] |>
dplyr::pull(cluster.test)
colData(hpda_spe_cell_dec)$`cluster.test.Acinar` <- lisa.res12[["Acinar cells"]] |>
dplyr::pull(cluster.test)
colData(hpda_spe_cell_dec)$`cluster.test.Cancer.B` <- lisa.res12[["Cancer clone B"]] |>
dplyr::pull(cluster.test)

p1 <- sc_spatial(hpda_spe_cell_dec,
            features = rownames(hpda_spe_cell_dec),
            mapping = aes(x=x, y=y, color=cluster.test.Cancer.A),
            plot.pie = T,
            pie.radius.scale = .8,
            bg_circle_radius = 1.1,
            color=NA,
            linewidth=2
) + scale_color_manual(values=c("black", "white"))
p1

f1 <- sc_spatial(hpda_spe_cell_dec, features="Cancer clone A",
            mapping=aes(x = x, y = y),
            pointsize=10) + 
        geom_scattermore2(mapping = aes(bg_color=cluster.test.Cancer.A, 
                          subset=cluster.test.Cancer.A=="High"),
                          bg_line_width = .16,
                          gap_line_width = .14,
                          pointsize = 7) +
        scale_bg_color_manual(values=c('black'))
f1

f2 <- sc_spatial(hpda_spe_cell_dec, features="Acinar cells",
            mapping=aes(x=x,y=y),
            pointsize=10) +
        geom_scattermore2(mapping = aes(bg_color=cluster.test.Acinar, subset=cluster.test.Acinar=="High"),
                          bg_line_width = .16,
                          gap_line_width = .14,
                          pointsize = 7) +
        scale_bg_color_manual(values=c('black'))
f2

f3 <- sc_spatial(hpda_spe_cell_dec, features="Cancer clone B",
            mapping=aes(x=x,y=y),
            pointsize=10) +
        geom_scattermore2(mapping = aes(bg_color=cluster.test.Cancer.B, subset=cluster.test.Cancer.B=="High"),
                          bg_line_width = .18,
                          gap_line_width = .14,
                          pointsize = 8) +
        scale_bg_color_manual(values=c('black'))
f3

25.3 runLOCALBV

explore the local bivariate relationship in the spatial space.

res1 <- hpda_spe_cell_dec |> runLOCALBV(
          features1 = 'Cancer clone A', 
          features2 = 'Cancer clone B', 
          assay.type='affi.score'
        )
res1
res1[['Cancer clone A_VS_Cancer clone B']] |> head()


#        LocalLee           Gi       E.Gi       Var.Gi       Z.Gi Pr (z > E(Gi)) cluster.no.test cluster.test
# Spot1 0.2656572 0.0007993283 0.00234192 1.685012e-06 -1.1883644      0.8826551             Low       NoSign
# Spot2 0.1928726 0.0006520670 0.00234192 3.060135e-06 -0.9660035      0.8329788             Low       NoSign
# Spot3 0.2098906 0.0007134499 0.00234192 2.544445e-06 -1.0209003      0.8463492             Low       NoSign
# Spot4 0.2609793 0.0007131499 0.00234192 2.545557e-06 -1.0208654      0.8463409             Low       NoSign
# Spot5 0.1769749 0.0007300670 0.00234192 3.059710e-06 -0.9214789      0.8215998             Low       NoSign
# Spot6 0.1736672 0.0004656579 0.00234192 2.175086e-06 -1.2721999      0.8983489             Low       NoSign