2 Batch Correction

2.1 Load datasets

library(TENxPBMCData)

# Load the two datasets that are combined in the batch-correction steps below.
# Each call selects one dataset by name through the `dataset` argument.
pbmc3k <- TENxPBMCData(dataset = "pbmc3k")
pbmc4k <- TENxPBMCData(dataset = "pbmc4k")

2.2 Preprocess

library(sclet)

# Apply the same preprocessing to one dataset: QC metrics, QC-based
# filtering, normalization and variable-feature selection.
#
# Args:
#   sce: a single-cell object accepted by QCMetrics().
#
# Returns:
#   The filtered object after NormalizeData() and FindVariableFeatures().
sce_process <- function(sce) {
  # Attach QC metrics, then store the PercentageFeatureSet() result for
  # features matching "^MT-" under "percent.mt".
  # NOTE(review): confirm the feature names of the inputs actually carry the
  # "MT-" prefix (e.g. gene symbols rather than Ensembl IDs); otherwise
  # percent.mt presumably carries no signal for the filter below.
  qc <- QCMetrics(sce)
  qc[["percent.mt"]] <- PercentageFeatureSet(qc, "^MT-")

  # Keep entries with 200 < nFeature_RNA < 2500 and percent.mt < 5.
  # nFeature_RNA is presumably added by QCMetrics() -- confirm.
  kept <- subset(
    qc,
    subset = nFeature_RNA > 200 &
      nFeature_RNA < 2500 &
      percent.mt < 5
  )

  FindVariableFeatures(NormalizeData(kept))
}

# Preprocess both datasets with sce_process() and collect them in a named
# list; the names identify each dataset in the list passed downstream.
pbmc2 <- lapply(list(pbmc3k = pbmc3k, pbmc4k = pbmc4k), sce_process)

# Keep the individual bindings in sync with the processed list entries.
pbmc3k <- pbmc2[["pbmc3k"]]
pbmc4k <- pbmc2[["pbmc4k"]]

2.3 Batch correction

# Pass the named list of preprocessed datasets to BatchRemover(); its result
# replaces the list in `pbmc2` and is used as a single object from here on.
# NOTE(review): later steps read a "reconstructed" assay and a `batch`
# variable from this object -- presumably both are created here; confirm
# against the BatchRemover() documentation.
pbmc2 <- BatchRemover(pbmc2)

2.4 Clustering

# PCA on the "reconstructed" values, restricted to the rows returned by
# VariableFeatures() for the corrected object.
pbmc2 <- runPCA(pbmc2, subset_row = VariableFeatures(pbmc2), exprs_values = "reconstructed")
# Neighbor search using dimensions 1 to 10, followed by clustering.
pbmc2 <- FindNeighbors(pbmc2, dims = 1:10)
pbmc2 <- FindClusters(pbmc2)
# UMAP; the positional 1:10 presumably selects the same dimensions as
# `dims = 1:10` above -- TODO confirm the argument name.
pbmc2 <- RunUMAP(pbmc2, 1:10)

2.5 Visualization

library(ggplot2)
library(ggsc)

# UMAP embedding with points coloured by the `batch` variable.
sc_dim(pbmc2, reduction = "UMAP", mapping = aes(color = batch))

# Same embedding, split into one column of panels per `batch` value.
sc_dim(pbmc2, reduction = "UMAP") +
  facet_grid(. ~ batch) +
  theme_bw()