空间转录组数据注释分析：SPOTlight反卷积

创作时间:

作者:

@小白创作中心

空间转录组数据注释分析：SPOTlight反卷积

引用

来源

https://cloud.tencent.com/developer/article/2507452

空间转录组学（Spatial Transcriptomics, ST）技术能够提供组织中基因表达的空间信息，但缺乏单细胞分辨率。为了解决这一问题，Holger Heyn团队开发了SPOTlight这一计算工具，通过整合空间转录组数据和单细胞转录组数据，实现对组织中细胞类型和状态的空间定位分析。本文将详细介绍SPOTlight的工作原理及其使用方法。

SPOTlight工具简介

SPOTlight的核心是基于种子的非负矩阵分解（seeded Non-negative Matrix Factorization, NMF）回归。该方法使用细胞类型标记基因对NMF进行初始化，再利用非负最小二乘法（Non-negative Least Squares, NNLS）对ST捕获位置（spot）进行反卷积，估计每个spot中各细胞类型的组成比例。

软件安装

# Point Bioconductor and CRAN at the westlake.edu.cn mirrors
options(
  BioC_mirror = "https://mirrors.westlake.edu.cn/bioconductor",
  repos = c(CRAN = "https://mirrors.westlake.edu.cn/CRAN/")
)
# Install SPOTlight from its GitHub repository
library(devtools)
install_github("https://github.com/MarcElosua/SPOTlight")

输入数据

ST (sparse) matrix：空间表达矩阵，可以是raw count，也可以是标准化后的数据；行为基因，列为spot
Single cell (sparse) matrix：单细胞表达矩阵，可以是raw count，也可以是标准化后的数据；行为基因，列为细胞
Vector：单细胞的每个细胞注释标签向量，顺序对应单细胞矩阵的列
其他格式：输入数据还可以为SpatialExperiment或SingleCellExperiment对象

数据准备

# Attach the packages whose functions are called unqualified further down.
# Without these the script stops with "could not find function" at the first
# logNormCounts() / SPOTlight() / theme() call.
library(ggplot2)               # theme(), scale_fill_manual()
library(SPOTlight)             # SPOTlight() and the plot*() helpers
library(SingleCellExperiment)  # colLabels(), colData(), rowData()
library(scater)                # logNormCounts() (provided via scuttle)
library(scran)                 # modelGeneVar(), getTopHVGs(), scoreMarkers()

# Load the spatial transcriptomics data
library(TENxVisiumData)
spe <- MouseKidneyCoronal()
# Use the `symbol` column of the row metadata as feature names
rownames(spe) <- rowData(spe)$symbol

# Load the single-cell reference data
library(TabulaMurisSenisData)
sce <- TabulaMurisSenisDroplet(tissues = "Kidney")$Kidney
# Keep only cells whose `age` is "18m"
sce <- sce[, sce$age == "18m"]
# Drop cells whose `free_annotation` is "nan" or "CD45"
sce <- sce[, !sce$free_annotation %in% c("nan", "CD45")]

数据预处理

特征基因选择

# Feature selection: pick highly variable genes from the single-cell reference
sce <- logNormCounts(sce)
# TRUE for genes that are neither ribosomal (Rpl*/Rps*) nor mitochondrial.
# The previous pattern "^Rp[l|s]|Mt" had two problems: the character class
# contained a literal "|", and the unanchored, capitalised "Mt" matched genes
# such as Mt1/Mt2/Mtor while missing mouse mitochondrial symbols, which carry
# the lowercase "mt-" prefix.
genes <- !grepl(pattern = "^Rp[ls]|^mt-", x = rownames(sce))
# Model per-gene variance on the retained genes only
dec <- modelGeneVar(sce, subset.row = genes)
# Keep the 3000 top-ranked highly variable genes
hvg <- getTopHVGs(dec, n = 3000)

获取标记基因

# Score marker genes for every annotated cell type
colLabels(sce) <- colData(sce)$free_annotation
mgs <- scoreMarkers(sce, subset.row = genes)
# Per cell type: keep genes with mean AUC above 0.8, sort them from highest
# to lowest mean AUC, and store the gene name and cell type as columns
mgs_fil <- lapply(names(mgs), function(cell_type) {
  stats <- mgs[[cell_type]]
  stats <- stats[stats$mean.AUC > 0.8, ]
  stats <- stats[order(stats$mean.AUC, decreasing = TRUE), ]
  stats$gene <- rownames(stats)
  stats$cluster <- cell_type
  data.frame(stats)
})
# Stack the per-cell-type tables into a single data frame
mgs_df <- do.call(rbind, mgs_fil)

细胞降采样

# Downsample the reference to at most `n_cells` cells per cell type
# Column indices of `sce`, grouped by annotation; seq_len() stays empty when
# `sce` has no columns, whereas seq(0) would yield c(1, 0)
idx <- split(seq_len(ncol(sce)), sce$free_annotation)
n_cells <- 20
cs_keep <- lapply(idx, function(i) {
  # Sample positions within `i` instead of calling sample(i, ...) directly:
  # for a cell type with a single cell, `i` is one number k and sample(k, 1)
  # draws from 1:k, which would select an unrelated cell.
  i[sample.int(length(i), min(length(i), n_cells))]
})
sce <- sce[, unlist(cs_keep)]

反卷积分析

# Deconvolute the spatial data against the single-cell reference
res <- SPOTlight(
  x = sce,
  y = spe,
  groups = as.character(sce$free_annotation),
  mgs = mgs_df,
  hvg = hvg,
  gene_id = "gene",
  group_id = "cluster",
  weight_id = "mean.AUC"
)
# Extract the two result components used by the plots below
mat <- res[["mat"]]
mod <- res[["NMF"]]

结果可视化

Topic profiles评估

# Visualise the topic profiles of the fitted model, all in one panel
plotTopicProfiles(
  x = mod,
  y = sce$free_annotation,
  min_prop = 0.01,
  ncol = 1,
  facet = FALSE
) + theme(aspect.ratio = 1)

空间相关性矩阵

# Draw the correlation plot for the deconvolution matrix
plotCorrelationMatrix(x = mat)

细胞类型共定位

# Cell-type co-localisation: two heatmaps (one per metric), then a network
plotInteractions(x = mat, which = "heatmap", metric = "prop")
plotInteractions(x = mat, which = "heatmap", metric = "jaccard")
plotInteractions(x = mat, which = "network")

空间切片展示

# Scatterpie view of the deconvolution matrix
ct <- colnames(mat)
# Zero out every entry below 0.1 before plotting
mat[mat < 0.1] <- 0
# Base colours, interpolated below to one colour per column of `mat`
paletteMartin <- c(
  "#000000", "#004949", "#009292", "#ff6db6", "#ffb6db",
  "#490092", "#006ddb", "#b66dff", "#6db6ff", "#b6dbff",
  "#920000", "#924900", "#db6d00", "#24ff24", "#ffff6d"
)
pal <- setNames(colorRampPalette(paletteMartin)(length(ct)), ct)

plotSpatialScatterpie(
  x = spe,
  y = mat,
  cell_types = colnames(mat),
  img = FALSE,
  pie_scale = 0.4,
  scatterpie_alpha = 1
) +
  scale_fill_manual(breaks = names(pal), values = pal)