目的
このチュートリアルでは、処理群とコントロール群のPBMC(人外周血単核球)データセットを統合し、細胞タイプ特異的な反応と統合の役割を理解することを目的としています。ここでは、Seuratを使用して複雑な細胞タイプの比較分析を行う方法を紹介します。
目標
- 両データセットに存在する細胞タイプの識別
- コントロール群と処理群で共通する細胞タイプマーカーの取得
- 刺激による特異的反応を示す細胞タイプの探索
オブジェクトの作成
まず、2つのカウント行列を読み込み、Seuratオブジェクトを作成します。
# パッケージの読み込み
library(Seurat)
library(cowplot)
# データの読み込み
ctrl_data <- read.table(file = "../data/immune_control_expression_matrix.txt.gz", sep = "\t")
stim_data <- read.table(file = "../data/immune_stimulated_expression_matrix.txt.gz", sep = "\t")
# コントロール群オブジェクトの作成
control <- CreateSeuratObject(counts = ctrl_data, project = "IMMUNE_CTRL", min.cells = 5)
control$condition <- "CTRL"
control <- subset(control, subset = nFeature_RNA > 500)
control <- NormalizeData(control, verbose = FALSE)
control <- FindVariableFeatures(control, selection.method = "vst", nfeatures = 2000)
# 処理群オブジェクトの作成
stimulated <- CreateSeuratObject(counts = stim_data, project = "IMMUNE_STIM", min.cells = 5)
stimulated$condition <- "STIM"
stimulated <- subset(stimulated, subset = nFeature_RNA > 500)
stimulated <- NormalizeData(stimulated, verbose = FALSE)
stimulated <- FindVariableFeatures(stimulated, selection.method = "vst", nfeatures = 2000)
統合
次に、FindIntegrationAnchors関数を使用してアンカーを識別し、IntegrateData関数を使って2つのデータセットを統合します。
# アンカーの識別
anchors <- FindIntegrationAnchors(object.list = list(control, stimulated), dims = 1:20)
# 統合
combined <- IntegrateData(anchorset = anchors, dims = 1:20)
全体的な分析
統合されたデータセットに対して、可視化とクラスタリングの標準ワークフローを実行します。
DefaultAssay(combined) <- "integrated"
# スケーリングとPCA
combined <- ScaleData(combined, verbose = FALSE)
combined <- RunPCA(combined, npcs = 30, verbose = FALSE)
# t-SNEとクラスタリング
combined <- RunUMAP(combined, reduction = "pca", dims = 1:20)
combined <- FindNeighbors(combined, reduction = "pca", dims = 1:20)
combined <- FindClusters(combined, resolution = 0.5)
# 可視化
p1 <- DimPlot(combined, reduction = "umap", group.by = "condition")
p2 <- DimPlot(combined, reduction = "umap", label = TRUE)
plot_grid(p1, p2)
# 条件ごとの可視化
DimPlot(combined, reduction = "umap", split.by = "condition")
保守的なマーカーの識別
条件間で保守的な細胞タイプマーカーを識別するために、FindConservedMarkers関数を使用します。
DefaultAssay(combined) <- "RNA"
conserved_markers <- FindConservedMarkers(combined, ident.1 = 7, grouping.var = "condition", verbose = FALSE)
head(conserved_markers)
# マーカーの可視化
FeaturePlot(combined, features = c("CD3D", "SELL", "CREM", "CD8A", "GNLY", "CD79A", "FCGR3A",
"CCL2", "PPBP"), min.cutoff = "q9")
# クラスターの名前変更
combined <- RenameIdents(combined, `0` = "CD14 Mono", `1` = "CD4 Naive T", `2` = "CD4 Memory T",
`3` = "CD16 Mono", `4` = "B", `5` = "CD8 T", `6` = "T activated", `7` = "NK", `8` = "DC", `9` = "B Activated",
`10` = "Mk", `11` = "pDC", `12` = "Eryth", `13` = "Mono/Mk Doublets")
DimPlot(combined, label = TRUE)
# DotPlotでの可視化
Idents(combined) <- factor(Idents(combined), levels = c("Mono/Mk Doublets", "pDC",
"Eryth", "Mk", "DC", "CD14 Mono", "CD16 Mono", "B Activated", "B", "CD8 T", "NK", "T activated",
"CD4 Naive T", "CD4 Memory T"))
markers_to_plot <- c("CD3D", "CREM", "HSPH1", "SELL", "GIMAP5", "CACYBP", "GNLY", "NKG7", "CCL5",
"CD8A", "MS4A1", "CD79A", "MIR155HG", "NME1", "FCGR3A", "VMO1", "CCL2", "S100A9", "HLA-DQA1",
"GPR183", "PPBP", "GNG11", "HBA2", "HBB", "TSPAN13", "IL3RA", "IGJ")
DotPlot(combined, features = rev(markers_to_plot), cols = c("blue", "red"), dot.scale = 8,
split.by = "condition") + RotatedAxis()
条件間の差分発現遺伝子の識別
刺激群とコントロール群の細胞を対比した後、比較分析を行い、刺激による変化を観察します。
t_cells <- subset(combined, idents = "CD4 Naive T")
Idents(t_cells) <- "condition"
avg_t_cells <- log1p(AverageExpression(t_cells, verbose = FALSE)$RNA)
avg_t_cells$gene <- rownames(avg_t_cells)
cd14_mono <- subset(combined, idents = "CD14 Mono")
Idents(cd14_mono) <- "condition"
avg_cd14_mono <- log1p(AverageExpression(cd14_mono, verbose = FALSE)$RNA)
avg_cd14_mono$gene <- rownames(avg_cd14_mono)
genes_to_label = c("ISG15", "LY6E", "IFI6", "ISG20", "MX1", "IFIT2", "IFIT1", "CXCL10", "CCL8")
p1 <- ggplot(avg_t_cells, aes(CTRL, STIM)) + geom_point() + ggtitle("CD4 Naive T Cells")
p1 <- LabelPoints(plot = p1, points = genes_to_label, repel = TRUE)
p2 <- ggplot(avg_cd14_mono, aes(CTRL, STIM)) + geom_point() + ggtitle("CD14 Monocytes")
p2 <- LabelPoints(plot = p2, points = genes_to_label, repel = TRUE)
plot_grid(p1, p2)
# B細胞の差分発現遺伝子
combined$celltype_condition <- paste(Idents(combined), combined$condition, sep = "_")
combined$celltype <- Idents(combined)
Idents(combined) <- "celltype_condition"
b_interferon_response <- FindMarkers(combined, ident.1 = "B_STIM", ident.2 = "B_CTRL", verbose = FALSE)
head(b_interferon_response, n = 15)
# FeaturePlotとVlnPlotでの可視化
FeaturePlot(combined, features = c("CD3D", "GNLY", "IFI6"), split.by = "condition", max.cutoff = 3,
cols = c("grey", "red"))
plots <- VlnPlot(combined, features = c("LYZ", "ISG15", "CXCL10"), split.by = "condition", group.by = "celltype",
pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)