The vignette depends on Seurat package.
library(CIARA)
required <- c("Seurat")
if (!all(unlist(lapply(required, function(pkg) requireNamespace(pkg, quietly = TRUE)))))
knitr::opts_chunk$set(eval = FALSE)
In this vignette it is shown the analysis performed on single cell RNA seq human gastrula data from Tyser, R.C.V. et al., 2021.
We load the raw count matrix and umap coordinate defined in the original paper. Raw count matrix can be downloaded here
umap_elmir <- readRDS(system.file("extdata", "annot_umap.rds", package = "CIARA"))
coordinate_umap_human <- umap_elmir[, 2:3]
Obtain normalized count matrix and knn matrix ( euclidean distance on highly variable genes) using Seurat.
human_data_seurat <- cluster_analysis_integrate_rare(raw_counts_human_data, "Human_data", 0.1, 5, 30)
norm_human_data <- as.matrix(Seurat::GetAssayData(human_data_seurat, slot = "data", assay = "RNA"))
knn_human_data <- as.matrix(human_data_seurat@graphs$RNA_nn)
Cluster annotation provided in the original paper
CIARA (Cluster Independent Algorithm for the identification of RAre cell types) is a cluster independent approach that selects genes localized in a small number of neighboring cells from high dimensional PCA space. We don’t execute the CIARA algorithm and we directly load the result
ciara_genes <- row.names(result)[result[, 1]<1]
ciara_genes_top <- row.names(result)[order(as.numeric(result[, 1]))]
p <- list()
for(i in (ciara_genes_top)[1:5]) {
q <- plot_gene(norm_human_data_ciara, coordinate_umap_human, i, i)
p <- list(p, q)
}
p
#> [[1]]
#> [[1]][[1]]
#> [[1]][[1]][[1]]
#> [[1]][[1]][[1]][[1]]
#> [[1]][[1]][[1]][[1]][[1]]
#> list()
#>
#> [[1]][[1]][[1]][[1]][[2]]
#>
#>
#> [[1]][[1]][[1]][[2]]
#>
#>
#> [[1]][[1]][[2]]
#>
#>
#> [[1]][[2]]
#>
#>
#> [[2]]
plot_genes_sum(coordinate_umap_human, norm_human_data_ciara, (ciara_genes), "Sum from top CIARA genes")
It is also possible to explore wich genes are highly localized in which cells in an interactive way
norm_counts_small <- apply(norm_human_data_ciara, 1, function(x) {
y <- x/sum(x)
return(y)
})
gene_sum <- apply(norm_counts_small, 1, sum)
genes_name_text <- selection_localized_genes(norm_human_data_ciara, ciara_genes, min_number_cells = 4, max_number_genes = 4)
colnames(coordinate_umap_human) <- c("UMAP_1", "UMAP_2")
if ((requireNamespace("plotly", quietly = TRUE))) {
plot_interactive(coordinate_umap_human, gene_sum, genes_name_text, min_x = NULL, max_x = NULL, min_y = NULL, max_y = NULL)
}
We run louvain cluster analysis implemented in Seurat using as features the highly localized genes provided by CIARA
human_data_ciara <- cluster_analysis_integrate_rare(raw_counts_human_data, "Elmir data", 0.01, 5, 30, (ciara_genes))
We can explore how much the number of clusters changes with the resolution. The original cluster 1 and 2 for resolution 0.01 are constantly present for higher values of resolution.
if ((requireNamespace("clustree", quietly = TRUE))) {
find_resolution(human_data_ciara, seq(0.01, 1, 0.1))
}
Cluster 2 (7 cells) is made up by primordial germ cells (PGCs). These cells expressed typical PGCs markers ad NANOS3, NANOG and DPPA5
` Merge the original cluster annotation with the one found using CIARA highly localized genes
Detect the clusters enriched in highly localized genes and then performs on these a sub-cluster analysis (using louvain algorithm implemented in Seurat)
result_test <- test_hvg(raw_counts_human_data, final_cluster_human, (ciara_genes), background, 100, 0.05)
raw_endoderm <- raw_counts_human_data[, as.vector(umap_elmir$cluster_id) == "Endoderm"]
raw_hemo <- raw_counts_human_data[, as.vector(umap_elmir$cluster_id) == "Hemogenic Endothelial Progenitors"]
raw_exe_meso <- raw_counts_human_data[, as.vector(umap_elmir$cluster_id) == "ExE Mesoderm"]
combined_endoderm <- cluster_analysis_sub(raw_endoderm, 0.2, 5, 30, "Endoderm")
combined_hemo <- cluster_analysis_sub(raw_hemo, 0.6, 5, 30, "Hemogenic Endothelial Progenitors")
combined_exe_meso <- cluster_analysis_sub(raw_exe_meso, 0.5, 5, 30, "ExE Mesoderm")
all_sub_cluster <- c(combined_endoderm$seurat_clusters, combined_hemo$seurat_clusters, combined_exe_meso$seurat_clusters)
final_cluster_human_version_sub <- merge_cluster(final_cluster_human, all_sub_cluster)
table(as.vector(final_cluster_human_version_sub))
#>
#> Advanced Mesoderm Axial Mesoderm
#> 164 23
#> Emergent Mesoderm Endoderm_0
#> 185 79
#> Endoderm_1 Endoderm_2
#> 45 11
#> Epiblast Erythroblasts
#> 133 32
#> ExE Mesoderm_0 ExE Mesoderm_1
#> 46 37
#> Hemogenic Endothelial Progenitors_0 Hemogenic Endothelial Progenitors_1
#> 33 27
#> Hemogenic Endothelial Progenitors_2 Hemogenic Endothelial Progenitors_3
#> 23 15
#> Hemogenic Endothelial Progenitors_4 Nascent Mesoderm
#> 13 98
#> Non-Neural Ectoderm PGC
#> 29 7
#> Primitive Streak
#> 195
Seurat::DefaultAssay(human_data_seurat) <- "RNA"
markers_human_final <- markers_cluster_seurat(human_data_seurat, final_cluster_human_version_sub, names(human_data_seurat$RNA_snn_res.0.1), 5)
markers_human_top_final <- markers_human_final[[1]]
markers_human_all_final <- markers_human_final[[3]]
white_black_markers <- white_black_markers(final_cluster_human_version_sub, "Hemogenic Endothelial Progenitors_4", norm_human_data, markers_human_all_final, 0)
sum(white_black_markers)
white_black_markers <- white_black_markers(final_cluster_human_version_sub, "Endoderm_2", norm_human_data, markers_human_all_final, 0)
sum(white_black_markers)
white_black_markers <- white_black_markers(final_cluster_human_version_sub, "ExE Mesoderm_0", norm_human_data, markers_human_all_final, 0)
sum(white_black_markers)
top_endo <- white_black_markers(final_cluster_human_version_sub, "Endoderm_2", norm_human_data, markers_human_all_final, 0)
top_endo <- names(top_endo)[top_endo]
mean_top_endo <- apply(norm_human_data[top_endo, final_cluster_human_version_sub == "Endoderm_2"], 1, mean)
mean_top_endo <- sort(mean_top_endo, decreasing = T)
top_endo <- names(mean_top_endo)
names(top_endo) <- rep("Endoderm_2", length(top_endo))
top_hemo <- white_black_markers(final_cluster_human_version_sub, "Hemogenic Endothelial Progenitors_4", norm_human_data, markers_human_all_final, 0)
top_hemo <- names(top_hemo)[top_hemo]
mean_top_hemo <- apply(norm_human_data[top_hemo, final_cluster_human_version_sub == "Hemogenic Endothelial Progenitors_4"], 1, mean)
mean_top_hemo <- sort(mean_top_hemo, decreasing = T)
top_hemo <- names(mean_top_hemo)
names(top_hemo) <- rep("Hemogenic Endothelial Progenitors_4", length(top_hemo))
top_meso <- white_black_markers(final_cluster_human_version_sub, "ExE Mesoderm_1", norm_human_data, markers_human_all_final, 0)
top_meso <- names(top_meso)[top_meso]
mean_top_meso <- apply(norm_human_data[top_meso, final_cluster_human_version_sub == "ExE Mesoderm_1"], 1, mean)
mean_top_meso <- sort(mean_top_meso, decreasing = T)
top_meso <- names(mean_top_meso)
names(top_meso) <- rep("ExE Mesoderm_1", length(top_meso))
load(system.file("extdata", "norm_human_data_plot.Rda", package = "CIARA"))
load(system.file("extdata", "top_meso.Rda", package = "CIARA"))
load(system.file("extdata", "top_endo.Rda", package = "CIARA"))
load(system.file("extdata", "top_hemo.Rda", package = "CIARA"))
toMatch <- c("Endoderm")
plot_balloon_marker(norm_human_data_plot[, grep(paste(toMatch, collapse="|"), final_cluster_human)], final_cluster_human_version_sub[grep(paste(toMatch, collapse="|"), final_cluster_human)], top_endo, 20, max_size=5, text_size=10)
toMatch <- c("Hemogenic Endothelial Progenitors")
plot_balloon_marker(norm_human_data_plot[, grep(paste(toMatch, collapse = "|"), final_cluster_human)], final_cluster_human_version_sub[grep(paste(toMatch, collapse = "|"), final_cluster_human)], top_hemo, 20, max_size = 5, text_size = 8)
toMatch <- c("ExE Mesoderm")
plot_balloon_marker(norm_human_data_plot[, grep(paste(toMatch, collapse = "|"), final_cluster_human)], final_cluster_human_version_sub[grep(paste(toMatch, collapse = "|"), final_cluster_human)], top_meso, length(top_meso), max_size = 5, text_size = 8)
Expression of some of the highly localized genes detected by CIARA that are markers of the three rare populations of cells.
utils::sessionInfo()
#> R version 4.4.1 (2024-06-14)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=C
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] CIARA_0.1.0 rmarkdown_2.28
#>
#> loaded via a namespace (and not attached):
#> [1] RColorBrewer_1.1-3 sys_3.4.3 jsonlite_1.8.9
#> [4] magrittr_2.0.3 spatstat.utils_3.1-0 farver_2.1.2
#> [7] vctrs_0.6.5 ROCR_1.0-11 memoise_2.0.1
#> [10] spatstat.explore_3.3-3 htmltools_0.5.8.1 sass_0.4.9
#> [13] sctransform_0.4.1 parallelly_1.38.0 KernSmooth_2.23-24
#> [16] bslib_0.8.0 htmlwidgets_1.6.4 ica_1.0-3
#> [19] plyr_1.8.9 plotly_4.10.4 zoo_1.8-12
#> [22] cachem_1.1.0 buildtools_1.0.0 igraph_2.1.1
#> [25] mime_0.12 lifecycle_1.0.4 pkgconfig_2.0.3
#> [28] Matrix_1.7-1 R6_2.5.1 fastmap_1.2.0
#> [31] fitdistrplus_1.2-1 future_1.34.0 shiny_1.9.1
#> [34] digest_0.6.37 colorspace_2.1-1 patchwork_1.3.0
#> [37] Seurat_5.1.0 tensor_1.5 RSpectra_0.16-2
#> [40] irlba_2.3.5.1 crosstalk_1.2.1 labeling_0.4.3
#> [43] progressr_0.14.0 fansi_1.0.6 spatstat.sparse_3.1-0
#> [46] httr_1.4.7 polyclip_1.10-7 abind_1.4-8
#> [49] compiler_4.4.1 withr_3.0.2 viridis_0.6.5
#> [52] fastDummies_1.7.4 highr_0.11 ggforce_0.4.2
#> [55] MASS_7.3-61 tools_4.4.1 lmtest_0.9-40
#> [58] httpuv_1.6.15 future.apply_1.11.3 goftest_1.2-3
#> [61] glue_1.8.0 nlme_3.1-166 promises_1.3.0
#> [64] grid_4.4.1 Rtsne_0.17 cluster_2.1.6
#> [67] reshape2_1.4.4 generics_0.1.3 gtable_0.3.6
#> [70] spatstat.data_3.1-2 tidyr_1.3.1 data.table_1.16.2
#> [73] tidygraph_1.3.1 sp_2.1-4 utf8_1.2.4
#> [76] spatstat.geom_3.3-3 RcppAnnoy_0.0.22 ggrepel_0.9.6
#> [79] RANN_2.6.2 pillar_1.9.0 stringr_1.5.1
#> [82] spam_2.11-0 RcppHNSW_0.6.0 later_1.3.2
#> [85] splines_4.4.1 dplyr_1.1.4 tweenr_2.0.3
#> [88] lattice_0.22-6 survival_3.7-0 deldir_2.0-4
#> [91] tidyselect_1.2.1 maketools_1.3.1 miniUI_0.1.1.1
#> [94] pbapply_1.7-2 knitr_1.48 gridExtra_2.3
#> [97] scattermore_1.2 xfun_0.48 graphlayouts_1.2.0
#> [100] matrixStats_1.4.1 stringi_1.8.4 lazyeval_0.2.2
#> [103] yaml_2.3.10 evaluate_1.0.1 codetools_0.2-20
#> [106] ggraph_2.2.1 tibble_3.2.1 cli_3.6.3
#> [109] uwot_0.2.2 xtable_1.8-4 reticulate_1.39.0
#> [112] munsell_0.5.1 jquerylib_0.1.4 Rcpp_1.0.13
#> [115] globals_0.16.3 spatstat.random_3.3-2 png_0.1-8
#> [118] spatstat.univar_3.0-1 parallel_4.4.1 ggplot2_3.5.1
#> [121] dotCall64_1.2 listenv_0.9.1 viridisLite_0.4.2
#> [124] scales_1.3.0 ggridges_0.5.6 SeuratObject_5.0.2
#> [127] leiden_0.4.3.1 purrr_1.0.2 rlang_1.1.4
#> [130] cowplot_1.1.3