CIARA

The vignette depends on Seurat package.

library(CIARA)

required <- c("Seurat")
if (!all(unlist(lapply(required, function(pkg) requireNamespace(pkg, quietly = TRUE)))))
  knitr::opts_chunk$set(eval = FALSE)

In this vignette it is shown the analysis performed on single cell RNA seq human gastrula data from Tyser, R.C.V. et al., 2021.

Load human gastrula data and create norm counts and knn matrices using Seurat

We load the raw count matrix and umap coordinate defined in the original paper. Raw count matrix can be downloaded here

umap_elmir <- readRDS(system.file("extdata", "annot_umap.rds", package = "CIARA"))

coordinate_umap_human <- umap_elmir[, 2:3]

Obtain normalized count matrix and knn matrix ( euclidean distance on highly variable genes) using Seurat.

  human_data_seurat <- cluster_analysis_integrate_rare(raw_counts_human_data, "Human_data", 0.1, 5, 30)

norm_human_data <- as.matrix(Seurat::GetAssayData(human_data_seurat, slot = "data", assay = "RNA"))

knn_human_data <- as.matrix(human_data_seurat@graphs$RNA_nn)

Cluster annotation provided in the original paper


original_cluster_human <- as.vector(umap_elmir$cluster_id)
names(original_cluster_human) <- names(umap_elmir$cell_name)
plot_umap(coordinate_umap_human, original_cluster_human)
#> [[1]]

Run CIARA

CIARA (Cluster Independent Algorithm for the identification of RAre cell types) is a cluster independent approach that selects genes localized in a small number of neighboring cells from high dimensional PCA space. We don’t execute the CIARA algorithm and we directly load the result

background <- get_background_full(norm_human_data, threshold = 1, n_cells_low = 3, n_cells_high = 20)
result <- CIARA(norm_human_data, knn_human_data, background, cores_number = 1, p_value = 0.001, odds_ratio = 2, local_region = 1, approximation = FALSE)
load(system.file("extdata", "result.Rda", package = "CIARA"))

Identifying highly localized genes

ciara_genes <- row.names(result)[result[, 1]<1]
ciara_genes_top <- row.names(result)[order(as.numeric(result[, 1]))]
norm_human_data_ciara <- norm_human_data[ciara_genes, ]
load(system.file("extdata", "norm_human_data_ciara.Rda", package = "CIARA"))

p <- list()
for(i in (ciara_genes_top)[1:5]) {
  q <- plot_gene(norm_human_data_ciara, coordinate_umap_human, i, i)
  p <- list(p, q)
}
p
#> [[1]]
#> [[1]][[1]]
#> [[1]][[1]][[1]]
#> [[1]][[1]][[1]][[1]]
#> [[1]][[1]][[1]][[1]][[1]]
#> list()
#> 
#> [[1]][[1]][[1]][[1]][[2]]

#> 
#> 
#> [[1]][[1]][[1]][[2]]

#> 
#> 
#> [[1]][[1]][[2]]

#> 
#> 
#> [[1]][[2]]

#> 
#> 
#> [[2]]

background <- row.names(result)
plot_genes_sum(coordinate_umap_human, norm_human_data_ciara, (ciara_genes), "Sum from top CIARA genes")

It is also possible to explore wich genes are highly localized in which cells in an interactive way

norm_counts_small <- apply(norm_human_data_ciara, 1, function(x) {
  y <- x/sum(x)
  return(y)
  })
gene_sum <- apply(norm_counts_small, 1, sum)
    
genes_name_text <- selection_localized_genes(norm_human_data_ciara, ciara_genes, min_number_cells = 4, max_number_genes = 4)
colnames(coordinate_umap_human) <- c("UMAP_1", "UMAP_2")

if ((requireNamespace("plotly", quietly = TRUE))) {
plot_interactive(coordinate_umap_human, gene_sum, genes_name_text, min_x = NULL, max_x = NULL, min_y = NULL, max_y = NULL)
  }

Cluster analysis using CIARA highly localized genes

We run louvain cluster analysis implemented in Seurat using as features the highly localized genes provided by CIARA

human_data_ciara <- cluster_analysis_integrate_rare(raw_counts_human_data, "Elmir data", 0.01, 5, 30, (ciara_genes))

We can explore how much the number of clusters changes with the resolution. The original cluster 1 and 2 for resolution 0.01 are constantly present for higher values of resolution.

if ((requireNamespace("clustree", quietly = TRUE))) {
  find_resolution(human_data_ciara, seq(0.01, 1, 0.1))
  }

Cluster 2 (7 cells) is made up by primordial germ cells (PGCs). These cells expressed typical PGCs markers ad NANOS3, NANOG and DPPA5

ciara_cluster_human <- human_data_ciara$RNA_snn_res.0.01
load(system.file("extdata", "ciara_cluster_human.Rda", package = "CIARA"))
plot_umap(coordinate_umap_human, ciara_cluster_human)
#> [[1]]

plot_gene(norm_human_data_ciara, coordinate_umap_human, "NANOS3", "NANOS3")

plot_gene(norm_human_data_ciara, coordinate_umap_human, "NANOG", "NANOG")

plot_gene(norm_human_data_ciara, coordinate_umap_human, "DPPA5", "DPPA5")

` Merge the original cluster annotation with the one found using CIARA highly localized genes

final_cluster_human <- merge_cluster(original_cluster_human, ciara_cluster_human, 10)
#> Clusters with number of cells below 10 in the new partition: 1
final_cluster_human[final_cluster_human == "2-step_2"] <- "PGC"
plot_umap(coordinate_umap_human, final_cluster_human)
#> [[1]]

Sub-cluster analysis using CIARA

Detect the clusters enriched in highly localized genes and then performs on these a sub-cluster analysis (using louvain algorithm implemented in Seurat)


result_test <- test_hvg(raw_counts_human_data, final_cluster_human, (ciara_genes), background, 100, 0.05)
result_test[[2]]

raw_endoderm <- raw_counts_human_data[, as.vector(umap_elmir$cluster_id) == "Endoderm"]
raw_hemo <- raw_counts_human_data[, as.vector(umap_elmir$cluster_id) == "Hemogenic Endothelial Progenitors"]
raw_exe_meso <- raw_counts_human_data[, as.vector(umap_elmir$cluster_id) == "ExE Mesoderm"]



combined_endoderm <- cluster_analysis_sub(raw_endoderm, 0.2, 5, 30, "Endoderm")

combined_hemo <- cluster_analysis_sub(raw_hemo, 0.6, 5, 30, "Hemogenic Endothelial Progenitors")

combined_exe_meso <- cluster_analysis_sub(raw_exe_meso, 0.5, 5, 30, "ExE Mesoderm")

all_sub_cluster <- c(combined_endoderm$seurat_clusters, combined_hemo$seurat_clusters, combined_exe_meso$seurat_clusters)
final_cluster_human_version_sub <- merge_cluster(final_cluster_human, all_sub_cluster)
load(system.file("extdata", "final_cluster_human_version_sub.Rda", package = "CIARA"))
plot_umap(coordinate_umap_human, final_cluster_human_version_sub)
#> [[1]]

table(as.vector(final_cluster_human_version_sub))
#> 
#>                   Advanced Mesoderm                      Axial Mesoderm 
#>                                 164                                  23 
#>                   Emergent Mesoderm                          Endoderm_0 
#>                                 185                                  79 
#>                          Endoderm_1                          Endoderm_2 
#>                                  45                                  11 
#>                            Epiblast                       Erythroblasts 
#>                                 133                                  32 
#>                      ExE Mesoderm_0                      ExE Mesoderm_1 
#>                                  46                                  37 
#> Hemogenic Endothelial Progenitors_0 Hemogenic Endothelial Progenitors_1 
#>                                  33                                  27 
#> Hemogenic Endothelial Progenitors_2 Hemogenic Endothelial Progenitors_3 
#>                                  23                                  15 
#> Hemogenic Endothelial Progenitors_4                    Nascent Mesoderm 
#>                                  13                                  98 
#>                 Non-Neural Ectoderm                                 PGC 
#>                                  29                                   7 
#>                    Primitive Streak 
#>                                 195

CIARA identifies three rare population of cells with highly distinctive transcriptional profile

Seurat::DefaultAssay(human_data_seurat) <- "RNA"
markers_human_final <- markers_cluster_seurat(human_data_seurat, final_cluster_human_version_sub, names(human_data_seurat$RNA_snn_res.0.1), 5)

markers_human_top_final <- markers_human_final[[1]]
markers_human_all_final <- markers_human_final[[3]]
white_black_markers <- white_black_markers(final_cluster_human_version_sub, "Hemogenic Endothelial Progenitors_4", norm_human_data, markers_human_all_final, 0)
sum(white_black_markers)
white_black_markers <- white_black_markers(final_cluster_human_version_sub, "Endoderm_2", norm_human_data, markers_human_all_final, 0)
sum(white_black_markers)
white_black_markers <- white_black_markers(final_cluster_human_version_sub, "ExE Mesoderm_0", norm_human_data, markers_human_all_final, 0)
sum(white_black_markers)

top_endo <- white_black_markers(final_cluster_human_version_sub, "Endoderm_2", norm_human_data, markers_human_all_final, 0)
top_endo <- names(top_endo)[top_endo]


mean_top_endo <- apply(norm_human_data[top_endo, final_cluster_human_version_sub == "Endoderm_2"], 1, mean)
mean_top_endo <- sort(mean_top_endo, decreasing = T)

top_endo <- names(mean_top_endo)
names(top_endo) <- rep("Endoderm_2", length(top_endo))

top_hemo <- white_black_markers(final_cluster_human_version_sub, "Hemogenic Endothelial Progenitors_4", norm_human_data, markers_human_all_final, 0)
top_hemo <- names(top_hemo)[top_hemo]


mean_top_hemo <- apply(norm_human_data[top_hemo, final_cluster_human_version_sub == "Hemogenic Endothelial Progenitors_4"], 1, mean)
mean_top_hemo <- sort(mean_top_hemo, decreasing = T)

top_hemo <- names(mean_top_hemo)
names(top_hemo) <- rep("Hemogenic Endothelial Progenitors_4", length(top_hemo))
top_meso <- white_black_markers(final_cluster_human_version_sub, "ExE Mesoderm_1", norm_human_data, markers_human_all_final, 0)
top_meso <- names(top_meso)[top_meso]


mean_top_meso <- apply(norm_human_data[top_meso, final_cluster_human_version_sub == "ExE Mesoderm_1"], 1, mean)
mean_top_meso <- sort(mean_top_meso, decreasing = T)

top_meso <- names(mean_top_meso)
names(top_meso) <- rep("ExE Mesoderm_1", length(top_meso))
norm_human_data_plot <- norm_human_data[c(top_endo, top_hemo, top_meso), ]
load(system.file("extdata", "norm_human_data_plot.Rda", package = "CIARA"))
load(system.file("extdata", "top_meso.Rda", package = "CIARA"))
load(system.file("extdata", "top_endo.Rda", package = "CIARA"))
load(system.file("extdata", "top_hemo.Rda", package = "CIARA"))



toMatch <- c("Endoderm")

plot_balloon_marker(norm_human_data_plot[, grep(paste(toMatch, collapse="|"), final_cluster_human)], final_cluster_human_version_sub[grep(paste(toMatch, collapse="|"), final_cluster_human)], top_endo, 20, max_size=5, text_size=10)



toMatch <- c("Hemogenic Endothelial Progenitors")
plot_balloon_marker(norm_human_data_plot[, grep(paste(toMatch, collapse = "|"), final_cluster_human)], final_cluster_human_version_sub[grep(paste(toMatch, collapse = "|"), final_cluster_human)], top_hemo, 20, max_size = 5, text_size = 8)



toMatch <- c("ExE Mesoderm")
plot_balloon_marker(norm_human_data_plot[, grep(paste(toMatch, collapse = "|"), final_cluster_human)], final_cluster_human_version_sub[grep(paste(toMatch, collapse = "|"), final_cluster_human)], top_meso, length(top_meso), max_size = 5, text_size = 8)

Expression of some of the highly localized genes detected by CIARA that are markers of the three rare populations of cells.


plot_gene(norm_human_data_ciara, coordinate_umap_human, "C1R", "C1R")

plot_gene(norm_human_data_ciara, coordinate_umap_human, "NANOS3", "NANOS3")

plot_gene(norm_human_data_ciara, coordinate_umap_human, "NANOG", "NANOG")

plot_gene(norm_human_data_ciara, coordinate_umap_human, "SOX17", "SOX17")

plot_gene(norm_human_data_ciara, coordinate_umap_human, "DPPA5", "DPPA5")

plot_gene(norm_human_data_ciara, coordinate_umap_human, "CSF1", "CSF1")

utils::sessionInfo()
#> R version 4.4.1 (2024-06-14)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] CIARA_0.1.0    rmarkdown_2.28
#> 
#> loaded via a namespace (and not attached):
#>   [1] RColorBrewer_1.1-3     sys_3.4.3              jsonlite_1.8.9        
#>   [4] magrittr_2.0.3         spatstat.utils_3.1-0   farver_2.1.2          
#>   [7] vctrs_0.6.5            ROCR_1.0-11            memoise_2.0.1         
#>  [10] spatstat.explore_3.3-3 htmltools_0.5.8.1      sass_0.4.9            
#>  [13] sctransform_0.4.1      parallelly_1.38.0      KernSmooth_2.23-24    
#>  [16] bslib_0.8.0            htmlwidgets_1.6.4      ica_1.0-3             
#>  [19] plyr_1.8.9             plotly_4.10.4          zoo_1.8-12            
#>  [22] cachem_1.1.0           buildtools_1.0.0       igraph_2.1.1          
#>  [25] mime_0.12              lifecycle_1.0.4        pkgconfig_2.0.3       
#>  [28] Matrix_1.7-1           R6_2.5.1               fastmap_1.2.0         
#>  [31] fitdistrplus_1.2-1     future_1.34.0          shiny_1.9.1           
#>  [34] digest_0.6.37          colorspace_2.1-1       patchwork_1.3.0       
#>  [37] Seurat_5.1.0           tensor_1.5             RSpectra_0.16-2       
#>  [40] irlba_2.3.5.1          crosstalk_1.2.1        labeling_0.4.3        
#>  [43] progressr_0.14.0       fansi_1.0.6            spatstat.sparse_3.1-0 
#>  [46] httr_1.4.7             polyclip_1.10-7        abind_1.4-8           
#>  [49] compiler_4.4.1         withr_3.0.2            viridis_0.6.5         
#>  [52] fastDummies_1.7.4      highr_0.11             ggforce_0.4.2         
#>  [55] MASS_7.3-61            tools_4.4.1            lmtest_0.9-40         
#>  [58] httpuv_1.6.15          future.apply_1.11.3    goftest_1.2-3         
#>  [61] glue_1.8.0             nlme_3.1-166           promises_1.3.0        
#>  [64] grid_4.4.1             Rtsne_0.17             cluster_2.1.6         
#>  [67] reshape2_1.4.4         generics_0.1.3         gtable_0.3.6          
#>  [70] spatstat.data_3.1-2    tidyr_1.3.1            data.table_1.16.2     
#>  [73] tidygraph_1.3.1        sp_2.1-4               utf8_1.2.4            
#>  [76] spatstat.geom_3.3-3    RcppAnnoy_0.0.22       ggrepel_0.9.6         
#>  [79] RANN_2.6.2             pillar_1.9.0           stringr_1.5.1         
#>  [82] spam_2.11-0            RcppHNSW_0.6.0         later_1.3.2           
#>  [85] splines_4.4.1          dplyr_1.1.4            tweenr_2.0.3          
#>  [88] lattice_0.22-6         survival_3.7-0         deldir_2.0-4          
#>  [91] tidyselect_1.2.1       maketools_1.3.1        miniUI_0.1.1.1        
#>  [94] pbapply_1.7-2          knitr_1.48             gridExtra_2.3         
#>  [97] scattermore_1.2        xfun_0.48              graphlayouts_1.2.0    
#> [100] matrixStats_1.4.1      stringi_1.8.4          lazyeval_0.2.2        
#> [103] yaml_2.3.10            evaluate_1.0.1         codetools_0.2-20      
#> [106] ggraph_2.2.1           tibble_3.2.1           cli_3.6.3             
#> [109] uwot_0.2.2             xtable_1.8-4           reticulate_1.39.0     
#> [112] munsell_0.5.1          jquerylib_0.1.4        Rcpp_1.0.13           
#> [115] globals_0.16.3         spatstat.random_3.3-2  png_0.1-8             
#> [118] spatstat.univar_3.0-1  parallel_4.4.1         ggplot2_3.5.1         
#> [121] dotCall64_1.2          listenv_0.9.1          viridisLite_0.4.2     
#> [124] scales_1.3.0           ggridges_0.5.6         SeuratObject_5.0.2    
#> [127] leiden_0.4.3.1         purrr_1.0.2            rlang_1.1.4           
#> [130] cowplot_1.1.3