# **MODIFY THIS CHUNK**
project_id  <- "HDMA-public" # determines the name of the cache folder
doc_id      <- "02-global_analysis/02" # determines name of the subfolder of `figures` where pngs/pdfs are saved
out         <- here::here("output/", doc_id); dir.create(out, recursive = TRUE)
figout      <- here::here("figures/", doc_id, "/")
cache       <- paste0("~/scratch/cache/", project_id, "/", doc_id, "/")

1 Overview

Construct a dendrogram over clusters.

2 Set up

# libraries
library(here)
library(dplyr)
library(tidyr)
library(ggplot2)
library(purrr)
library(glue)
library(readr)
library(tibble)
library(stringr)
library(Seurat)
library(cowplot)
library(data.table)
library(pvclust)
library(dendextend)
library(ArchR)
library(here)

# shared project scripts/functions
source(here::here("code/utils/plotting_config.R"))
source(here::here("code/utils/matrix_helpers.R"))
source(here::here("code/utils/seurat_helpers.R"))
source(here::here("code/utils/sj_scRNAseq_helpers.R"))
source(here::here("code/utils/hdma_palettes.R"))

ggplot2::theme_set(theme_BOR())

set.seed(100)

3 Load metadata

Get organ metadata:

organ_meta <- read_tsv(here::here("code/02-global_analysis/01-organs_keep.tsv"))

## Rows: 12 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): organcode, organ, iteration
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

organ_meta

## # A tibble: 12 × 3
##    organcode organ            iteration
##    <chr>     <chr>            <chr>    
##  1 AG        Adrenal          v4       
##  2 BR        Brain            v4       
##  3 EY        Eye              v6       
##  4 HT        Heart            v4       
##  5 LI        Liver            v4       
##  6 LU        Lung             v8       
##  7 MU        Muscle           v4       
##  8 SK        Skin             v4       
##  9 SP        Spleen           v4       
## 10 ST        StomachEsophagus v2       
## 11 TM        Thymus           v4       
## 12 TR        Thyroid          v2

Set up cluster metadata, which we’ll use to label the dendrogram:

cluster_meta <- map_dfr(list.files(here::here("output/01-preprocessing/02/shared/meta"),
                                   pattern = "*_meta_cluster.txt", full.names = TRUE),
                        ~ fread(.x, data.table = FALSE)) %>% 
  mutate(Cluster = glue("{organ_code}_{L1_clusterID}")) %>% 
  mutate(Cluster_labelled = glue("{organ_code}_{L1_clusterID}_{L3_clusterName}")) %>% 
  left_join(enframe(cmap_organ, "organ", "Color")) %>% 
  left_join(enframe(cmap_compartment, "L0_clusterName", "Color_compartment")) %>% 
  tibble::column_to_rownames(var = "Cluster")

## Joining with `by = join_by(organ)`
## Joining with `by = join_by(L0_clusterName)`

Load cell metadata:

load(here::here("output/02-global_analysis/01/cell_meta_all.Rda"))

Load sample metadata:

sample_meta <- read_csv(here::here("output/01-preprocessing/02/shared/SampleMetadata.csv"))

## Rows: 130 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): SampleName, R1_BC_Row, StorageCode, TubeLabel, Organ_Desc, Organ, ...
## dbl  (5): Batch, BioID, PCW, PCD, Total cells into splitpool (thousands)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

4 Cluster average expression

To be able to assess some global relationships between all clusters, let’s take average expression across cells in each cluster:

# for each organ, load up the RNA object and calculate cluster mean expression across all genes
for (i in 1:nrow(organ_meta)) {
  
  print(organ_meta$organ[i])
  seurat_path <- sprintf(here::here("output/01-preprocessing/02/%s/rna_preprocess_output/RNA_obj_clustered_final.rds"), organ_meta$organ[i])
  
  if (file.exists(seurat_path) & !file.exists(glue("{out}/{organ_meta$organcode[i]}_avg_exp.decontx.Rds"))) {
    
    obj <- readRDS(seurat_path)
    
    # prepend clusters with organ code
    obj$Clusters <- paste0(organ_meta$organcode[i], "_", obj$Clusters)
    Idents(obj) <- "Clusters"
    
    # get average expression for both decontx and RNA assays
    obj_avg <- Seurat::AverageExpression(obj, return.seurat = FALSE, assays = "decontX")$decontX
    saveRDS(obj_avg, glue("{out}/{organ_meta$organcode[i]}_avg_exp.decontx.Rds"))
    
    # RNA data isn't log-normalized
    DefaultAssay(obj) <- "RNA"
    obj <- NormalizeData(obj)
    
    obj_avg <- Seurat::AverageExpression(obj, return.seurat = FALSE, assays = "RNA")$RNA
    saveRDS(obj_avg, glue("{out}/{organ_meta$organcode[i]}_avg_exp.rna.Rds"))
    
  } else {
    next()
  }
  
}

5 Dendrogram based on RNA

Here, we can approximately follow a method used by Tasic et al 2018 and Yao et al 2021 in Allen Institute publications, where dendrograms are computed by using the mean cluster expression on top pairwise differentially expressed genes. For construction of the dendrogram, we will consult the build_dend() function used in the scrattch.hicat package: https://github.com/AllenInstitute/scrattch.hicat/blob/7ccfbc24e3a51326740d08b5b306b18afeb890d9/R/dendro.R#L49

Instead of using pairwise differentially expressed genes, we will use the top cluster markers in the one-vs-all differential expression analyses computed for each organ.

n <- 20
top_markers <- map2(organ_meta$organ, organ_meta$iteration, function(organ, iter) {
  
  # if data for that organ exists,
  if (!is.na(iter)) {
    
    # load the markers corresponding to the final iteration
    markers <- fread(
      file.path(here::here("output/01-preprocessing/02/"), organ, glue("rna_preprocess_output/cluster_markers/cluster_markers_SCTdecontXnorm_{iter}.tsv")),
      data.table = FALSE)
    
    # for each cluster, filter to only positive markers, and select the top 20 based on LFC
    markers %>% 
      group_by(cluster) %>% 
      filter(avg_log2FC > 0) %>% 
      top_n(n, avg_log2FC) %>%
      pull(gene) %>% 
      unique()
    
  }
})

top_markers <- top_markers %>% unlist() %>% unique()
length(top_markers)

## [1] 1652

saveRDS(top_markers, file = glue("{out}/top_markers.Rds"))

avg_exp_decontx_list <- map(list.files(out, pattern = "*_avg_exp.decontx.Rds", full.names = TRUE), ~ readRDS(.x))
map(avg_exp_decontx_list, dim)

## [[1]]
## [1] 25314    10
## 
## [[2]]
## [1] 32500    18
## 
## [[3]]
## [1] 32067    22
## 
## [[4]]
## [1] 32053    16
## 
## [[5]]
## [1] 32748    14
## 
## [[6]]
## [1] 33472    18
## 
## [[7]]
## [1] 32862    22
## 
## [[8]]
## [1] 34185    19
## 
## [[9]]
## [1] 32513    12
## 
## [[10]]
## [1] 33035    22
## 
## [[11]]
## [1] 33648    18
## 
## [[12]]
## [1] 26163    12

# subset each mat to the collection of marker genes, to save time in the merge
avg_exp_decontx_subset_list <- map(avg_exp_decontx_list, ~ .x[rownames(.x) %in% top_markers, ])
map(avg_exp_decontx_subset_list, dim)

## [[1]]
## [1] 1636   10
## 
## [[2]]
## [1] 1652   18
## 
## [[3]]
## [1] 1652   22
## 
## [[4]]
## [1] 1651   16
## 
## [[5]]
## [1] 1652   14
## 
## [[6]]
## [1] 1652   18
## 
## [[7]]
## [1] 1652   22
## 
## [[8]]
## [1] 1652   19
## 
## [[9]]
## [1] 1652   12
## 
## [[10]]
## [1] 1652   22
## 
## [[11]]
## [1] 1652   18
## 
## [[12]]
## [1] 1646   12

#' A version of base::merge with some handy defaults (bind columns of different
#' matrices, according to their rownames, allowing for differences in the rows
#' present in the two dfs)
#' 
#' @param m1 and m2, data frames to merge by rownames
merge2 <- function(m1, m2) {
  
  # all controls whether or not to keep genes (rows) that apppear in some matrices 
  # but not others. We keep them and fill missing values with NA. This is to allow
  # keeping genes that might be highly organ specific.
  merge(m1, m2, by = "row.names", all = TRUE) %>% 
    # merge returns a df where the first column contains the rownames, here
    # we just move it out to allow for subsequent merges
    tibble::column_to_rownames("Row.names")
  
}

# combine all avg exp matrices
avg_exp_decontx_mat_subset <- Reduce(merge2, avg_exp_decontx_subset_list)
dim(avg_exp_decontx_mat_subset)

## [1] 1652  203

any(is.na(avg_exp_decontx_mat_subset))

## [1] TRUE

avg_exp_decontx_mat_subset[is.na(avg_exp_decontx_mat_subset)] <- 0
any(is.na(avg_exp_decontx_mat_subset))

## [1] FALSE

# avg_exp_rna_mat <- Reduce(merge2, avg_exp_rna_list)
# dim(avg_exp_rna_mat)
# any(is.na(avg_exp_rna_mat))

saveRDS(avg_exp_decontx_mat_subset, file = glue("{out}/avg_expr_marker_subset.decontx.Rds"))

Dendrogram helpers:

# adapting from scrattch.hicat
# https://github.com/AllenInstitute/scrattch.hicat/blob/7ccfbc24e3a51326740d08b5b306b18afeb890d9/R/dendro.R#L49
pvclust_show_signif_gradient <- function(dend, pvclust_obj, signif_type = c("bp", "au"),
                                         signif_col_fun = NULL, ...) {
  
  signif_type <- match.arg(signif_type)
  pvalue_per_node <- pvclust_obj$edges[[signif_type]]
  ord <- rank(get_branches_heights(dend, sort = FALSE))
  pvalue_per_node <- pvalue_per_node[ord]
  signif_col <- signif_col_fun(100)
  pvalue_by_all_nodes <- rep(NA, nnodes(dend))
  ss_leaf <- which_leaf(dend)
  pvalue_by_all_nodes[!ss_leaf] <- pvalue_per_node
  pvalue_by_all_nodes <- na_locf(pvalue_by_all_nodes)
  the_cols <- signif_col[round(pvalue_by_all_nodes * 100)]
  signif_lwd = seq(0.5,2,length.out=100)
  the_lwds = signif_lwd[round(pvalue_by_all_nodes * 100)]
  dend = dend %>%
    assign_values_to_branches_edgePar(the_cols, "col") %>%
    assign_values_to_branches_edgePar(the_lwds, "lwd") %>%
    assign_values_to_branches_edgePar(pvalue_by_all_nodes, "conf") 
}


#' A distance function for Spearman rank correlation
#' from https://davetang.org/muse/2010/11/26/hierarchical-clustering-with-p-values-using-spearman/
spearman <- function(x, ...) {
  x <- as.matrix(x)
  res <- as.dist(1 - cor(x, method = "spearman", use = "everything"))
  res <- as.dist(res)
  attr(res, "method") <- "spearman"
  return(res)
}


build_dend <- function(mat, n_boot = 100, distance = spearman, hclust = "complete") {
  
  pvclust_res <- pvclust::pvclust(mat,
                                  nboot = n_boot,
                                  method.dist = distance,
                                  method.hclust = hclust)
  
  dend <- as.dendrogram(pvclust_res$hclust)
  # dend <- dend %>% pvclust_show_signif_gradient(
  #   pvclust_res,
  #   signif_type = "bp",
  #   signif_col_fun = colorRampPalette(c("gray80", "gray50", "black")))
  
  return(list(dend = dend, pvclust_res = pvclust_res))
  
}

Construct dendrogram, first for the decontX data:

dim(avg_exp_decontx_mat_subset)

# get correlation matrix
cor_decontx <- cor(avg_exp_decontx_mat_subset, method = "spearman")

# build dendrogram
dend_res_decontx <- build_dend(avg_exp_decontx_mat_subset, distance = spearman, hclust = "average", n_boot = 1000)

save(avg_exp_decontx_mat_subset, cor_decontx, dend_res_decontx,
     file = glue("{out}/dend_res_decontx.Rda"))

load(glue("{out}/dend_res_decontx.Rda"))

Label and color the leaves:

dend_labeled_decontx <- dend_res_decontx$dend
labels(dend_labeled_decontx) <- cluster_meta[labels(dend_labeled_decontx), "Cluster_labelled"]
labels_colors(dend_labeled_decontx) <- cluster_meta[colnames(cor_decontx), ]$Color[order.dendrogram(dend_labeled_decontx)]

dend_labeled_decontx2 <- dend_res_decontx$dend
labels(dend_labeled_decontx2) <- cluster_meta[labels(dend_labeled_decontx2), "Cluster_labelled"]
labels_colors(dend_labeled_decontx2) <- cluster_meta[colnames(cor_decontx), ]$Color_compartment[order.dendrogram(dend_labeled_decontx2)]

Pvclust results and dendrogram with tree colored by significance of the branches:

par(mar=c(4,4,4,20))
plot(dend_res_decontx$pvclust_res, horiz = TRUE)

par(mar=c(20,4,4,4))
plot(dend_labeled_decontx, horiz = FALSE)
legend("top", legend = names(cmap_organ), fill = cmap_organ, horiz = TRUE)

par(mar=c(20,4,4,4))
plot(dend_labeled_decontx2, horiz = FALSE)
legend("topright", legend = names(cmap_compartment), fill = cmap_compartment)

Plot the global correlation matrix:

hm_fun <- purrr::partial(pheatmap::pheatmap,
                         mat = cor_decontx[
                           rev(order.dendrogram(dend_labeled_decontx)),
                           rev(order.dendrogram(dend_labeled_decontx))],
                         main = "Correlations; order from bootstrapped tree; mouse only",
                         cluster_rows = FALSE,
                         cluster_cols = FALSE,
                         annotation_col = cluster_meta %>% dplyr::select(organ),
                         annotation_colors = list("organ" = cmap_organ),
                         # use the project's RNA-specific palette
                         color = colorRampPalette(cmap_rna)(n=100),
                         scale = "none",
                         breaks = seq(0, 1, 0.01),
                         border_color = NA,
                         cellwidth = 13,
                         cellheight = 13)

# make png/pdf
hm_fun(filename = glue("{figout}/cor_heatmap.decontx.pdf"))
hm_fun(filename = glue("{figout}/cor_heatmap.decontx.png"))

knitr::include_graphics(glue("{figout}/cor_heatmap.decontx.png"))

Let’s layer on additional information:

cluster_meta2 <- cluster_meta %>% 
  tibble::rownames_to_column("Cluster") %>% 
  # get the order from the dendrogram
  left_join(data.frame(Cluster = colnames(cor_decontx)[order.dendrogram(dend_labeled_decontx)]) %>% 
              tibble::rowid_to_column("Order"),
            by = "Cluster") %>% 
  # order by dendro order
  arrange(Order) %>% 
  mutate(Cluster_labelled = factor(Cluster_labelled, levels = unique(.$Cluster_labelled)))

write_tsv(cluster_meta2, glue("{out}/cluster_metadata_dend_order.tsv"))

Set up the plot:

# set theme
theme_d <- function()  {
  
  theme(
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = -90, size = 20, hjust = 0),
    panel.border = element_blank(),
    legend.position = "right", # changed this from bottom to right for proper grid alignment
    legend.text = element_text(angle = 0, hjust = 0, size = 20))
  
}

p1 <- cell_meta_rna %>%
  left_join(cluster_meta2 %>% dplyr::select(Cluster, Cluster_labelled), by = "Cluster") %>%
  ggplot(aes(x = Cluster_labelled)) +
  geom_bar(aes(fill = PCW), position = "fill") +
  scale_fill_manual(values = cmap_pcw) +
  theme_d() +
  coord_flip() +
  ggtitle("PCW")

p2 <- cluster_meta2 %>% 
  ggplot(aes(x = Cluster_labelled, y = median_numi)) +
  geom_col(aes(fill = median_numi)) +
  # geom_raster(aes(fill = median_numi)) +
  scale_fill_gradientn(colors = ylrd) +
  theme_d() +
  coord_flip() +
  ggtitle("median UMIs/cell") +
  ylab(NULL)

p3 <- cluster_meta2 %>% 
  ggplot(aes(x = Cluster_labelled, y = median_ngene)) +
  geom_col(aes(fill = median_ngene)) +
  # geom_raster(aes(fill = median_ngene)) +
  scale_fill_gradientn(colors = ylrd) +
  theme_d() +
  coord_flip() +
  ggtitle("median # genes/cell") +
  ylab(NULL)

p4 <- cluster_meta2 %>% 
  ggplot(aes(x = Cluster_labelled, y = median_nfrags)) +
  geom_col(aes(fill = median_nfrags)) +
  # geom_raster(aes(fill = median_nfrags)) +
  scale_fill_gradientn(colors = ylrd) +
  theme_d() +
  coord_flip() +
  ggtitle("median \n# fragments/cell") +
  ylab(NULL)

p5 <- cluster_meta2 %>% 
  ggplot(aes(x = Cluster_labelled, y = median_tss)) +
  geom_col(aes(fill = median_tss)) +
  # geom_raster(aes(fill = median_tss)) +
  scale_fill_gradientn(colors = ylrd) +
  theme_d() +
  coord_flip() +
  ggtitle("median \nTSS enrichment") +
  ylab(NULL)

p6 <- cluster_meta2 %>% 
  ggplot(aes(x = Cluster_labelled, y = 1)) +
  geom_col(aes(fill = L0_clusterName)) +
  scale_fill_manual(values = cmap_compartment) +
  theme_d() +
  coord_flip() +
  ggtitle("Compartment") +
  ylab(NULL)

p7 <- cluster_meta2 %>% 
  ggplot(aes(x = Cluster_labelled, y = 1)) +
  geom_col(aes(fill = organ)) +
  scale_fill_manual(values = cmap_organ) +
  theme_d() + theme(legend.position = "none") +
  coord_flip() +
  ggtitle("Organ") +
  ylab(NULL)

meta_withbioid <- cell_meta_rna %>%
  left_join(cluster_meta2 %>% dplyr::select(Cluster, Cluster_labelled), by = "Cluster") %>%
  left_join(sample_meta %>% dplyr::select(Sample=SampleName, BioID))

## Joining with `by = join_by(Sample)`

meta_withbioid$BioID <- as.factor(meta_withbioid$BioID)

pal <- colorRampPalette(c("lightgray", "orange", "brown"))(23)

p8 <- meta_withbioid %>% 
  ggplot(aes(x = Cluster_labelled)) +
  geom_bar(aes(fill = BioID), position = "fill") +
  scale_fill_manual(values=pal) + 
  theme_d() +
  coord_flip() +
  ggtitle("BioID")

# we have orders of magnitudes different # of cells in some clusters,
# so let's put these on a log10 scale
p0 <- cluster_meta2 %>% 
  ggplot(aes(x = Cluster_labelled, y = log10(ncell))) +
  geom_col(aes(fill = organ)) +
  scale_x_discrete(position = "top") +
  scale_fill_manual(values = cmap_organ) +
  theme(panel.border = element_blank(), legend.position = "right") +
  coord_flip() +
  ggtitle("# cells")


plot_grid(p1, p8, p2, p3, p4, p5, p6, p7, p0, nrow = 1,
          rel_widths = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2),
          align = "h", axis = "tb")

6 Visualize the top marker heatmap

# scale
avg_exp_decontx_mat_subset_scaled <- avg_exp_decontx_mat_subset %>%
  # genes are in rows; for each gene, scale expression to [0, 1]
  apply(1, scales::rescale) %>%
  t()

Loop through clusters in dendrogram order to get their top marker genes, to try and get a diagonal heatmap:

# get an order for marker genes based on the dendrogram order. First we will simply
# load the markers, as before, but without getting unique genes, and keeping
# cluster from which each marker originated
n <- 20
top_markers_df <- map2_dfr(organ_meta$organ, organ_meta$iteration, function(organ, iter) {

  # if data for that organ exists,
  if (!is.na(iter)) {

    # load the markers corresponding to the final iteration
    markers <- fread(
      file.path(here::here("output/01-preprocessing/02/"), organ, glue("rna_preprocess_output/cluster_markers/cluster_markers_SCTdecontXnorm_{iter}.tsv")),
      data.table = FALSE)

    # for each cluster, filter to only positive markers, and select the top 20 based on LFC
    markers %>%
      group_by(cluster) %>%
      filter(avg_log2FC > 0) %>%
      top_n(n, avg_log2FC) %>%
      tibble::add_column(organ = organ, .before = 1)

  }
})

head(top_markers_df)

## # A tibble: 6 × 8
## # Groups:   cluster [1]
##   organ       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
##   <chr>       <dbl>      <dbl> <dbl> <dbl>     <dbl>   <int> <chr>  
## 1 Adrenal 0               2.86 0.998 0.55  0               0 NPC1   
## 2 Adrenal 0               2.46 0.904 0.152 0               0 C1orf54
## 3 Adrenal 0               2.31 0.99  0.508 0               0 GALC   
## 4 Adrenal 0               2.26 0.989 0.271 0               0 USP36  
## 5 Adrenal 0               2.05 1     0.689 0               0 GRK5   
## 6 Adrenal 5.54e-305       1.91 1     0.664 1.40e-300       0 CDK8

# reformat df
top_markers_df <- top_markers_df %>%
  left_join(cluster_meta2, by = c("organ" = "organ", "cluster" = "L1_clusterID")) %>%
  dplyr::select(-cluster)

## Adding missing grouping variables: `cluster`

# arrange by Order, then pull out markers
markers_ordered_unique <- top_markers_df %>% arrange(Order) %>% group_by(Cluster) %>% pull(gene) %>% unique()
length(markers_ordered_unique)

## [1] 1652

all(is.numeric(avg_exp_decontx_mat_subset_scaled[markers_ordered_unique, cluster_meta2$Cluster]))

## [1] TRUE

cluster_meta2 <- as.data.frame(cluster_meta2)
rownames(cluster_meta2) <- cluster_meta2$Cluster

# make a function to plot the heatmap in dendrogram order, and with markers ordered diagonally
marker_hm_fun <- purrr::partial(
  pheatmap::pheatmap,
  mat = avg_exp_decontx_mat_subset_scaled[markers_ordered_unique, cluster_meta2$Cluster],
  border_color = NA,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  annotation_col = cluster_meta2 %>% dplyr::select(organ),
  annotation_colors = list("organ" = cmap_organ),
  # use the project's RNA-specific palette
  color = colorRampPalette(cmap_rna)(n=100),
  show_rownames = TRUE,
  cellwidth = 13,
  cellheight = 5,
  main = "Average expression of top 20 marker genes per cluster, scaled to [0,1] across clusters")

# make png/pdf
marker_hm_fun(filename=paste0(figout, "/marker_heatmap.decontx.pdf"))
# marker_hm_fun(filename = glue("{figout}/marker_heatmap.decontx.png"))
# 
# knitr::include_graphics(glue("{figout}/marker_heatmap.decontx.png"))

pheatmap::pheatmap(
  mat = avg_exp_decontx_mat_subset_scaled[markers_ordered_unique, cluster_meta2$Cluster],
  border_color = NA,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  # annotation_col = cluster_meta2 %>% dplyr::select(organ),
  # annotation_colors = list("organ" = cmap_organ),
  # use the project's RNA-specific palette
  color = colorRampPalette(cmap_rna)(n=100),
  show_rownames = TRUE,
  cellwidth = 8,
  cellheight = 4,
  main = "Average expression of top 20 marker genes per cluster, scaled to [0,1] across clusters")

### manual selection of marker genes inspect the marker genes in a few cell groups of interest

top_markers_df[grep("hepatocyte", top_markers_df$L3_clusterName),"gene"] %>% 
    table %>% as.data.frame %>% dplyr::arrange(desc(Freq))

##                  . Freq
## 1              A2M    2
## 2             AHSG    2
## 3           AKR1C1    2
## 4               C3    2
## 5               C5    2
## 6           CYP3A5    2
## 7              FGA    2
## 8              FGB    2
## 9              FGG    2
## 10           ITIH1    2
## 11        MIR122HG    2
## 12          PTP4A1    2
## 13          SLC2A2    2
## 14        SLC39A14    2
## 15 SLCO1B3-SLCO1B7    2
## 16             AFP    1
## 17            AGMO    1
## 18          CASC19    1
## 19           CPT1A    1
## 20             CTH    1
## 21           EFNA1    1
## 22 ENSG00000228697    1
## 23 ENSG00000230490    1
## 24 ENSG00000230647    1
## 25 ENSG00000231424    1
## 26 ENSG00000284418    1
## 27 ENSG00000289368    1
## 28             FN1    1
## 29          GAREM1    1
## 30            GAS2    1
## 31           GLUD1    1
## 32            GPC3    1
## 33             HAL    1
## 34          HMGCS1    1
## 35       HNF1A-AS1    1
## 36        HSP90AA1    1
## 37           HSPD1    1
## 38           HSPH1    1
## 39           ITIH2    1
## 40            LDLR    1
## 41       LINC00470    1
## 42       LINC00907    1
## 43       LINC01146    1
## 44       LINC02027    1
## 45       LINC02532    1
## 46            MEG9    1
## 47          NLGN4Y    1
## 48       NPSR1-AS1    1
## 49            PCK1    1
## 50         PIK3C2G    1
## 51             POR    1
## 52        PPARGC1A    1
## 53           RAPH1    1
## 54            SIK2    1
## 55        SLC22A25    1
## 56         SLC22A9    1
## 57          SLC2A9    1
## 58          SLC7A2    1
## 59           SMOC1    1
## 60      SNAP25-AS1    1
## 61          SNHG14    1
## 62            SOX5    1
## 63            TLE1    1
## 64          TTC39C    1
## 65          ZNF331    1

top_markers_df[grep("endothelial", top_markers_df$L3_clusterName),"gene"] %>% 
    table %>% as.data.frame %>% dplyr::arrange(desc(Freq))

##                   . Freq
## 1              FLT1    7
## 2             PTPRB    7
## 3             MECOM    6
## 4            ADGRL4    5
## 5            CALCRL    5
## 6               KDR    5
## 7              LDB2    5
## 8            PECAM1    5
## 9              ANO2    4
## 10         ARHGAP29    4
## 11            DACH1    4
## 12             EMCN    4
## 13             ETS1    4
## 14          GALNT18    4
## 15          RAPGEF5    4
## 16           SHANK3    4
## 17           ATP11A    3
## 18           CADPS2    3
## 19              CPM    3
## 20           DIPK2B    3
## 21             FGD4    3
## 22             FLI1    3
## 23            NALF1    3
## 24              A2M    2
## 25          ADAMTS9    2
## 26         ARHGAP18    2
## 27            ARL15    2
## 28            CHRM3    2
## 29           COL4A1    2
## 30            DAPK1    2
## 31            DOCK4    2
## 32  ENSG00000289873    2
## 33            EPAS1    2
## 34           FENDRR    2
## 35            HSPG2    2
## 36          KHDRBS2    2
## 37            MAGI1    2
## 38            NEDD9    2
## 39            PREX2    2
## 40            PTPRM    2
## 41             RGS6    2
## 42           SH3RF3    2
## 43             SOX5    2
## 44            STOX2    2
## 45              TEK    2
## 46             TTC6    2
## 47              VWF    2
## 48            ABCB1    1
## 49         ADAMTSL1    1
## 50           ADGRB3    1
## 51           AKAP12    1
## 52            BMPER    1
## 53            BTNL9    1
## 54          CABLES1    1
## 55            CCBE1    1
## 56           COBLL1    1
## 57          COL15A1    1
## 58          COL27A1    1
## 59       CSGALNACT1    1
## 60           CTNNA3    1
## 61             DLC1    1
## 62            DOCK9    1
## 63             EBF1    1
## 64            ELMO1    1
## 65  ENSG00000226149    1
## 66  ENSG00000226197    1
## 67  ENSG00000234810    1
## 68  ENSG00000251600    1
## 69  ENSG00000287042    1
## 70              ERG    1
## 71              EYS    1
## 72          FAM184A    1
## 73             FAT3    1
## 74            FBXL7    1
## 75            FGFR2    1
## 76            FLRT2    1
## 77              FN1    1
## 78            FOXP2    1
## 79            FREM2    1
## 80           FRMD4B    1
## 81             GPC5    1
## 82           IGFBP5    1
## 83           IGFBP7    1
## 84            ITGA1    1
## 85            ITGA8    1
## 86            KALRN    1
## 87           KCNIP4    1
## 88             LEF1    1
## 89        LINC01331    1
## 90        LINC02147    1
## 91            LRMDA    1
## 92            MCTP1    1
## 93            MEIS1    1
## 94      MIR4435-2HG    1
## 95             MYLK    1
## 96            MYO1B    1
## 97             NRG3    1
## 98             OIT3    1
## 99            PDE3B    1
## 100          PIEZO2    1
## 101         PIK3C2G    1
## 102         PLEKHG1    1
## 103         PLEKHH2    1
## 104           PRKCH    1
## 105           RBPMS    1
## 106          RNF220    1
## 107          SEL1L3    1
## 108          SEMA5A    1
## 109         SEPTIN9    1
## 110          SLC7A1    1
## 111           SNED1    1
## 112         ST6GAL1    1
## 113      ST6GALNAC3    1
## 114         ST8SIA6    1
## 115           STAB1    1
## 116           STAB2    1
## 117            TFRC    1
## 118           TIAM1    1
## 119             TTN    1
## 120           UNC5C    1
## 121            UTRN    1

top_markers_df[grep("CM", top_markers_df$L3_clusterName),"gene"] %>% 
    table %>% as.data.frame %>% dplyr::arrange(desc(Freq))

##                   . Freq
## 1              DAB1    2
## 2          TRDN-AS1    2
## 3             ACTC1    1
## 4             ACTN2    1
## 5           ADAMTS6    1
## 6             AGBL4    1
## 7            ANGPT1    1
## 8            ANKRD1    1
## 9              ANLN    1
## 10           APOLD1    1
## 11            ATAD2    1
## 12          ATP5F1B    1
## 13         B4GALNT3    1
## 14            BARD1    1
## 15           BRINP3    1
## 16            BRIP1    1
## 17           CACNB2    1
## 18           CAMK1D    1
## 19       CCDC54-AS1    1
## 20            CDIN1    1
## 21            CENPK    1
## 22            CENPP    1
## 23           CEP128    1
## 24              CKM    1
## 25          COL11A1    1
## 26            CRYAB    1
## 27            CSMD1    1
## 28           CYP2J2    1
## 29              DES    1
## 30           DIAPH3    1
## 31             DPF3    1
## 32              DSP    1
## 33              DTL    1
## 34           EEF1A1    1
## 35             EEF2    1
## 36  ENSG00000253369    1
## 37  ENSG00000257252    1
## 38  ENSG00000285961    1
## 39  ENSG00000286619    1
## 40  ENSG00000287999    1
## 41  ENSG00000288659    1
## 42  ENSG00000289309    1
## 43            EPHA4    1
## 44             EZH2    1
## 45            FANCA    1
## 46            FANCI    1
## 47             FHL2    1
## 48             FLNC    1
## 49            FRMD5    1
## 50            GAPDH    1
## 51            GCNT2    1
## 52            HELLS    1
## 53         HSP90AA1    1
## 54         HSP90AB1    1
## 55            HSPA8    1
## 56            HSPB7    1
## 57            KCNH7    1
## 58            KCNJ3    1
## 59            KCNQ5    1
## 60        KIDINS220    1
## 61           KIF26B    1
## 62            KNTC1    1
## 63          L3MBTL4    1
## 64             LDB3    1
## 65        LINC00923    1
## 66        LINC01505    1
## 67        LINC02208    1
## 68        LINC02899    1
## 69            LRRC2    1
## 70           LRRTM3    1
## 71         MARCHF11    1
## 72           MGAT4C    1
## 73         MIR924HG    1
## 74           MMS22L    1
## 75           MYBPC3    1
## 76             MYH6    1
## 77             MYL2    1
## 78             MYL3    1
## 79             MYL7    1
## 80           MYO18B    1
## 81             NAV1    1
## 82              NES    1
## 83             NEXN    1
## 84            NLGN1    1
## 85             NPPA    1
## 86              NTM    1
## 87            NUDT4    1
## 88            PACRG    1
## 89              PAM    1
## 90            PCDH7    1
## 91          PHACTR1    1
## 92            PLCL2    1
## 93            RBM20    1
## 94             RELN    1
## 95             RFC3    1
## 96             ROR1    1
## 97             RRM2    1
## 98             RYR3    1
## 99            SFRP1    1
## 100         SIPA1L1    1
## 101     SLC16A1-AS1    1
## 102          SLC1A3    1
## 103         SLC25A4    1
## 104         SLC27A6    1
## 105           SLIT3    1
## 106           STK39    1
## 107            TBX5    1
## 108          TENT5A    1
## 109           THSD4    1
## 110          TMEM71    1
## 111           TNNC1    1
## 112           TRPM3    1
## 113          TTTY10    1
## 114          TTTY14    1
## 115             UTY    1
## 116           XIRP1    1
## 117           XIRP2    1
## 118         ZNF385B    1

top_markers_df[grep("neu", top_markers_df$L3_clusterName),"gene"] %>% 
    table %>% as.data.frame %>% dplyr::arrange(desc(Freq))

##                   . Freq
## 1             RALYL    4
## 2              CUX2    3
## 3              DPYD    3
## 4          IL1RAPL2    3
## 5            KCNIP4    3
## 6             KCNQ5    3
## 7            LRRC4C    3
## 8            SORCS1    3
## 9             TRPM3    3
## 10           ZNF536    3
## 11         ADAMTSL3    2
## 12            CDH18    2
## 13             CDH4    2
## 14            CHRM3    2
## 15             CLMP    2
## 16             CNR1    2
## 17            CNTN4    2
## 18          CNTNAP2    2
## 19           DLGAP2    2
## 20           DNAH10    2
## 21             DOK6    2
## 22          DPY19L1    2
## 23            ERBB4    2
## 24            FGF13    2
## 25          GALNTL6    2
## 26             GAS7    2
## 27          GRAMD1B    2
## 28            GRIK2    2
## 29            GRIK3    2
## 30             GRM7    2
## 31           HS3ST4    2
## 32           KLHL29    2
## 33           LHFPL3    2
## 34            LUZP2    2
## 35            MDGA2    2
## 36            MEIS2    2
## 37            NCAM2    2
## 38            NPAS3    2
## 39             NRG1    2
## 40             NRP1    2
## 41            NRXN1    2
## 42            NRXN3    2
## 43           PRSS12    2
## 44            PTPRT    2
## 45             RGS6    2
## 46           SEMA3C    2
## 47           SORBS2    2
## 48          SOX2-OT    2
## 49            TAFA2    2
## 50             TLE4    2
## 51            UNC5D    2
## 52          ZNF804B    2
## 53            ACSL1    1
## 54            ACTN2    1
## 55          ADAMTS3    1
## 56           ADARB2    1
## 57            ADCY1    1
## 58           ADGRB3    1
## 59           ADGRL2    1
## 60              ALK    1
## 61             ANK2    1
## 62             ANK3    1
## 63            ASTN2    1
## 64           ATP8A2    1
## 65           ATRNL1    1
## 66             BCHE    1
## 67           BRINP2    1
## 68           BRINP3    1
## 69          C8orf34    1
## 70          CACNA1A    1
## 71            CCBE1    1
## 72            CDH12    1
## 73            CDH13    1
## 74            CDH19    1
## 75             CDH8    1
## 76           CHRNA7    1
## 77           CLSTN2    1
## 78            CNTN3    1
## 79            CNTN5    1
## 80          CNTNAP3    1
## 81          CNTNAP5    1
## 82          COL19A1    1
## 83          COL25A1    1
## 84           CTNNA2    1
## 85              DCC    1
## 86             DGKB    1
## 87            DISP3    1
## 88         DLX6-AS1    1
## 89             DNER    1
## 90             DPP6    1
## 91            DSCAM    1
## 92             DTNA    1
## 93          ELAPOR1    1
## 94            ELFN1    1
## 95            ELMO1    1
## 96             EML5    1
## 97             EML6    1
## 98  ENSG00000253693    1
## 99  ENSG00000271860    1
## 100 ENSG00000285744    1
## 101 ENSG00000287092    1
## 102        EPB41L4A    1
## 103         FAM135B    1
## 104           FGF12    1
## 105          FRMPD4    1
## 106          GABRG3    1
## 107            GAD1    1
## 108           GADL1    1
## 109           GLRA2    1
## 110           GREM2    1
## 111           GRIA2    1
## 112           GRIA4    1
## 113           GRID2    1
## 114            GRM3    1
## 115             GRP    1
## 116         GUCY1A2    1
## 117          HS3ST5    1
## 118          HS6ST2    1
## 119          HS6ST3    1
## 120          IGSF21    1
## 121        IL1RAPL1    1
## 122             IL7    1
## 123           ITPR1    1
## 124           KCNB2    1
## 125           KCNC2    1
## 126           KCNH5    1
## 127           KCNJ3    1
## 128           KCNK2    1
## 129          KCNMA1    1
## 130           KCNN3    1
## 131           KCNQ3    1
## 132        KIAA1217    1
## 133          KIF26B    1
## 134         KIRREL3    1
## 135           KITLG    1
## 136           KLHL1    1
## 137          KLHL32    1
## 138           LAMA2    1
## 139          LHFPL6    1
## 140       LINC01091    1
## 141       LINC01102    1
## 142       LINC01322    1
## 143       LINC01435    1
## 144       LINC01606    1
## 145       LINC02254    1
## 146       LINC03000    1
## 147          LINGO2    1
## 148            LMO3    1
## 149           LRFN2    1
## 150           LRP1B    1
## 151            LRP8    1
## 152          LRRTM4    1
## 153          MAP2K1    1
## 154        MIR137HG    1
## 155       MIR4500HG    1
## 156           MTUS2    1
## 157            MUSK    1
## 158           MYO5B    1
## 159           MYRIP    1
## 160           MYT1L    1
## 161           NEGR1    1
## 162          NKAIN2    1
## 163          NKAIN3    1
## 164          NLGN4X    1
## 165            NOL4    1
## 166            NPNT    1
## 167       NR2F2-AS1    1
## 168           NR3C2    1
## 169            NRP2    1
## 170            NWD2    1
## 171           NXPH1    1
## 172           NXPH2    1
## 173            OCA2    1
## 174           OPCML    1
## 175           PALMD    1
## 176             PAM    1
## 177          PANTR1    1
## 178           PCBP3    1
## 179         PCDH11Y    1
## 180           PCSK2    1
## 181           PCSK5    1
## 182           PDZD2    1
## 183          PDZRN3    1
## 184          PDZRN4    1
## 185           PEX5L    1
## 186          PHLDB2    1
## 187      PHOX2B-AS1    1
## 188         PIP5K1B    1
## 189           PLCE1    1
## 190            PLS3    1
## 191          PLXNA2    1
## 192          POU6F2    1
## 193         PPP2R2C    1
## 194           PREX1    1
## 195           PRKCA    1
## 196          PTCHD4    1
## 197          PTPRN2    1
## 198          PTPRZ1    1
## 199          R3HDM1    1
## 200        RASGEF1C    1
## 201          RBFOX3    1
## 202            RGS7    1
## 203           RIMS2    1
## 204          RIPOR2    1
## 205            RYR3    1
## 206           SCN5A    1
## 207            SDK1    1
## 208          SEMA3E    1
## 209            SGCZ    1
## 210             SLA    1
## 211          SLAIN1    1
## 212        SLC22A23    1
## 213         SLC35F1    1
## 214         SLC35F3    1
## 215         SLC44A5    1
## 216          SLC8A3    1
## 217           SNTG2    1
## 218          SORCS3    1
## 219         ST3GAL6    1
## 220         ST8SIA5    1
## 221          STK32B    1
## 222            SYBU    1
## 223            SYT1    1
## 224           TENM3    1
## 225            THRB    1
## 226          TMEFF2    1
## 227         TMEM108    1
## 228           TRPC4    1
## 229           TRPC5    1
## 230           TRPS1    1
## 231           TSHZ3    1
## 232           UNC5C    1
## 233            XKR4    1
## 234          ZBTB16    1
## 235           ZFPM2    1
## 236         ZNF385B    1
## 237         ZNF804A    1

top_markers_df[grep("TEC", top_markers_df$L3_clusterName),"gene"] %>% 
    table %>% as.data.frame %>% dplyr::arrange(desc(Freq))

##                   . Freq
## 1               NEB    3
## 2            ADGRL3    2
## 3             FGF13    2
## 4              RYR3    2
## 5              TRDN    2
## 6             ACTA1    1
## 7             ACTN2    1
## 8            ADARB2    1
## 9             AGBL1    1
## 10            AHNAK    1
## 11             ANK2    1
## 12             ANK3    1
## 13           ATP10B    1
## 14             BCHE    1
## 15             BNC2    1
## 16               C3    1
## 17              CA3    1
## 18            CADM2    1
## 19            CALCR    1
## 20           CCDC80    1
## 21            CDIN1    1
## 22             CDON    1
## 23            CHRM3    1
## 24            CLCN5    1
## 25            CMYA5    1
## 26            CNTN4    1
## 27          COL15A1    1
## 28          COL19A1    1
## 29          COL25A1    1
## 30            CRIM1    1
## 31           CTNNA3    1
## 32       CYP1B1-AS1    1
## 33              DCC    1
## 34             DLG2    1
## 35              DMD    1
## 36             DPP6    1
## 37  ENSG00000226043    1
## 38  ENSG00000268981    1
## 39  ENSG00000289084    1
## 40  ENSG00000289950    1
## 41             EYA2    1
## 42          FAM163A    1
## 43          FILIP1L    1
## 44            FOLH1    1
## 45           FRMD4A    1
## 46            GADL1    1
## 47             GPC5    1
## 48            GREM2    1
## 49              H19    1
## 50           HS6ST2    1
## 51             IGF2    1
## 52           KCNIP4    1
## 53         KCNQ1OT1    1
## 54            KCTD8    1
## 55            LAMA2    1
## 56             LDB3    1
## 57        LINC01091    1
## 58        LINC02955    1
## 59             LRP2    1
## 60           MAP2K1    1
## 61          MARCHF1    1
## 62            MCF2L    1
## 63            MEF2C    1
## 64             MEG3    1
## 65             MEG8    1
## 66             MEG9    1
## 67           MEGF10    1
## 68           MGAT4C    1
## 69       MIR133A1HG    1
## 70         MIR493HG    1
## 71             MLIP    1
## 72             MUSK    1
## 73           MYBPC1    1
## 74             MYH2    1
## 75             MYH3    1
## 76             MYH7    1
## 77            MYH7B    1
## 78             MYH8    1
## 79             MYL1    1
## 80             MYL2    1
## 81            MYOM2    1
## 82            NCAM1    1
## 83            NEK10    1
## 84             NFIB    1
## 85           NKAIN2    1
## 86             NPNT    1
## 87            NRXN3    1
## 88             PAX7    1
## 89            PCDH7    1
## 90           PHLDB2    1
## 91          PKHD1L1    1
## 92            PLCE1    1
## 93             PRG4    1
## 94           PRSS16    1
## 95            PTPRQ    1
## 96            PTPRT    1
## 97           RBFOX1    1
## 98            ROBO2    1
## 99             ROR1    1
## 100            RYR1    1
## 101            RYR2    1
## 102      SAMD4A-AS1    1
## 103           SCN5A    1
## 104          SEMA3C    1
## 105        SLC22A23    1
## 106         SLC2A14    1
## 107          SLC8A3    1
## 108            SLF2    1
## 109           SLIT3    1
## 110            SOX6    1
## 111         SPATS2L    1
## 112         STARD13    1
## 113           SULF1    1
## 114           TBATA    1
## 115           TECRL    1
## 116           THSD4    1
## 117            TLN2    1
## 118         TMEM108    1
## 119           TNNC2    1
## 120           TNNI1    1
## 121           TNNT1    1
## 122           TNNT3    1
## 123            TPM1    1
## 124            TPM2    1
## 125            TPM3    1
## 126             TTN    1
## 127            VCAN    1
## 128           WDPCP    1
## 129            WNK2    1
## 130           XIRP2    1
## 131            XKR4    1
## 132            ZEB2    1
## 133         ZNF385B    1
## 134         ZNF385D    1

Manually add top marker genes for cell groups of interest to markergenes_dendro.R

Then run the following plotting blocks

source(here::here("output/01-preprocessing/02/shared/markergenes/markergenes_dendro.R"))

# subset each gene expr mat to the collection of markers, to save time in the merge
avg_exp_decontx_subset_list2 <- map(avg_exp_decontx_list, ~ .x[rownames(.x) %in% featureSets, ])
map(avg_exp_decontx_subset_list2, dim)

## [[1]]
## [1] 34 10
## 
## [[2]]
## [1] 36 18
## 
## [[3]]
## [1] 36 22
## 
## [[4]]
## [1] 36 16
## 
## [[5]]
## [1] 36 14
## 
## [[6]]
## [1] 36 18
## 
## [[7]]
## [1] 36 22
## 
## [[8]]
## [1] 36 19
## 
## [[9]]
## [1] 36 12
## 
## [[10]]
## [1] 36 22
## 
## [[11]]
## [1] 36 18
## 
## [[12]]
## [1] 35 12

avg_exp_decontx_mat_subset2 <- Reduce(merge2, avg_exp_decontx_subset_list2)
dim(avg_exp_decontx_mat_subset2)

## [1]  36 203

any(is.na(avg_exp_decontx_mat_subset2))

## [1] TRUE

avg_exp_decontx_mat_subset2[is.na(avg_exp_decontx_mat_subset2)] <- 0 # set any NA to zero
any(is.na(avg_exp_decontx_mat_subset2))

## [1] FALSE

# scale expr matrix
avg_exp_decontx_mat_subset_scaled2 <- avg_exp_decontx_mat_subset2 %>%
  # genes are in rows; for each gene, scale expression to [0, 1]
  apply(1, scales::rescale) %>%
  t()

markers <- featureSets[featureSets %in% rownames(avg_exp_decontx_mat_subset_scaled2)]

# make a function to plot the heatmap in dendrogram order, and with markers ordered diagonally
marker_hm_fun <- purrr::partial(
  pheatmap::pheatmap,
  mat = avg_exp_decontx_mat_subset_scaled2[markers, cluster_meta2$Cluster],
  border_color = NA,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  annotation_col = cluster_meta2 %>% dplyr::select(organ),
  annotation_colors = list("organ" = cmap_organ),
  # use the project's RNA-specific palette
  color = colorRampPalette(cmap_rna)(n=100),
  show_rownames = TRUE,
  cellwidth = 13,
  cellheight = 5,
  main = "Average expression of top marker genes per cluster, scaled to [0,1] across clusters")

# make png/pdf
# marker_hm_fun()
marker_hm_fun(filename=paste0(figout, "/marker_heatmap_manual.pdf"))

flat_data <- melt(avg_exp_decontx_mat_subset_scaled2[markers, cluster_meta2$Cluster], 
                  varnames = c("Markers", "Cluster"), 
                  value.name = "Expression")

ggplot(flat_data, aes(x=Cluster, y=Markers, color=Expression, size=Expression)) + 
  geom_point(stroke=0) + theme(axis.text.x = element_text(angle=90, hjust=1)) + 
  scale_size(range = c(0, 5)) +  # Adjust dot size range + 
  scale_y_discrete(limits = rev(unique(flat_data$Markers))) +  # Reverse marker order
  #scale_color_gradientn(colors = cmap_rna) +
  labs(x = "Cluster", y = "Markers", size = "Percentage (%)", color = "Expression") +
  scale_color_gradient(low = "white", high = "red")

# pdf(paste0(figout, "/manual_marker_dotplot.pdf"), width=30, height=5)
# print(p)
# dev.off()
# 
# png(paste0(figout, "/manual_marker_dotplot.png"), width=30, height=5, units ="in", res=300)
# print(p)
# dev.off()

7 Session info

sessionInfo()

## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /share/software/user/open/openblas/0.3.10/lib/libopenblas_haswellp-r0.3.10.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggthemes_4.2.4              RColorBrewer_1.1-3         
##  [3] DoubletFinder_2.0.3         BPCells_0.2.0              
##  [5] RcppAlgos_2.7.2             ggrastr_1.0.1              
##  [7] GenomicFeatures_1.46.5      AnnotationDbi_1.56.2       
##  [9] rhdf5_2.38.1                SummarizedExperiment_1.24.0
## [11] Biobase_2.54.0              MatrixGenerics_1.6.0       
## [13] Rcpp_1.0.10                 Matrix_1.5-4               
## [15] GenomicRanges_1.46.1        GenomeInfoDb_1.30.1        
## [17] IRanges_2.28.0              S4Vectors_0.32.4           
## [19] BiocGenerics_0.40.0         matrixStats_0.63.0         
## [21] plyr_1.8.8                  magrittr_2.0.3             
## [23] gtable_0.3.3                gtools_3.9.4               
## [25] gridExtra_2.3               ArchR_1.0.2                
## [27] dendextend_1.17.1           pvclust_2.2-0              
## [29] data.table_1.14.8           cowplot_1.1.1              
## [31] SeuratObject_4.1.3          Seurat_4.3.0               
## [33] stringr_1.5.0               tibble_3.2.1               
## [35] readr_2.1.4                 glue_1.6.2                 
## [37] purrr_1.0.2                 ggplot2_3.5.0              
## [39] tidyr_1.3.1                 dplyr_1.1.4                
## [41] here_1.0.1                 
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3               spatstat.explore_3.1-0   reticulate_1.28         
##   [4] tidyselect_1.2.0         RSQLite_2.3.1            htmlwidgets_1.6.2       
##   [7] gmp_0.7-1                BiocParallel_1.28.3      Rtsne_0.16              
##  [10] munsell_0.5.0            codetools_0.2-18         ica_1.0-3               
##  [13] future_1.33.2            miniUI_0.1.1.1           withr_3.0.0             
##  [16] spatstat.random_3.1-4    colorspace_2.1-0         progressr_0.13.0        
##  [19] filelock_1.0.2           highr_0.10               knitr_1.42              
##  [22] ROCR_1.0-11              tensor_1.5               listenv_0.9.0           
##  [25] labeling_0.4.2           GenomeInfoDbData_1.2.7   polyclip_1.10-4         
##  [28] farver_2.1.1             pheatmap_1.0.12          bit64_4.0.5             
##  [31] rprojroot_2.0.3          parallelly_1.35.0        vctrs_0.6.5             
##  [34] generics_0.1.3           xfun_0.39                BiocFileCache_2.2.1     
##  [37] R6_2.5.1                 ggbeeswarm_0.7.2         bitops_1.0-7            
##  [40] rhdf5filters_1.6.0       spatstat.utils_3.0-2     cachem_1.0.8            
##  [43] DelayedArray_0.20.0      vroom_1.6.3              promises_1.2.0.1        
##  [46] BiocIO_1.4.0             scales_1.3.0             beeswarm_0.4.0          
##  [49] globals_0.16.2           goftest_1.2-3            rlang_1.1.3             
##  [52] splines_4.1.2            rtracklayer_1.54.0       lazyeval_0.2.2          
##  [55] spatstat.geom_3.1-0      yaml_2.3.7               reshape2_1.4.4          
##  [58] abind_1.4-5              httpuv_1.6.10            tools_4.1.2             
##  [61] ellipsis_0.3.2           jquerylib_0.1.4          ggridges_0.5.4          
##  [64] progress_1.2.2           zlibbioc_1.40.0          RCurl_1.98-1.12         
##  [67] prettyunits_1.1.1        deldir_1.0-6             pbapply_1.7-0           
##  [70] viridis_0.6.3            zoo_1.8-12               ggrepel_0.9.5           
##  [73] cluster_2.1.2            scattermore_1.0          lmtest_0.9-40           
##  [76] RANN_2.6.1               fitdistrplus_1.1-11      hms_1.1.3               
##  [79] patchwork_1.2.0          mime_0.12                evaluate_0.21           
##  [82] xtable_1.8-4             XML_3.99-0.14            compiler_4.1.2          
##  [85] biomaRt_2.50.3           KernSmooth_2.23-20       crayon_1.5.2            
##  [88] htmltools_0.5.5          later_1.3.1              tzdb_0.4.0              
##  [91] DBI_1.1.3                dbplyr_2.3.2             MASS_7.3-54             
##  [94] rappdirs_0.3.3           cli_3.6.2                parallel_4.1.2          
##  [97] igraph_1.4.2             pkgconfig_2.0.3          GenomicAlignments_1.30.0
## [100] sp_1.6-0                 plotly_4.10.1            spatstat.sparse_3.0-1   
## [103] xml2_1.3.4               vipor_0.4.5              bslib_0.4.2             
## [106] XVector_0.34.0           digest_0.6.31            sctransform_0.3.5       
## [109] RcppAnnoy_0.0.20         spatstat.data_3.0-1      Biostrings_2.62.0       
## [112] rmarkdown_2.21           leiden_0.4.2             uwot_0.1.14             
## [115] restfulr_0.0.15          curl_5.0.0               Rsamtools_2.10.0        
## [118] shiny_1.7.4              rjson_0.2.21             lifecycle_1.0.3         
## [121] nlme_3.1-153             jsonlite_1.8.4           Rhdf5lib_1.16.0         
## [124] viridisLite_0.4.2        fansi_1.0.4              pillar_1.9.0            
## [127] lattice_0.20-45          KEGGREST_1.34.0          fastmap_1.1.1           
## [130] httr_1.4.6               survival_3.2-13          png_0.1-8               
## [133] bit_4.0.5                stringi_1.7.12           sass_0.4.6              
## [136] blob_1.2.4               memoise_2.0.1            irlba_2.3.5.1           
## [139] future.apply_1.10.0

Dendrogram for similarity across cell types

12 September, 2025