# **MODIFY THIS CHUNK**
project_id  <- "HDMA-public" # determines the name of the cache folder
doc_id      <- "04-enhancers/09" # determines name of the subfolder of `figures` where pngs/pdfs are saved
out         <- here::here("output/", doc_id); dir.create(out, recursive = TRUE)
figout      <- here::here("figures/", doc_id, "/")
cache       <- paste0("~/scratch/cache/", project_id, "/", doc_id, "/")
script_path <- here::here("code/utils/")

1 Overview

We received the VISTA-validated enhancer set from collaborators. Here we explore the data to guide our handling of the dataset for training/testing validation.

2 Set up

# libraries
library(here)
library(dplyr)
library(tidyr)
library(ggplot2)
library(purrr)
library(glue)
library(readr)
library(tibble)
library(cowplot)
library(GenomicRanges)
library(rtracklayer)
library(ArchR)
library(ComplexHeatmap)
library(BSgenome.Hsapiens.UCSC.hg38)
library(Seurat)
library(BPCells)
library(ggseqlogo)

# shared project scripts/functions
source(file.path(script_path, "plotting_config.R"))
source(file.path(script_path, "hdma_palettes.R"))
source(file.path(script_path, "sj_scRNAseq_helpers.R"))
source(file.path(script_path, "trackplots.R"))
source(file.path(script_path, "track_helpers_BL.R"))
source(file.path(script_path, "track_helpers_SJ.R"))


ggplot2::theme_set(theme_BOR())

3 Load data

vista <- read_tsv(here::here("data/external/2024-01-24_vista_@mkosicki/2024-01-24_vista.clean.tsv"))

## Rows: 3389 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (12): vista_id, backbone, stage, tissue, tissue_positive, assembly, coor...
## dbl  (1): denominator
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#tibble::glimpse(vista)

unique(vista$assembly)

## [1] "hg38" "mm10"

sum(is.na(vista$vista_id))

## [1] 0

length(unique(vista$vista_id))

## [1] 3272

length(unique(vista$coord))

## [1] 3272

3.1 Filter for only entries that have a hg38 coordinate

These also contain enhancers whose vista_id start with “mm” and use a “mm10” assembly, but mapped to an hg38 ortholog

vista <- vista[!is.na(vista$coordinate_hg38),]
vista

## # A tibble: 3,266 × 13
##    vista_id backbone stage denominator tissue   tissue_positive assembly coord  
##    <chr>    <chr>    <chr>       <dbl> <chr>    <chr>           <chr>    <chr>  
##  1 hs3      hZR      e11.5           4 neg      <NA>            hg38     chr16:…
##  2 hs4      hZR      e11.5          10 hb;mb;nt 10;10;6         hg38     chr16:…
##  3 hs9      hZR      e11.5          16 neg      <NA>            hg38     chr16:…
##  4 hs10     hZR      e11.5           7 neg      <NA>            hg38     chr16:…
##  5 hs13     hZR      e11.5          16 neg      <NA>            hg38     chr16:…
##  6 hs16     hZR      e11.5           7 cn;other 6;6             hg38     chr16:…
##  7 hs17     hZR      e11.5           7 neg      <NA>            hg38     chr16:…
##  8 hs20     hZR      e11.5          18 hb;mb    4;8             hg38     chr16:…
##  9 hs62     hZR      e11.5           8 neg      <NA>            hg38     chr16:…
## 10 hs63     hZR      e11.5          20 neg      <NA>            hg38     chr16:…
## # ℹ 3,256 more rows
## # ℹ 5 more variables: strand <chr>, coordinate_hg38 <chr>,
## #   coordinate_mm10 <chr>, seq <chr>, external_note <chr>

3.2 Distribution of vista enhancer widths

med_width <- median(width(GRanges(vista$coordinate_hg38)))
ggpubr::gghistogram(width(GRanges(vista$coordinate_hg38))) + xlab("vista enhancer width in bp") + geom_vline(xintercept=med_width, linetype="dashed", color="red") + 
  annotate("text", x=med_width, hjust=-0.1, y=800, label=paste0("median: ", med_width, "bp"), color="red")

4 Load global peaks

also tried increasing minoverlap to 200bp for findOverlaps and still get around 85% of vista enhancers in global peakset

global_peaks <- readRDS(here::here("output/04-enhancers/01/peaks_all_df.rds"))
overlap <- findOverlaps(GRanges(global_peaks), GRanges(vista$coordinate_hg38) %>% unique) 

sprintf("From the vista enhancer dataset (hg38 only), %d of %d (%.2f%%) enhancers overlap with the hdma global peakset", overlap@to %>% unique %>% length, vista$vista_id %>% unique %>% length, overlap@to %>% unique %>% length/(vista$vista_id %>% unique %>% length) *100)

## [1] "From the vista enhancer dataset (hg38 only), 2731 of 3149 (86.73%) enhancers overlap with the hdma global peakset"

sprintf("From the hdma global peakset, %d of %d (%.2f%%) peaks overlap with vista enhancers", overlap@from %>% unique %>% length, dim(global_peaks)[1], overlap@from %>% unique %>% length/dim(global_peaks)[1]*100)

## [1] "From the hdma global peakset, 7664 of 1032273 (0.74%) peaks overlap with vista enhancers"

plotdf <- data.frame(vista_idx=seq_along(GRanges(vista$coordinate_hg38) %>% unique), n_hdma_peak_overlap=0)
tmp <- overlap@to %>% table %>% as.data.frame(stringsAsFactors=F) %>% dplyr::rename(n_hdma_peak_overlap="Freq", vista_idx=".")
plotdf[tmp$vista_idx, "n_hdma_peak_overlap"] <- tmp$n_hdma_peak_overlap

med <- median(tmp$n_hdma_peak_overlap)
ggplot(plotdf, aes(x=n_hdma_peak_overlap)) + geom_bar() + theme_BOR() + geom_vline(xintercept = med, linetype="dashed", color="red") + 
  annotate("text", x=med, y=600, hjust=-0.5, label=paste0("median: ", med), color="red") + ylab("n_vista_enhancers")

vista_long <- read.csv(here::here("output/04-enhancers/08/vista_long_with_atlas_tissues.tsv"), sep="\t")
vista_long$vista_id %>% unique %>% length

## [1] 3272

pos <- vista_long[vista_long$HDMA_tissue %in% c("Brain", "Eye", "Heart", "Skin", "Liver"),]
pos_coord <- vista[vista$vista_id %in% pos$vista_id, ]$coordinate_hg38 %>% GRanges

overlap <- findOverlaps(GRanges(global_peaks), pos_coord %>% unique) 

sprintf("From the vista enhancer dataset (hg38 only) positive for Brain/Eye/Heart/Skin/Liver, %d of %d (%.2f%%) enhancers overlap with the hdma global peakset", overlap@to %>% unique %>% length, pos$vista_id %>% unique %>% length, overlap@to %>% unique %>% length/(pos$vista_id %>% unique %>% length) *100)

## [1] "From the vista enhancer dataset (hg38 only) positive for Brain/Eye/Heart/Skin/Liver, 797 of 865 (92.14%) enhancers overlap with the hdma global peakset"

sprintf("From the hdma global peakset, %d of %d (%.2f%%) peaks overlap with vista enhancers", overlap@from %>% unique %>% length, dim(global_peaks)[1], overlap@from %>% unique %>% length/dim(global_peaks)[1]*100)

## [1] "From the hdma global peakset, 2551 of 1032273 (0.25%) peaks overlap with vista enhancers"

sprintf("%d of %d vista enhancers are positive for Brain/Eye/Heart/Skin/Liver", pos_coord %>% unique %>% length, vista_long$vista_id %>% unique %>% length)

## [1] "851 of 3272 vista enhancers are positive for Brain/Eye/Heart/Skin/Liver"

4.1 Peak type

library(ArchR)
library(BSgenome.Hsapiens.UCSC.hg38)
addArchRGenome("hg38")

## Setting default genome to Hg38.

geneAnnot <- getArchRGenome(geneAnnotation = T, genomeAnnotation = F) %>% as.list

## Using GeneAnnotation set by addArchRGenome(Hg38)!

peaks <- GRanges(vista$coordinate_hg38) %>% unique
peaks_anno <- ArchR:::.fastAnnoPeaks(peaks, BSgenome = BSgenome.Hsapiens.UCSC.hg38, geneAnnotation = geneAnnot)

## Annotating Peaks : Nearest Gene

## Annotating Peaks : Gene

## Annotating Peaks : TSS

## Annotating Peaks : GC

ggplot(peaks_anno %>% as.data.frame, aes(x=1, fill=peakType)) + geom_bar() + theme_BOR()

ggplot(peaks_anno %>% as.data.frame, aes(x=1, fill=peakType)) + geom_bar(position="fill") + theme_BOR()

# Heatmap of VISTA enhancer frag counts

proj <- loadArchRProject(here::here("output/01-preprocessing/03/allSamples_decontx"))

proj <- addFeatureMatrix(
  input = proj,
  features = GRanges(vista$coordinate_hg38) %>% unique,
  matrixName = "VistaPeakMatrix"
)

proj <- saveArchRProject(proj)

4.2 Get all annotations

tissue_meta <- read_tsv(here::here("code/02-global_analysis/01-organs_keep.tsv"))

## Rows: 12 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): organcode, organ, iteration
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# add organ colors
col.data.frame <- data.frame(Color=cmap_organ) %>% dplyr::mutate(organ=names(cmap_organ))

organ.code.list <- tissue_meta$organcode
all.annots <- lapply(1:length(organ.code.list), function(i){
  # read cluster id annotation
  annot <- read.csv(sprintf(here::here("output/01-preprocessing/02/shared/meta/%s_meta_cluster.txt"), organ.code.list[i]), sep="\t") %>% as.data.frame
  rownames(annot) <- annot$cb
  annot$L0_clusterID <- paste0(organ.code.list[i],"_",annot$L1_clusterID)
  annot$L3_clusterID <- paste0(organ.code.list[i],"_",annot$L3_clusterName)
  return(annot)
})
all.annots <- dplyr::bind_rows(all.annots)
rownames(all.annots) <- all.annots$L0_clusterID

all.annots <- all.annots %>%
  mutate(Cluster = paste0(organ_code, "_",L1_clusterID)) %>%
  mutate(Cluster_labelled = paste0(organ_code, "_", L1_clusterID, "_", L3_clusterName)) %>%
  left_join(col.data.frame) %>%
  tibble::column_to_rownames(var = "Cluster")

## Joining with `by = join_by(organ)`

head(all.annots)

##        organ organ_code L1_clusterID L0_clusterName L1_clusterName L2_clusterID
## AG_0 Adrenal         AG            0            epi         AG_epi      AG_epi1
## AG_1 Adrenal         AG            1            epi         AG_epi      AG_epi2
## AG_2 Adrenal         AG            2            epi         AG_epi      AG_epi3
## AG_3 Adrenal         AG            3            epi         AG_epi      AG_epi4
## AG_4 Adrenal         AG            4            epi         AG_epi      AG_epi5
## AG_5 Adrenal         AG            5            end         AG_end       AG_end
##               L2_clusterName            L3_clusterName ncell median_numi
## AG_0          adrenal cortex          adrenal cortex 1   823      9498.0
## AG_1          adrenal cortex          adrenal cortex 2   576      6827.5
## AG_2          adrenal cortex          adrenal cortex 3   511     10567.0
## AG_3          adrenal cortex          adrenal cortex 4   483      6340.0
## AG_4 sympathoadrenal lineage sympathoadrenal lineage 1   155      5262.0
## AG_5             endothelial               endothelial   115      4117.0
##      median_ngene median_pctmt median_nfrags median_tss median_frip
## AG_0         3280  0.008694896        6784.0     9.8320   0.2906086
## AG_1         3114  0.000000000        4341.5    10.3445   0.2400227
## AG_2         3661  0.008910274        5692.0     9.6830   0.2679032
## AG_3         2487  0.000000000        5519.0     9.8760   0.3020710
## AG_4         2564  0.000000000        4425.0    10.1390   0.2104465
## AG_5         2188  0.006551792        4192.0    10.1250   0.2417165
##                                                                                            note
## AG_0                 NR5A1+ CYP11A1/CYP11B1+; mostly PCW21; groups with 2 and 3 in cluster tree
## AG_1                                                  AGTR1+ likely zona glomerulosa; all PCW17
## AG_2                                        NR5A1+ AKR1B1+ SULT2A1+ CYP11A1/CYP11B1+; all PCW18
## AG_3                                                                        NR5A1+; mosly PCW21
## AG_4 PHOX2A/B+ ISL1+ GATA3+; likely mix of chromaffin cells and sympathoblasts from all samples
## AG_5                                                               PECAM1+ TEK+; mix of samples
##      L0_clusterID                 L3_clusterID               Cluster_labelled
## AG_0         AG_0          AG_adrenal cortex 1          AG_0_adrenal cortex 1
## AG_1         AG_1          AG_adrenal cortex 2          AG_1_adrenal cortex 2
## AG_2         AG_2          AG_adrenal cortex 3          AG_2_adrenal cortex 3
## AG_3         AG_3          AG_adrenal cortex 4          AG_3_adrenal cortex 4
## AG_4         AG_4 AG_sympathoadrenal lineage 1 AG_4_sympathoadrenal lineage 1
## AG_5         AG_5               AG_endothelial               AG_5_endothelial
##        Color
## AG_0 #876941
## AG_1 #876941
## AG_2 #876941
## AG_3 #876941
## AG_4 #876941
## AG_5 #876941

# add average cluster age

meta <- read_tsv(Sys.glob(here::here("output/01-preprocessing/02/shared/meta/*_meta.txt"))) %>% as.data.frame(stringsAsFactors=F)

## Rows: 817740 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): cb, L1_clusterName, L2_clusterID, L2_clusterName, L3_clusterName
## dbl (1): L1_clusterID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

smeta <- read_tsv(Sys.glob(here::here("output/01-preprocessing/02/shared/meta/*_meta_sample.txt"))) %>% as.data.frame(stringsAsFactors=F)

## Rows: 76 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): sample, organ, organ_code, sex
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

cmeta <- read_tsv(Sys.glob(here::here("output/01-preprocessing/02/shared/meta/*_meta_cluster.txt"))) %>% as.data.frame(stringsAsFactors=F)

## Rows: 203 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (8): organ, organ_code, L0_clusterName, L1_clusterName, L2_clusterID, L2...
## dbl (8): L1_clusterID, ncell, median_numi, median_ngene, median_pctmt, media...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

rownames(smeta) <- smeta$sample

meta$sample <- lapply(meta$cb, function(n){strsplit(n, split="#")[[1]][1]}) %>% unlist
meta$PCW <- smeta[meta$sample, "pcw"]
meta$PCD <- smeta[meta$sample, "pcd"]

meta_group <- meta %>% group_by(L2_clusterID) %>% dplyr::summarise(avg_pcw=mean(PCW), avg_pcd=mean(PCD)) %>% as.data.frame(stringsAsFactors=F)
all.annots <- merge(all.annots, meta_group, by="L2_clusterID", all.x=T) %>% merge(cmeta, all.x=T)
head(all.annots)

##   L2_clusterID   organ organ_code L1_clusterID L0_clusterName L1_clusterName
## 1       AG_end Adrenal         AG            5            end         AG_end
## 2      AG_epi1 Adrenal         AG            0            epi         AG_epi
## 3      AG_epi2 Adrenal         AG            1            epi         AG_epi
## 4      AG_epi3 Adrenal         AG            2            epi         AG_epi
## 5      AG_epi4 Adrenal         AG            3            epi         AG_epi
## 6      AG_epi5 Adrenal         AG            4            epi         AG_epi
##            L2_clusterName            L3_clusterName ncell median_numi
## 1             endothelial               endothelial   115      4117.0
## 2          adrenal cortex          adrenal cortex 1   823      9498.0
## 3          adrenal cortex          adrenal cortex 2   576      6827.5
## 4          adrenal cortex          adrenal cortex 3   511     10567.0
## 5          adrenal cortex          adrenal cortex 4   483      6340.0
## 6 sympathoadrenal lineage sympathoadrenal lineage 1   155      5262.0
##   median_ngene median_pctmt median_nfrags median_tss median_frip
## 1         2188  0.006551792        4192.0    10.1250   0.2417165
## 2         3280  0.008694896        6784.0     9.8320   0.2906086
## 3         3114  0.000000000        4341.5    10.3445   0.2400227
## 4         3661  0.008910274        5692.0     9.6830   0.2679032
## 5         2487  0.000000000        5519.0     9.8760   0.3020710
## 6         2564  0.000000000        4425.0    10.1390   0.2104465
##                                                                                         note
## 1                                                               PECAM1+ TEK+; mix of samples
## 2                 NR5A1+ CYP11A1/CYP11B1+; mostly PCW21; groups with 2 and 3 in cluster tree
## 3                                                  AGTR1+ likely zona glomerulosa; all PCW17
## 4                                        NR5A1+ AKR1B1+ SULT2A1+ CYP11A1/CYP11B1+; all PCW18
## 5                                                                        NR5A1+; mosly PCW21
## 6 PHOX2A/B+ ISL1+ GATA3+; likely mix of chromaffin cells and sympathoblasts from all samples
##   L0_clusterID                 L3_clusterID               Cluster_labelled
## 1         AG_5               AG_endothelial               AG_5_endothelial
## 2         AG_0          AG_adrenal cortex 1          AG_0_adrenal cortex 1
## 3         AG_1          AG_adrenal cortex 2          AG_1_adrenal cortex 2
## 4         AG_2          AG_adrenal cortex 3          AG_2_adrenal cortex 3
## 5         AG_3          AG_adrenal cortex 4          AG_3_adrenal cortex 4
## 6         AG_4 AG_sympathoadrenal lineage 1 AG_4_sympathoadrenal lineage 1
##     Color  avg_pcw  avg_pcd
## 1 #876941 17.24348 123.7130
## 2 #876941 21.00000 151.0000
## 3 #876941 16.99132 121.9444
## 4 #876941 18.00000 126.0000
## 5 #876941 21.00000 151.0000
## 6 #876941 16.30323 116.9290

4.3 Pseudobulk vista peak matrix

per cluster

addArchRThreads(4)
proj$RNA_Clusters_Labelled <- all.annots[proj$RNA_Clusters, "Cluster_labelled"]
vista_peak_matrix <- getMatrixFromProject(proj, useMatrix = "VistaPeakMatrix")
saveRDS(vista_peak_matrix, paste0(out, "/vista_peak_matrix_cell.rds"))

tmp <- vista_peak_matrix@assays@data$VistaPeakMatrix %>% t %>% as.matrix %>% as.data.frame %>% group_by(vista_peak_matrix$RNA_Clusters_Labelled) %>% dplyr::summarise(across(everything(), list(median=median, sum=sum, mean=mean)))
saveRDS(tmp, paste0(out, "/vista_peak_matrix_clust.rds"))

add peak annotations

vista_peak_matrix <- readRDS(paste0(out, "/vista_peak_matrix_cell.rds"))
peaks <- vista_peak_matrix@rowRanges

# add enh name to peaks
vista_gr <- GRanges(vista$coordinate_hg38)
mcols(vista_gr) <- vista
vista_gr <- vista_gr %>% unique
overlap <- findOverlaps(peaks, vista_gr, type="equal")
peaks$vista_id <- NA
peaks[overlap@from]$vista_id <- vista_gr[overlap@to]$vista_id

# add vista organ positive info
for (organ in c("Brain", "Heart", "Eye", "Liver", "Skin")){
  pos_peaks <- vista[vista$vista_id %in% vista_long[vista_long$HDMA_tissue==organ, "vista_id"],]$coordinate_hg38 %>% GRanges
  mcols(peaks)[paste0("vista_pos_", organ)] <- rep("neg", length(peaks))
  mcols(peaks)[peaks$vista_id %in% vista_long[vista_long$HDMA_tissue==organ, "vista_id"], paste0("vista_pos_", organ)] <- "pos"
}

vista_peak_matrix@rowRanges <- peaks 

saveRDS(vista_peak_matrix, paste0(out, "/vista_peak_matrix_cell.rds"))

4.4 Overlap with global peaks

## cannot get peak matrix at once because it's too large, submit as job
#system("sbatch -p wjg,sfgf,biochem --mem-per-cpu=250g --time=48:00:00 --job-name=peakmtx --wrap \"Rscript 09-get_peak_matrix.R\"")

peak_matrix <- readRDS(paste0(out, "/peak_matrix_cell_overlapvista.rds")) # output from 09-get_peak_matrix.R
vista_peak_matrix <- readRDS(paste0(out, "/vista_peak_matrix_cell.rds"))

# overlap vista peaks with global peaks, min 75%=375bp overlap
#   if overlapping multiple global peaks, use ATAC signal from the peak with the highest total ATAC signal
overlaps <- findOverlaps(peak_matrix@rowRanges, vista_peak_matrix@rowRanges, minoverlap=375)
vista_withoverlap <- overlaps@to %>% unique

maxmat <- list()
hdma_peaks <- c()

peak_matrix_data <- peak_matrix@assays@data$PeakMatrix
for (i in vista_withoverlap){
  if (mod(i, 50)==0){
    message(i)
  }
  cand_id <- overlaps@from[which(overlaps@to==i)]
  cand <- peak_matrix_data[cand_id,,drop=F]
  maxid <- rev(order(rowSums(cand)))[1]
  maxmat[[i]] <- cand[maxid,] %>% unname
  hdma_peaks <- c(hdma_peaks, cand_id[maxid])
}

stopifnot(sum(lengths(maxmat)!=0)==length(hdma_peaks))
maxmat <- maxmat[lengths(maxmat)!=0]

maxmat <- do.call(rbind, maxmat)
hdma_peaks <- peak_matrix@rowRanges[hdma_peaks]

vista_peak_matrix_max <- vista_peak_matrix
vista_peak_matrix_max <- vista_peak_matrix_max[vista_withoverlap,]
vista_peak_matrix_max@assays@data$VistaPeakMatrix <- Matrix(maxmat, sparse=T)
mcols(hdma_peaks) <- cbind(mcols(hdma_peaks), mcols(rowRanges(vista_peak_matrix_max))[,2:7])

rowRanges(vista_peak_matrix_max) <- hdma_peaks
  
saveRDS(vista_peak_matrix_max, paste0(out, "/peak_matrix_cell_overlapvista_maxpeak.rds"))

tmp <- vista_peak_matrix_max@assays@data$VistaPeakMatrix %>% t %>% as.matrix %>% as.data.frame %>% group_by(vista_peak_matrix_max$RNA_Clusters_Labelled) %>% dplyr::summarise(across(everything(), list(median=median, sum=sum, mean=mean)))
saveRDS(tmp, paste0(out, "/peak_matrix_cell_overlapvista_maxpeak_clust.rds"))

Read overlap data

vista_peak_matrix <- readRDS(paste0(out, "/peak_matrix_cell_overlapvista_maxpeak.rds"))
tmp <- readRDS(paste0(out, "/peak_matrix_cell_overlapvista_maxpeak_clust.rds"))

# annotations
rownames(all.annots) <- all.annots$Cluster_labelled
all.annots.sorted <- all.annots[tmp[[grep("vista_peak", colnames(tmp), value=T)]], ]

4.5 ATAC heatmap

Filter to vista enhancers positive in at least 1 hdma tissue

df <- tmp[,grepl("sum", colnames(tmp))]
peaks <- vista_peak_matrix@rowRanges

idx_pf <- which((peaks$vista_pos_Brain=="pos") | (peaks$vista_pos_Heart=="pos") | (peaks$vista_pos_Eye=="pos") | 
        (peaks$vista_pos_Liver=="pos") | (peaks$vista_pos_Skin=="pos"))

# different normalizations
df <- df[, idx_pf]
peaks <- peaks[idx_pf,]
df_lognorm_sub <- (df/rowSums(df)) * 1e6 %>% log1p
df_lognorm_0to1_sub <- as.data.frame(lapply(df_lognorm_sub, scales::rescale))

ha <- columnAnnotation(organ=all.annots.sorted$organ, compartment=all.annots.sorted$L0_clusterName,
                       col=list(compartment=cmap_compartment,
                                organ=cmap_organ))

rowannot_ls <- list()
col_ls <- list()
for (organ in c("Heart", "Brain", "Eye", "Liver", "Skin")){
  rowannot_ls[paste0("vista_pos_", organ)] <- mcols(peaks)[paste0("vista_pos_", organ)]
  col_ls[[paste0("vista_pos_", organ)]] <- c(neg="gray", pos=cmap_organ[[organ]])
}

# order the rows and cols
ha2 <- HeatmapAnnotation(df=do.call(cbind, rowannot_ls %>% unname), col = col_ls, which="row")

# Calculate breaks based on quantiles to handle outliers
cmap <- cmaps_BOR$whitePurple
dfnorm <- df_lognorm_0to1_sub
plot_title <- "Normalized ATAC \n signal (0-1) \nLn(CPM+1)"
file_name <- "heatmap_vista_hdma_maxpeakoverlap.pdf"


break_points <- quantile(dfnorm %>% as.matrix, probs = seq(0.1, 0.9, length.out = length(cmap)))
color_mapping_1 <- circlize::colorRamp2(break_points, cmap)

break_points <- quantile(dfnorm %>% as.matrix, probs = seq(0.01, 0.99, length.out = length(cmap)))
color_mapping_2 <- circlize::colorRamp2(break_points, cmap)

color_mapping_3 <- circlize::colorRamp2(seq(0, 0.3, length.out=length(cmap)), cmap)

color_mapping_4 <- circlize::colorRamp2(seq(0, 0.5, length.out=length(cmap)), cmap)

color_mapping_5 <- circlize::colorRamp2(seq(0, 0.8, length.out=length(cmap)), cmap)


### cluster within groups ----------------------------------

# add peak group info
peaks$n_org_pos <- (data.frame(mcols(peaks)[,c("vista_pos_Heart", "vista_pos_Brain", "vista_pos_Eye", "vista_pos_Liver", "vista_pos_Skin")]) == "pos") %>% rowSums
peaks$group <- "negative"
mcols(peaks)[peaks$vista_pos_Heart=="pos" & peaks$n_org_pos==1, "group"] <- "heart positive only"
mcols(peaks)[peaks$vista_pos_Brain=="pos" & peaks$n_org_pos==1, "group"] <- "brain positive only"
mcols(peaks)[peaks$vista_pos_Eye=="pos" & peaks$n_org_pos==1, "group"] <- "eye positive only"
mcols(peaks)[peaks$vista_pos_Liver=="pos" & peaks$n_org_pos==1, "group"] <- "liver positive only"
mcols(peaks)[peaks$vista_pos_Skin=="pos" & peaks$n_org_pos==1, "group"] <- "skin positive only"
mcols(peaks)[peaks$n_org_pos > 1, "group"] <- "multi-organ positive"
peaks$group <- factor(peaks$group, levels=c("liver positive only", "skin positive only", "brain positive only", "eye positive only", "multi-organ positive","heart positive only" ))


ht_list <- list()
cmap_list <- c(color_mapping_1, color_mapping_2, color_mapping_3, color_mapping_4, color_mapping_5)
for (i in seq_along(cmap_list)){
  ht1 <- Heatmap(
        dfnorm %>% t, 
        name=plot_title,
        top_annotation = ha,
        right_annotation = ha2,
        row_split = peaks$group,  
        cluster_row_slices = T,
        cluster_rows = T,
        cluster_columns=F,
        clustering_distance_columns = "pearson",
        clustering_distance_rows = "pearson",
        clustering_method_rows = "complete",
        clustering_method_columns = "complete",
        column_names_gp = gpar(fontsize = 2, col=all.annots.sorted$Color),
        column_labels = all.annots.sorted$Cluster_labelled,
        column_title = paste0(dim(dfnorm)[1], " HDMA clusters"),
        row_names_gp = gpar(fontsize=0.5),
        row_labels = peaks$vista_id,
        row_title = paste0(dim(dfnorm)[2], " VISTA enhancers"),
        col = cmap_list[[i]]
)
  ht1 <- draw(ht1)
  ht_list[[i]]  <- ht1
}

saveRDS(ht_list, paste0(out, "/", file_name, ".rds"))

file_name <- "heatmap_vista_hdma_maxpeakoverlap.pdf"
ht_list <- readRDS(paste0(out, "/", file_name, ".rds"))
for (h in ht_list){
  plot(h)
}

4.6 ATAC heatmap Z score

df <- tmp[,grepl("sum", colnames(tmp))]
peaks <- vista_peak_matrix@rowRanges

idx_pf <- which((peaks$vista_pos_Brain=="pos") | (peaks$vista_pos_Heart=="pos") | (peaks$vista_pos_Eye=="pos") | 
        (peaks$vista_pos_Liver=="pos") | (peaks$vista_pos_Skin=="pos"))

# different normalizations
df <- df[, idx_pf]
peaks <- peaks[idx_pf,]
df_lognorm_sub <- (df/rowSums(df)) * 1e6 %>% log1p
df_lognorm_z_sub <- scale(df_lognorm_sub) %>% as.data.frame

# read saved row order
ord <- readRDS(glue("{out}/heatmap_vista_hdma_maxpeakoverlap.pdf.rds"))
ord <- ord[[5]] %>% row_order
peak_order <- ord %>% unlist %>% unname
df_lognorm_z_sub <- df_lognorm_z_sub[,peak_order]
peaks <- peaks[peak_order]

ha <- columnAnnotation(organ=all.annots.sorted$organ, compartment=all.annots.sorted$L0_clusterName,
                       col=list(compartment=cmap_compartment,
                                organ=cmap_organ))

rowannot_ls <- list()
col_ls <- list()
for (organ in c("Heart", "Brain", "Eye", "Liver", "Skin")){
  rowannot_ls[paste0("vista_pos_", organ)] <- mcols(peaks)[paste0("vista_pos_", organ)]
  col_ls[[paste0("vista_pos_", organ)]] <- c(neg="gray", pos=cmap_organ[[organ]])
}

# order the rows and cols
ha2 <- HeatmapAnnotation(df=do.call(cbind, rowannot_ls %>% unname), col = col_ls, which="row")

# Calculate breaks based on quantiles to handle outliers
cmap <- cmap_chromvar
dfnorm <- df_lognorm_z_sub
plot_title <- "Normalized ATAC \n signal (Z-Score) \nLn(CPM+1)"
file_name <- "heatmap_vista_hdma_maxpeakoverlap_zscore.pdf"


break_points <- quantile(dfnorm %>% as.matrix, probs = seq(0.1, 0.9, length.out = length(cmap)))
color_mapping_1 <- circlize::colorRamp2(break_points, cmap)

break_points <- quantile(dfnorm %>% as.matrix, probs = seq(0.01, 0.99, length.out = length(cmap)))
color_mapping_2 <- circlize::colorRamp2(break_points, cmap)

color_mapping_3 <- circlize::colorRamp2(seq(-1, 1, length.out=length(cmap)), cmap)

color_mapping_4 <- circlize::colorRamp2(seq(-2, 2, length.out=length(cmap)), cmap)

color_mapping_5 <- circlize::colorRamp2(seq(0, 0.8, length.out=length(cmap)), cmap)

### cluster within groups ----------------------------------

# add peak group info
peaks$n_org_pos <- (data.frame(mcols(peaks)[,c("vista_pos_Heart", "vista_pos_Brain", "vista_pos_Eye", "vista_pos_Liver", "vista_pos_Skin")]) == "pos") %>% rowSums
peaks$group <- "negative"
mcols(peaks)[peaks$vista_pos_Heart=="pos" & peaks$n_org_pos==1, "group"] <- "heart positive only"
mcols(peaks)[peaks$vista_pos_Brain=="pos" & peaks$n_org_pos==1, "group"] <- "brain positive only"
mcols(peaks)[peaks$vista_pos_Eye=="pos" & peaks$n_org_pos==1, "group"] <- "eye positive only"
mcols(peaks)[peaks$vista_pos_Liver=="pos" & peaks$n_org_pos==1, "group"] <- "liver positive only"
mcols(peaks)[peaks$vista_pos_Skin=="pos" & peaks$n_org_pos==1, "group"] <- "skin positive only"
mcols(peaks)[peaks$n_org_pos > 1, "group"] <- "multi-organ positive"
peaks$group <- factor(peaks$group, levels=c("liver positive only", "skin positive only", "brain positive only", "eye positive only", "multi-organ positive","heart positive only" ))

ht_list <- list()
cmap_list <- c(color_mapping_1, color_mapping_2, color_mapping_3, color_mapping_4, color_mapping_5)
for (i in seq_along(cmap_list)){
  ht1 <- Heatmap(
        dfnorm %>% t, 
        name=plot_title,
        top_annotation = ha,
        right_annotation = ha2,
        cluster_rows = F,
        split=peaks$group,
        cluster_columns=F,
        column_names_gp = gpar(fontsize = 2, col=all.annots.sorted$Color),
        column_labels = all.annots.sorted$Cluster_labelled,
        column_title = paste0(dim(dfnorm)[1], " HDMA clusters"),
        row_names_gp = gpar(fontsize=0.5),
        row_labels = peaks$vista_id,
        row_title = paste0(dim(dfnorm)[2], " VISTA enhancers"),
        col = cmap_list[[i]]
)
  ht1 <- draw(ht1)
  ht_list[[i]]  <- ht1
}

saveRDS(ht_list, paste0(out, "/", file_name, ".rds"))

file_name <- "heatmap_vista_hdma_maxpeakoverlap_zscore.pdf"
ht_list <- readRDS(paste0(out, "/", file_name, ".rds"))
# for (h in ht_list){
#   plot(h)
# }
plot(ht_list[[4]])

4.7 calculate enrichment

“If an enhancer is labelled VISTA heart positive, how likely is it going to have higher HDMA ATAC signal than those not VISTA heart positive”

P value = (how many times is the shuffled enrichment > actual enrichment)

organ <- "Eye"
df <- df_lognorm_z_sub
# df <- df_lognorm_sub
mask_pos <- mcols(peaks)[[paste0("vista_pos_",organ)]] == "pos"
n_pos <- sum(mask_pos)
mask_hdma_clust <- all.annots.sorted$organ == organ

avg_signal_neg <- df[mask_hdma_clust,!mask_pos] %>% unlist %>%  mean
avg_signal_pos <- df[mask_hdma_clust,mask_pos] %>% unlist %>% mean

enrichment <- avg_signal_pos - avg_signal_neg  # actual enrichment, subtract in log space = division in nonlog space

# now create shuffles 
enrichment_shuffles <- list()
avg_signal_pos_shuffles <- list()
n_shuffles <- 10000
set.seed(16)
for (i in 1:n_shuffles){
  idx_pos_label <- sample(1:length(peaks), size = n_pos)
  mask_pos <- (1:length(peaks)) %in% idx_pos_label
  neg <- df[mask_hdma_clust,!mask_pos] %>% unlist %>%  mean
  pos <- df[mask_hdma_clust,mask_pos] %>% unlist %>% mean
  avg_signal_pos_shuffles[[i]] <- pos
  enrichment_shuffles[[i]] <- pos - neg
}
avg_signal_pos_shuffles <- avg_signal_pos_shuffles %>% unlist
enrichment_shuffles <- enrichment_shuffles %>% unlist
sum(enrichment_shuffles > enrichment) / n_shuffles
sum(avg_signal_pos_shuffles > avg_signal_pos) / n_shuffles
exp(enrichment)

# also try wilcoxon rank sum test
for (organ in c("Brain", "Heart", "Eye", "Liver", "Skin")){
  print("#####################")
  print(organ)
  df <- df_lognorm_z_sub
  # df <- df_lognorm_sub
  mask_pos <- mcols(peaks)[[paste0("vista_pos_",organ)]] == "pos"
  n_pos <- sum(mask_pos)
  mask_hdma_clust <- all.annots.sorted$organ == organ
  
  y <- df[mask_hdma_clust,!mask_pos] %>% unlist
  x <- df[mask_hdma_clust,mask_pos] %>% unlist
  res <- wilcox.test(x, y,
              alternative = c("greater"),
              conf.int = T, conf.level = 0.95)
  print(res)
  print(res$p.value)
  print(res$statistic/(length(x)*length(y)))
}

4.8 RNA heatmap

plot the matching RNA heatmap to the ATAC heatmap, use expression of the nearest gene

# get peaks
peaks <- vista_peak_matrix@rowRanges
idx_pf <- which((peaks$vista_pos_Brain=="pos") | (peaks$vista_pos_Heart=="pos") | (peaks$vista_pos_Eye=="pos") | 
        (peaks$vista_pos_Liver=="pos") | (peaks$vista_pos_Skin=="pos"))
peaks <- peaks[idx_pf,]

# get list of all nearest genes first
addArchRGenome("hg38")

## Setting default genome to Hg38.

geneAnnot <- getArchRGenome(geneAnnotation = T, genomeAnnotation = F) %>% as.list

## Using GeneAnnotation set by addArchRGenome(Hg38)!

peaks_anno <- ArchR:::.fastAnnoPeaks(peaks, BSgenome = BSgenome.Hsapiens.UCSC.hg38, geneAnnotation = geneAnnot)

## Annotating Peaks : Nearest Gene

## Annotating Peaks : Gene

## Annotating Peaks : TSS

## Annotating Peaks : GC

gene.list <- unique(peaks_anno$nearestGene)

dir.create(paste0(out, "/gex/"), recursive = TRUE, showWarnings = F)

get RNA data

for (i in 1:nrow(tissue_meta)){
  
  organ <- tissue_meta$organ[i]
  organcode <- tissue_meta$organcode[i]
  
  message(sprintf("@ current organ: %s", organ))
  
  out_files <- c(file.path(out, sprintf("gex/all_gex_rna.mean.%s.rds", organcode)),
                 file.path(out, sprintf("gex/all_gex_rna.detection.%s.rds", organcode)),
                 file.path(out, sprintf("gex/all_gex_decontx.mean.%s.rds", organcode)),
                 file.path(out, sprintf("gex/all_gex_decontx.detection.%s.rds", organcode)))
  
  if (all(file.exists(out_files))) {
    
    message("@ done, skipping.")
    
  } else {
    
    # read cluster id annotation
    annot <- read.csv(sprintf(here::here("output/01-preprocessing/02/shared/meta/%s_meta.txt"), organcode), sep="\t") %>% as.data.frame
    rownames(annot) <- annot$cb
    
    # Raw RNA counts -------------------------------------------------------------
    # get gene expression (RNA raw counts, scaled to 10000 per cell then log-transformed)
    message("fetching gene expression from RNA assay")
    rna_proj <- readRDS(
      sprintf(here::here("output/01-preprocessing/02/%s/rna_preprocess_output/RNA_obj_clustered_final.rds"),
              organ))
    rna_proj <- NormalizeData(rna_proj, assay="RNA")
    DefaultAssay(rna_proj) <- "RNA"
    gex.df <- FetchData(rna_proj, vars = gene.list[gene.list %in% rownames(rna_proj)], slot="data") 
    
    # add cluster
    gex.df$cluster <-  paste0(organcode, "_",annot[rownames(gex.df),"L1_clusterID"]) # e.g. AG_0, AG_10
    gex.df.mean <- gex.df %>% group_by(cluster) %>% dplyr::summarize(across(everything(), mean)) %>% as.data.frame
    
    saveRDS(gex.df.mean, file.path(out, sprintf("gex/all_gex_rna.mean.%s.rds", organcode)))
    
    # detection rate -------------------------------------------------------------
    rna.detection <- calc_pct1(gex.df, "cluster")
    
    saveRDS(rna.detection, file.path(out, sprintf("gex/all_gex_rna.detection.%s.rds", organcode)))
    
    dim(gex.df.mean)
    dim(rna.detection)
    
    # DecontX --------------------------------------------------------------------
    message("@ fetching gene expression from decontX assay")
    # get decontX normalized data but get genes in gene list only
    DefaultAssay(rna_proj) <- "decontX"
    gex.df <- FetchData(rna_proj, vars = gene.list[gene.list %in% rownames(rna_proj)], slot="data") 
    
    # add cluster
    gex.df$cluster <-  paste0(organcode, "_",annot[rownames(gex.df),"L1_clusterID"]) # e.g. AG_0, AG_10
    gex.df.mean <- gex.df %>% group_by(cluster) %>% dplyr::summarize(across(everything(), mean)) %>% as.data.frame
    
    saveRDS(gex.df.mean, file.path(out, sprintf("gex/all_gex_decontx.mean.%s.rds", organcode)))
    
    # detection rate -------------------------------------------------------------
    decontx_detection <- calc_pct1(gex.df, "cluster")
    
    saveRDS(decontx_detection, file.path(out, sprintf("gex/all_gex_decontx.detection.%s.rds", organcode)))
    
    dim(gex.df.mean)
    dim(decontx_detection)
    
    rm(rna_proj)
    
    message("@ done.")
    
  }
}

# write out gene expression averaged per cluster, binding all rows together
# dplyr::bind_rows(will fill NAs for missing columns between dataframes being bound togetther)
all_gex_rna.mean <- map(
  tissue_meta$organcode,
  ~ readRDS(file.path(out, glue("gex/all_gex_rna.mean.{.x}.rds")))) %>% 
  bind_rows()

all_gex_rna.detection <- map(
  tissue_meta$organcode,
  ~ readRDS(file.path(out, glue("gex/all_gex_rna.detection.{.x}.rds")))) %>% 
  bind_rows()

all_gex_decontx.mean <- map(
  tissue_meta$organcode,
  ~ readRDS(file.path(out, glue("gex/all_gex_decontx.mean.{.x}.rds")))) %>% 
  bind_rows()

all_gex_decontx.detection <- map(
  tissue_meta$organcode,
  ~ readRDS(file.path(out, glue("gex/all_gex_decontx.detection.{.x}.rds")))) %>% 
  bind_rows()

dim(all_gex_rna.mean)
dim(all_gex_rna.detection)
dim(all_gex_decontx.mean)
dim(all_gex_decontx.detection)

saveRDS(dplyr::bind_rows(all_gex_rna.mean),          file.path(out, "gex/all_gex_rna.mean.rds"))
saveRDS(dplyr::bind_rows(all_gex_decontx.mean),      file.path(out, "gex/all_gex_decontx.mean.rds"))
saveRDS(dplyr::bind_rows(all_gex_rna.detection),     file.path(out, "gex/all_gex_rna.detectionrate.rds"))
saveRDS(dplyr::bind_rows(all_gex_decontx.detection), file.path(out, "gex/all_gex_decontx.detectionrate.rds"))

read gex data

all_gex_rna.mean <- readRDS(file.path(out, "gex/all_gex_rna.mean.rds"))
all_gex_decontx.mean <- readRDS(file.path(out, "gex/all_gex_decontx.mean.rds"))

# read saved row order
ht_list <- readRDS(glue("{out}/heatmap_vista_hdma_maxpeakoverlap.pdf.rds")) 
ht <- ht_list[[5]]

ord <- lapply(seq_along(row_order(ht)), function(i){data.frame(idx=row_order(ht)[[i]]) %>% mutate(group=names(row_order(ht))[i])}) %>% do.call(rbind, .)
ord$group <- factor(ord$group, levels=c("liver positive only", "skin positive only", "brain positive only", "eye positive only", "multi-organ positive","heart positive only" ))
peak_order <- ord$idx
genes_sorted <- peaks_anno[peak_order]$nearestGene %>% unname
clust_sorted <- all.annots.sorted$L0_clusterID

# filter for peaks whose nearest gene has expression data
rna <- all_gex_rna.mean
#rna <- all_gex_decontx.mean
rownames(rna) <- rna$cluster
idx <- which(genes_sorted %in% colnames(rna))
mat <- rna[clust_sorted, genes_sorted[idx]]
mat[is.na(mat)] <- 0
mat <- as.data.frame(lapply(mat, scales::rescale)) # scale 0 to 1 for each gene across all clusters

# plotting config
cmap <- cmaps_BOR$whiteBlue
plot_title <- "Normalized RNA \n signal (0-1) \nLn(TPM+1)"

break_points <- quantile(mat %>% as.matrix, probs = seq(0.1, 0.9, length.out = length(cmap)), na.rm=T)
color_mapping_1 <- circlize::colorRamp2(break_points, cmap)

break_points <- quantile(mat %>% as.matrix, probs = seq(0.01, 0.99, length.out = length(cmap)), na.rm=T)
color_mapping_2 <- circlize::colorRamp2(break_points, cmap)

color_mapping_3 <- circlize::colorRamp2(seq(0, 1, length.out=length(cmap)), cmap)

ha3 <- HeatmapAnnotation(df=do.call(cbind, rowannot_ls %>% unname)[idx,], col = col_ls, which="row")

ht1 <- Heatmap(
        mat %>% t, 
        name=plot_title,
        top_annotation = ha,
        right_annotation = ha3,
        split = ord[idx,"group"],
        cluster_rows = F,
        cluster_columns=F,
        column_names_gp = gpar(fontsize = 2, col=all.annots.sorted$Color),
        column_labels = all.annots.sorted$Cluster_labelled,
        column_title = paste0(dim(mat)[1], " HDMA clusters"),
        row_names_gp = gpar(fontsize=0.5),
        row_labels = genes_sorted[idx],
        row_title = paste0(dim(mat)[2], " nearest genes of VISTA enhancers"),
        col = color_mapping_3
)

ht1 <- draw(ht1)

plot(ht1)

4.9 RNA heatmap Z score

# read saved row order
ht_list <- readRDS(glue("{out}/heatmap_vista_hdma_maxpeakoverlap.pdf.rds")) 
ht <- ht_list[[5]]

ord <- lapply(seq_along(row_order(ht)), function(i){data.frame(idx=row_order(ht)[[i]]) %>% mutate(group=names(row_order(ht))[i])}) %>% do.call(rbind, .)
ord$group <- factor(ord$group, levels=c("liver positive only", "skin positive only", "brain positive only", "eye positive only", "multi-organ positive","heart positive only" ))
peak_order <- ord$idx
genes_sorted <- peaks_anno[peak_order]$nearestGene %>% unname
clust_sorted <- all.annots.sorted$L0_clusterID

# filter for peaks whose nearest gene has expression data
rna <- all_gex_rna.mean
rownames(rna) <- rna$cluster
idx <- which(genes_sorted %in% colnames(rna))
mat <- rna[clust_sorted, genes_sorted[idx]]
mat[is.na(mat)] <- 0
mat <- scale(mat) %>% as.data.frame # z score

# plotting config
# cmap <- cmap_rna
cmap <- cmap_chromvar
plot_title <- "Normalized RNA \n signal (Z-Score) \nLn(TPM+1)"

break_points <- quantile(mat %>% as.matrix, probs = seq(0.1, 0.9, length.out = length(cmap)), na.rm=T)
color_mapping_1 <- circlize::colorRamp2(break_points, cmap)

break_points <- quantile(mat %>% as.matrix, probs = seq(0.01, 0.99, length.out = length(cmap)), na.rm=T)
color_mapping_2 <- circlize::colorRamp2(break_points, cmap)

color_mapping_3 <- circlize::colorRamp2(seq(-2, 2, length.out=length(cmap)), cmap)

ha3 <- HeatmapAnnotation(df=do.call(cbind, rowannot_ls %>% unname)[idx,], col = col_ls, which="row")

ht1 <- Heatmap(
        mat %>% t, 
        name=plot_title,
        top_annotation = ha,
        right_annotation = ha3,
        split = ord[idx,"group"],
        cluster_rows = F,
        cluster_columns=F,
        column_names_gp = gpar(fontsize = 2, col=all.annots.sorted$Color),
        column_labels = all.annots.sorted$Cluster_labelled,
        column_title = paste0(dim(mat)[1], " HDMA clusters"),
        row_names_gp = gpar(fontsize=0.5),
        row_labels = genes_sorted[idx],
        row_title = paste0(dim(mat)[2], " nearest genes of VISTA enhancers"),
        col = color_mapping_3
)

ht1 <- draw(ht1)

# plot(ht1)

4.10 calculate enrichment

“If an enhancer is labelled VISTA heart positive, how likely is it going to have higher HDMA RNA signal than those not VISTA heart positive”

P value = (how many times is the shuffled enrichment > actual enrichment)

organ <- "Brain"
df <- mat
peaks_test <- peaks_anno[peak_order][idx,]
mask_pos <- mcols(peaks_test)[[paste0("vista_pos_",organ)]] == "pos"
n_pos <- sum(mask_pos)
mask_hdma_clust <- all.annots.sorted$organ == organ

avg_signal_neg <- df[mask_hdma_clust,!mask_pos] %>% unlist %>%  mean
avg_signal_pos <- df[mask_hdma_clust,mask_pos] %>% unlist %>% mean

enrichment <- avg_signal_pos - avg_signal_neg  # actual enrichment, subtract in log space = division in nonlog space

# now create shuffles 
enrichment_shuffles <- list()
avg_signal_pos_shuffles <- list()
n_shuffles <- 10000
set.seed(16)
for (i in 1:n_shuffles){
  idx_pos_label <- sample(1:length(peaks_test), size = n_pos)
  mask_pos <- (1:length(peaks_test)) %in% idx_pos_label
  neg <- df[mask_hdma_clust,!mask_pos] %>% unlist %>%  mean
  pos <- df[mask_hdma_clust,mask_pos] %>% unlist %>% mean
  avg_signal_pos_shuffles[[i]] <- pos
  enrichment_shuffles[[i]] <- pos - neg
}
avg_signal_pos_shuffles <- avg_signal_pos_shuffles %>% unlist
enrichment_shuffles <- enrichment_shuffles %>% unlist
sum(enrichment_shuffles > enrichment) / n_shuffles
sum(avg_signal_pos_shuffles > avg_signal_pos) / n_shuffles

5 Plot tracks

VISTA heart pos only, HDMA pos in liver from global peak max overlap approach: c(“hs2007”,“mm99”,“hs2240”,“mm194”,“mm122”,“mm2143”,“mm69”,“mm21”,“mm291”,“mm296”,“mm101”,“mm78”,“hs2062”,“mm1668”,“mm18”,“mm257”,“hs502”,“mm244”,“mm1343”,“mm89”,“mm1211”,“mm1330”,“mm1326”)

# also try wilcoxon rank sum test
for (organ in c("Brain", "Heart", "Eye", "Liver", "Skin")){
  print("#####################")
  print(organ)
  df <- mat
  peaks_test <- peaks_anno[peak_order][idx,]
  mask_pos <- mcols(peaks_test)[[paste0("vista_pos_",organ)]] == "pos"
  n_pos <- sum(mask_pos)
  mask_hdma_clust <- all.annots.sorted$organ == organ
  
  y <- df[mask_hdma_clust,!mask_pos] %>% unlist
  x <- df[mask_hdma_clust,mask_pos] %>% unlist
  res <- wilcox.test(x, y,
              alternative = c("greater"),
              conf.int = T, conf.level = 0.95)
  print(res)
  print(res$p.value)
  print(res$statistic/(length(x)*length(y)))
}

5.1 Plot ATAC

global_bp_obj <- readRDS(here::here("output/05-misc/01/global_bp_obj.rds"))

# plot
# region_name <- "mm291"
# region_str <- "chr2:126673667-126674912"
region_name <- "mm101"
region_str <- "chr16:104534-105616"
region <- GRanges(region_str) + 5e4
peaks_highlight <- ranges(GRanges(region_str))

highlights <- tibble(
  xmin = peaks_highlight@start,
  xmax = peaks_highlight@start + peaks_highlight@width - 1,
  color = rep("red", length(peaks_highlight)),
  ymin = -Inf, ymax = Inf, alpha = .2
)

# 
# add in ABC loops
global <- readRDS(here::here("output/04-enhancers/06/abc_summary_df.rds"))
# first get the TSS coords from ABC reference file
tss <- read.table(here::here("data/reference/tss/CollapsedGeneBounds.hg38.bed"), col.names = c("seqnames", "start", "end", "symbol", "", "strand", "id", "type"))
tss$tss_start <- 0
tss[tss$strand=="-", "tss_start"] <- tss[tss$strand=="-", "end"]
tss[tss$strand=="+", "tss_start"] <- tss[tss$strand=="+", "start"]

#subglobal <- global %>% dplyr::filter(abc_is_linked_to_gene_HT==1)
subglobal <- global %>% dplyr::filter(abc_is_linked_to_gene_LI==1)
#subglobal <- global %>% dplyr::filter((abc_is_linked_to_gene_HT==1) | (abc_is_linked_to_gene_LI==1))
overlap <- findOverlaps(region, GRanges(subglobal))
subglobal <- subglobal[overlap@to,]

subglobal <- merge(subglobal, tss[,c("symbol", "tss_start")], by.x="abc_linked_gene_LI", by.y="symbol", all.x=T)

loops <- subglobal[,c("seqnames", "abc_linked_gene_LI", "peak_name", "med_abc_score_LI")]
loops$chr <- loops$seqnames
loops$start <- pmin((subglobal$start + subglobal$end)%/%2, subglobal$tss_start)
loops$end <- pmax((subglobal$start + subglobal$end)%/%2, subglobal$tss_start)

p <- trackplot_helper_v2c(
  region=region,
  clusters=global_bp_obj$cell_metadata$organ, 
  fragments=global_bp_obj$frags, 
  cell_read_counts=global_bp_obj$cell_metadata$nFrags, 
  transcripts=global_bp_obj$transcripts, 
  loops=loops,
  loop_color = "med_abc_score_LI",
  loop_label = "ABC Liver",
  highlights = highlights
)
ggplot2::ggsave(plot=p, paste0(figout, "/plot_track_", region_name, "_LI_abc.png"), dpi=300, width=9, height=8, units="in")

5.2 Plot GEX

# gene_name <- "GYPC"
gene_name <- "HBA2"
####################

p <- trackplot_helper_v2c(
  gene=gene_name,
  clusters=global_bp_obj$cell_metadata$organ, 
  fragments=global_bp_obj$frags, 
  cell_read_counts=global_bp_obj$cell_metadata$nFrags, 
  transcripts=global_bp_obj$transcripts, 
  expression_matrix = global_bp_obj$rna,
  loops=loops,
  flank = 5e4,
  loop_color = "med_abc_score_LI",
  loop_label = "ABC Liver",
  highlights = highlights
)

ggplot2::ggsave(plot=p, paste0(figout, "/plot_track_", region_name, "_LI_abc_", gene_name, "gex.png"), dpi=300, width=12, height=8, units="in")

5.2.1 Liver clusters

submeta <- global_bp_obj$cell_metadata %>% dplyr::filter(organ %in% c("Liver"))

p <- trackplot_helper_v2c(
  gene=gene_name,
  clusters=submeta$archive_L3_clusterID, 
  fragments=global_bp_obj$frags %>% select_cells(submeta$cb), 
  cell_read_counts=submeta$nFrags, 
  transcripts=global_bp_obj$transcripts, 
  expression_matrix = global_bp_obj$rna[,submeta$cb],
  loops=loops,
  flank = 1e4,
  #flank = 2e5,
  loop_color = "med_abc_score_LI",
  loop_label = "ABC Liver",
  highlights = highlights
)

ggplot2::ggsave(plot=p, paste0(figout, "/plot_track_", region_name, "_LI_abc_", gene_name, "gex_liveronly.png"), dpi=300, width=12, height=8, units="in")
ggplot2::ggsave(plot=p, paste0(figout, "/plot_track_", region_name, "_LI_abc_", gene_name, "gex_liveronly.pdf"), dpi=300, width=8, height=8, units="in")

#region <- GRanges("chr2:126655000-126678000") # mm291
# region <- GRanges("chr16:103900-181800") # mm101
region <- GRanges("chr16:76800-181800") # mm101 wider to include alpha globin superenhancer

submeta <- global_bp_obj$cell_metadata %>% dplyr::filter(organ %in% c("Liver"))

submeta$archive_L2_clusterName <- factor(submeta$archive_L2_clusterName, levels=c("erythroblast", "cycling erythroblast", "early erythroblast", "Kupffer", "B cell progenitor", "cholangiocyte", "endothelial", "hepatocyte", "megakaryocyte", "stellate"))

# filter for only ABC loops that originate from the enhancer 
region_narrow <-  GRanges("chr16:104534-105616")

# can only start inside the enhancer 
mask <- ((start(global_bp_obj$loops_abc$Liver) >=  start(region_narrow)) & (start(global_bp_obj$loops_abc$Liver) <=  end(region_narrow))) & (seqnames(global_bp_obj$loops_abc$Liver)=="chr16") 

# can start or end inside the enhancer
# mask <- (((start(global_bp_obj$loops_abc$Liver) >=  start(region_narrow)) & (start(global_bp_obj$loops_abc$Liver) <=  end(region_narrow))) | ((end(global_bp_obj$loops_abc$Liver) >=  start(region_narrow)) & (end(global_bp_obj$loops_abc$Liver) <=  end(region_narrow)))) & (seqnames(global_bp_obj$loops_abc$Liver)=="chr16") 


p1 <- trackplot_helper_v2c(
  region=region,
  clusters=submeta$archive_L2_clusterName, 
  fragments=global_bp_obj$frags %>% select_cells(submeta$cb), 
  cell_read_counts=submeta$nFrags, 
  transcripts=global_bp_obj$transcripts,
  loops=global_bp_obj$loops_abc$Liver[mask] %>% unique,
  loop_color = "ABC.Score",
  loop_label = "ABC Liver",
  highlights = highlights
)

p2 <- trackplot_helper_v2c(
  gene="HBA2",
  clusters=submeta$archive_L2_clusterName, 
  fragments=global_bp_obj$frags %>% select_cells(submeta$cb), 
  cell_read_counts=submeta$nFrags, 
  transcripts=global_bp_obj$transcripts, 
  expression_matrix = global_bp_obj$rna[,submeta$cb],
  flank = 5e3,
  highlights = highlights
)

p1

p2

5.2.2 organ specific clusters

organ_code <- "BR"
organ_name <- "Brain"

abc_is_linked_to_gene_organ <- paste0("abc_is_linked_to_gene_", organ_code)
abc_linked_gene_organ <- paste0("abc_linked_gene_", organ_code)
med_abc_score_organ <- paste0("med_abc_score_", organ_code)

subglobal <- global %>% dplyr::filter(global[abc_is_linked_to_gene_organ]==1)
overlap <- findOverlaps(region, GRanges(subglobal))
subglobal <- subglobal[overlap@to,]

subglobal <- merge(subglobal, tss[,c("symbol", "tss_start")], by.x=abc_linked_gene_organ, by.y="symbol", all.x=T)

loops <- subglobal[,c("seqnames", abc_linked_gene_organ, "peak_name", med_abc_score_organ)]
loops$chr <- loops$seqnames
loops$start <- pmin((subglobal$start + subglobal$end)%/%2, subglobal$tss_start)
loops$end <- pmax((subglobal$start + subglobal$end)%/%2, subglobal$tss_start)

Sys.time()
submeta <- global_bp_obj$cell_metadata %>% dplyr::filter(organ_code==organ_code)
p <- trackplot_helper_v2c(
  #gene=gene_name,
  region=region,
  clusters=submeta$archive_L3_clusterID, 
  fragments=global_bp_obj$frags %>% select_cells(submeta$cb), 
  cell_read_counts=submeta$nFrags, 
  transcripts=global_bp_obj$transcripts, 
  expression_matrix = global_bp_obj$rna[,submeta$cb],
  loops=loops,
  flank = 5e4,
  loop_color = med_abc_score_organ,
  loop_label = paste0("ABC ", organ_name),
  highlights = highlights
)

Sys.time()
ggplot2::ggsave(plot=p, paste0(figout, "/plot_track_", region_name, "_", organ_code, "_abc_", gene_name, "gex_", tolower(organ_name), "only.png"), dpi=300, width=12, height=8, units="in")

5.2.3 P2G loops

global_atac_proj <- loadArchRProject(here::here("output/01-preprocessing/03/allSamples_decontx"))
loops <- get_p2g_loops(global_atac_proj)

p <- trackplot_helper_v2c(
  region=region,
  clusters=global_bp_obj$cell_metadata$organ, 
  fragments=global_bp_obj$frags, 
  cell_read_counts=global_bp_obj$cell_metadata$nFrags, 
  transcripts=global_bp_obj$transcripts, 
  loops=GRanges(loops), 
  loop_color="Correlation", 
  loop_label = "P2G Links (Global)",
  highlights = highlights
)
ggplot2::ggsave(plot=p, paste0(figout, "/plot_track_", region_name, "_globalp2g.png"), dpi=300, width=12, height=8, units="in")

5.3 Plot contrib scores

5.3.1 inputs

# region <- "chr16:104534-105616" # mm101
region <- "chr16:105080-105200" # zoomed in mm101
chrombp_outdir <- here::here("output/03-chrombpnet/02-compendium/hits_unified_motifs/reconciled_per_celltype_peaks/")
finemo_param <- "counts_v0.23_a0.8_all"

global_bp_obj <- readRDS(here::here("output/05-misc/01/global_bp_obj.rds"))

# hits 
cluster <- "Liver_c0" # for loading hits only
hits <- global_bp_obj$hits[[cluster]]

5.3.2 subset global

# subset the global object
sub_meta <- global_bp_obj$cell_metadata %>%
  dplyr::filter(organ=="Liver") 

sub_meta$archive_L2_clusterName <- factor(sub_meta$archive_L2_clusterName, levels=c( "erythroblast", "cycling erythroblast", "early erythroblast", "Kupffer", "B cell progenitor", "cholangiocyte", "endothelial", "hepatocyte", "megakaryocyte", "stellate"))

5.3.3 plot contribs

track_contribs_list <- list()

for (L2_cluster in levels(sub_meta$archive_L2_clusterName)){
  # L2_cluster <- "erythroblast"
  print(L2_cluster)
  L1_clusters <- sub_meta[sub_meta$archive_L2_clusterName==L2_cluster, "Cluster_chrombpnet"] %>% unique
  
  bw_contrib_list <- list()
  for (clust in L1_clusters){
    # load contribution scores
    print(paste0("loading contrib scores from ", clust))
    bw_contrib_list[[clust]] <- rtracklayer::import.bw(glue(here::here("output/03-chrombpnet/01-models/contribs/bias_Heart_c0_thresh0.4/{clust}/average_shaps.counts.bw")), selection=BigWigSelection(GRanges(region))) %>% as.data.frame 
  }
  tmp <- do.call(rbind, bw_contrib_list)
  bw_contrib <- tmp %>% dplyr::group_by(seqnames,start,end, width, strand) %>% dplyr::summarise(score=mean(score)) %>% GRanges
  
  track_contribs_list[[L2_cluster]] <- trackplot_contribs_BL(bw_contrib,
                                       region = region, 
                                       track_label = NULL,
                                       facet_label = L2_cluster,
                                       genome = BSgenome.Hsapiens.UCSC.hg38,
                                       rel_height = 1,
                                       ymax = 0.25, ymin=-0.039,
                                       ) + xlab(NULL)
}

## [1] "erythroblast"
## [1] "loading contrib scores from Liver_c2"
## [1] "loading contrib scores from Liver_c8"

## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.

## [1] "cycling erythroblast"
## [1] "loading contrib scores from Liver_c0"

## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.

## [1] "early erythroblast"
## [1] "loading contrib scores from Liver_c5"

## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.

## [1] "Kupffer"
## [1] "loading contrib scores from Liver_c7"

## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.

## [1] "B cell progenitor"
## [1] "loading contrib scores from Liver_c11"

## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.

## [1] "cholangiocyte"
## [1] "loading contrib scores from Liver_c13"

## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.

## [1] "endothelial"
## [1] "loading contrib scores from Liver_c10"

## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.

## [1] "hepatocyte"
## [1] "loading contrib scores from Liver_c1"
## [1] "loading contrib scores from Liver_c3"
## [1] "loading contrib scores from Liver_c4"
## [1] "loading contrib scores from Liver_c6"

## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.

## [1] "megakaryocyte"
## [1] "loading contrib scores from Liver_c12"

## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.

## [1] "stellate"
## [1] "loading contrib scores from Liver_c9"

## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.

5.3.4 combine plots

# gene annotation
track_genes <- trackplot_gene(global_bp_obj$transcripts, region) +
  ggplot2::guides(color = "none")

# scale bar for the top
scale_plot <- trackplot_scalebar(region)


track_hits <- trackplot_genome_annotation(loci = hits,
                            region = region,
                            color_by = "score",
                            show_strand = T,
                            colors = c("white", "red"), 
                            label_size = 3,
                            label_by = "name",
                            track_label = "Instances")


plot_list <- list(track_genes + highlight_region(region, alpha = 0.4, color = "yellow"), track_hits)

p <- trackplot_combine(c(list(scale_plot), track_contribs_list, plot_list),
                  title = str_to_pretty(region)) &
  ggplot2::theme(legend.direction = "vertical")
p

# ggsave(plot=p, paste0(figout, "contrib_scores_mm101.pdf"), width=8, height=6)

6 Session info

.libPaths()

## [1] "/oak/stanford/groups/wjg/bliu/software/R_lib"     
## [2] "/share/software/user/open/R/4.1.2/lib64/R/library"

sessionInfo()

## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /share/software/user/open/openblas/0.3.10/lib/libopenblas_haswellp-r0.3.10.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggthemes_4.2.4                    RColorBrewer_1.1-3               
##  [3] ggrastr_1.0.1                     GenomicFeatures_1.46.5           
##  [5] AnnotationDbi_1.56.2              ggseqlogo_0.1                    
##  [7] BPCells_0.2.0                     SeuratObject_4.1.3               
##  [9] Seurat_4.3.0                      BSgenome.Hsapiens.UCSC.hg38_1.4.4
## [11] BSgenome_1.62.0                   Biostrings_2.62.0                
## [13] XVector_0.34.0                    ComplexHeatmap_2.10.0            
## [15] rhdf5_2.38.1                      SummarizedExperiment_1.24.0      
## [17] Biobase_2.54.0                    MatrixGenerics_1.6.0             
## [19] Rcpp_1.0.10                       Matrix_1.5-4                     
## [21] matrixStats_0.63.0                data.table_1.14.8                
## [23] stringr_1.5.0                     plyr_1.8.8                       
## [25] magrittr_2.0.3                    gtable_0.3.3                     
## [27] gtools_3.9.4                      gridExtra_2.3                    
## [29] ArchR_1.0.2                       rtracklayer_1.54.0               
## [31] GenomicRanges_1.46.1              GenomeInfoDb_1.30.1              
## [33] IRanges_2.28.0                    S4Vectors_0.32.4                 
## [35] BiocGenerics_0.40.0               cowplot_1.1.1                    
## [37] tibble_3.2.1                      readr_2.1.4                      
## [39] glue_1.6.2                        purrr_1.0.2                      
## [41] ggplot2_3.5.0                     tidyr_1.3.1                      
## [43] dplyr_1.1.4                       here_1.0.1                       
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3               spatstat.explore_3.1-0   reticulate_1.28         
##   [4] tidyselect_1.2.0         RSQLite_2.3.1            htmlwidgets_1.6.2       
##   [7] BiocParallel_1.28.3      Rtsne_0.16               munsell_0.5.0           
##  [10] codetools_0.2-18         ica_1.0-3                future_1.33.2           
##  [13] miniUI_0.1.1.1           withr_3.0.0              spatstat.random_3.1-4   
##  [16] colorspace_2.1-0         progressr_0.13.0         filelock_1.0.2          
##  [19] highr_0.10               knitr_1.42               ROCR_1.0-11             
##  [22] ggsignif_0.6.4           tensor_1.5               listenv_0.9.0           
##  [25] labeling_0.4.2           GenomeInfoDbData_1.2.7   polyclip_1.10-4         
##  [28] farver_2.1.1             bit64_4.0.5              rprojroot_2.0.3         
##  [31] parallelly_1.35.0        vctrs_0.6.5              generics_0.1.3          
##  [34] xfun_0.39                BiocFileCache_2.2.1      R6_2.5.1                
##  [37] doParallel_1.0.17        ggbeeswarm_0.7.2         clue_0.3-64             
##  [40] bitops_1.0-7             rhdf5filters_1.6.0       spatstat.utils_3.0-2    
##  [43] cachem_1.0.8             DelayedArray_0.20.0      vroom_1.6.3             
##  [46] promises_1.2.0.1         BiocIO_1.4.0             scales_1.3.0            
##  [49] beeswarm_0.4.0           globals_0.16.2           goftest_1.2-3           
##  [52] rlang_1.1.3              GlobalOptions_0.1.2      splines_4.1.2           
##  [55] rstatix_0.7.2            lazyeval_0.2.2           broom_1.0.5             
##  [58] spatstat.geom_3.1-0      yaml_2.3.7               reshape2_1.4.4          
##  [61] abind_1.4-5              backports_1.4.1          httpuv_1.6.10           
##  [64] tools_4.1.2              ellipsis_0.3.2           jquerylib_0.1.4         
##  [67] ggridges_0.5.4           progress_1.2.2           zlibbioc_1.40.0         
##  [70] RCurl_1.98-1.12          prettyunits_1.1.1        ggpubr_0.6.0            
##  [73] deldir_1.0-6             pbapply_1.7-0            GetoptLong_1.0.5        
##  [76] zoo_1.8-12               ggrepel_0.9.5            cluster_2.1.2           
##  [79] magick_2.7.3             scattermore_1.0          circlize_0.4.15         
##  [82] lmtest_0.9-40            RANN_2.6.1               fitdistrplus_1.1-11     
##  [85] hms_1.1.3                patchwork_1.2.0          mime_0.12               
##  [88] evaluate_0.21            xtable_1.8-4             XML_3.99-0.14           
##  [91] shape_1.4.6              biomaRt_2.50.3           compiler_4.1.2          
##  [94] KernSmooth_2.23-20       crayon_1.5.2             htmltools_0.5.5         
##  [97] later_1.3.1              tzdb_0.4.0               DBI_1.1.3               
## [100] dbplyr_2.3.2             rappdirs_0.3.3           MASS_7.3-54             
## [103] car_3.1-0                cli_3.6.2                parallel_4.1.2          
## [106] igraph_1.4.2             pkgconfig_2.0.3          GenomicAlignments_1.30.0
## [109] sp_1.6-0                 plotly_4.10.1            spatstat.sparse_3.0-1   
## [112] xml2_1.3.4               foreach_1.5.2            vipor_0.4.5             
## [115] bslib_0.4.2              digest_0.6.31            sctransform_0.3.5       
## [118] RcppAnnoy_0.0.20         spatstat.data_3.0-1      rmarkdown_2.21          
## [121] leiden_0.4.2             uwot_0.1.14              curl_5.0.0              
## [124] restfulr_0.0.15          shiny_1.7.4              Rsamtools_2.10.0        
## [127] rjson_0.2.21             lifecycle_1.0.3          nlme_3.1-153            
## [130] jsonlite_1.8.4           Rhdf5lib_1.16.0          carData_3.0-5           
## [133] viridisLite_0.4.2        fansi_1.0.4              pillar_1.9.0            
## [136] lattice_0.20-45          KEGGREST_1.34.0          fastmap_1.1.1           
## [139] httr_1.4.6               survival_3.2-13          png_0.1-8               
## [142] iterators_1.0.14         bit_4.0.5                stringi_1.7.12          
## [145] sass_0.4.6               blob_1.2.4               memoise_2.0.1           
## [148] irlba_2.3.5.1            future.apply_1.10.0

Overlap VISTA enhancer database with HDMA global peakset

07 October, 2025