# **MODIFY THIS CHUNK**
project_id <- "HDMA-public" # determines the name of the cache folder
doc_id <- "05-misc/02" # determines name of the subfolder of `figures` where pngs/pdfs are saved
out <- here::here("output/", doc_id); dir.create(out, recursive = TRUE)
figout <- here::here("figures/", doc_id, "/")
script_path <- here::here("code/utils/")Here we use the BPCells objects to quickly visualize genomic tracks at different scales.
# libraries
library(rtracklayer)
library(GenomicRanges)
library(glue)
library(BPCells)
library(ggseqlogo)
suppressPackageStartupMessages({
library(tidyverse)
library(patchwork)
library(here)
library(BSgenome.Hsapiens.UCSC.hg38)
})
# shared project scripts/functions
scriptPath <- here::here("code/utils")
source(paste0(scriptPath, "/track_helpers_BL.R"))
source(paste0(scriptPath, "/track_helpers_SJ.R"))
source(paste0(scriptPath, "/trackplots.R")) # BPCells trackplot helpers in dev# read global BPCells object
global_bp_obj <- readRDS(paste0(here::here("output/05-misc/01"), "/global_bp_obj.rds"))If the ATAC_merged/ RNA_merged/ RNA_raw_merged/ paths containing BPCells files are moved in the file system, must re-add to BPCells object. This is because BPCells stores raw data on disk and the BPCells objects are linked to folders on disk.
# global_bp_obj$frags <- open_fragments_dir("/your/new/path/to/ATAC_merged/")
# global_bp_obj$rna <- open_matrix_dir("/your/new/path/to/RNA_merged/")
# global_bp_obj$rna_raw <- open_matrix_dir("/your/new/path/to/RNA_raw_merged/")
# saveRDS(global_bp_obj, paste0(out, "/global_bp_obj.rds"))plot a gene, cluster by organ, coverage tracks only this should take less than 1min
p1 <- trackplot_helper_v2c(
gene="ONECUT1",
clusters=global_bp_obj$cell_metadata$organ,
fragments=global_bp_obj$frags,
cell_read_counts=global_bp_obj$cell_metadata$nFrags,
transcripts=global_bp_obj$transcripts,
flank=1e5
)
p1plot a region, cluster by organ, coverage tracks only note that the “flank” argument is not actually used when region is supplied instead of gene, if you wish to extend the window, simply give an extended region to the function
p2 <- trackplot_helper_v2c(
region="chr15:52730000-52820000",
clusters=global_bp_obj$cell_metadata$organ,
fragments=global_bp_obj$frags,
cell_read_counts=global_bp_obj$cell_metadata$nFrags,
transcripts=global_bp_obj$transcripts
)
p2plot a gene, smaller window, cluster by organ, coverage + decontXed expression + peak2gene loops
p3 <- trackplot_helper_v2c(
gene="ONECUT1",
clusters=global_bp_obj$cell_metadata$organ,
fragments=global_bp_obj$frags,
cell_read_counts=global_bp_obj$cell_metadata$nFrags,
transcripts=global_bp_obj$transcripts,
flank=2e4,
expression_matrix=global_bp_obj$rna,
loops=global_bp_obj$loops_p2g$global,
loop_color="Correlation",
loop_label = "Peak2Gene Links (global)"
)
p3plot a region, smaller window, cluster by organ, coverage + raw RNA expression
p4 <- trackplot_helper_v2c(
region="chr15:52730000-52820000",
gene="ONECUT1",
clusters=global_bp_obj$cell_metadata$organ,
fragments=global_bp_obj$frags,
cell_read_counts=global_bp_obj$cell_metadata$nFrags,
transcripts=global_bp_obj$transcripts,
expression_matrix=global_bp_obj$rna_raw
)
p4filter to eye clusters only, cluster by cell type, coverage + decontXed expression + ABC loops linked in eye clusters
submeta <- global_bp_obj$cell_metadata %>% dplyr::filter(organ=="Eye")
p5 <- trackplot_helper_v2c(
region="chr15:52730000-52820000",
gene="ONECUT1",
clusters=submeta$archive_L3_clusterID,
fragments=global_bp_obj$frags %>% select_cells(submeta$cb),
cell_read_counts=submeta$nFrags,
transcripts=global_bp_obj$transcripts,
expression_matrix=global_bp_obj$rna[,submeta$cb],
loops=global_bp_obj$loops_abc$Eye,
loop_color="ABC.Score",
loop_label = "ABC predictions (Eye)"
)
p5filter to horizontal cells (eye) and hepatocytes (liver) only, cluster by cell type, coverage + decontXed expression + ABC loops linked in eye or liver clusters
submeta <- global_bp_obj$cell_metadata %>% dplyr::filter(archive_L2_clusterName %in% c("hepatocyte", "horizontal"))
p6 <- trackplot_helper_v2c(
region="chr15:52730000-52820000",
gene="ONECUT1",
clusters=submeta$archive_L3_clusterID,
fragments=global_bp_obj$frags %>% select_cells(submeta$cb),
cell_read_counts=submeta$nFrags,
transcripts=global_bp_obj$transcripts,
loops=c(global_bp_obj$loops_abc$Eye, global_bp_obj$loops_abc$Liver),
loop_color="ABC.Score",
expression_matrix=global_bp_obj$rna[,submeta$cb],
loop_label = "ABC predictions (Eye&Liver)"
)
p6plot a region, cluster by organ, coverage tracks only, add a highlight region
p7 <- trackplot_helper_v2c(
region="chr15:52730000-52820000",
highlights=gr_to_highlight(GRanges(c("chr15:52731000-52735000","chr15:52790000-52791000"))),
clusters=global_bp_obj$cell_metadata$organ,
fragments=global_bp_obj$frags,
cell_read_counts=global_bp_obj$cell_metadata$nFrags,
transcripts=global_bp_obj$transcripts
)
p7p8 <- trackplot_helper_v2c(
region="chr15:52785000-52790000",
clusters=global_bp_obj$cell_metadata$organ,
fragments=global_bp_obj$frags,
cell_read_counts=global_bp_obj$cell_metadata$nFrags,
transcripts=global_bp_obj$transcripts,
annot=global_bp_obj$peaks$global,
annot_color="score",
annot_labelby="peak_name",
annot_label="Global peaks"
)
p8add motif annotations from eye horizontal cells, plot the horizontal cell cluster only
submeta <- global_bp_obj$cell_metadata %>% dplyr::filter(archive_L2_clusterName %in% c("hepatocyte", "horizontal"))
p9 <- trackplot_helper_v2c(
region="chr15:52789730-52789950",
clusters=submeta$archive_L3_clusterID,
fragments=global_bp_obj$frags %>% select_cells(submeta$cb),
cell_read_counts=submeta$nFrags,
transcripts=global_bp_obj$transcripts,
annot=global_bp_obj$hits$Eye_c11,
annot_color="score",
annot_labelby="name",
annot_strand=T, # careful that if the width of the annotation is too narrow, showing strand will run into an error
annot_label="Eye horizontal cells hits"
)
p9submeta <- global_bp_obj$cell_metadata %>% dplyr::filter(archive_L2_clusterName %in% c("hepatocyte", "horizontal"))
p10 <- trackplot_helper_v2c(
region="chr15:52780500-52780800",
clusters=submeta$archive_L3_clusterID,
fragments=global_bp_obj$frags %>% select_cells(submeta$cb),
cell_read_counts=submeta$nFrags,
transcripts=global_bp_obj$transcripts,
annot=global_bp_obj$hits$Eye_c11,
annot_color="score",
annot_labelby="name",
annot_strand=TRUE, # careful that if the width of the annotation is too narrow, showing strand will run into an error
annot_label="Eye horizontal cells hits"
)
p10
# Example 11: ChromBPNet contribution scores add chrombpnet contribution
score seqlogo annotations, around mm101 enhancer in liver
Contribution score bigwigs are not stored in the global_bp_obj.rds object because they are too big. Access bigwigs on disk.
cluster_meta <- global_bp_obj$cell_metadata
sub_meta <- global_bp_obj$cell_metadata %>% dplyr::filter(organ=="Liver")
sub_meta$archive_L2_clusterName <- factor(sub_meta$archive_L2_clusterName, levels=c( "erythroblast", "cycling erythroblast", "early erythroblast", "Kupffer", "B cell progenitor", "cholangiocyte", "endothelial", "hepatocyte", "megakaryocyte", "stellate"))
region <- "chr16:105080-105200" # zoomed in mm101
track_contribs_list <- list()
for (L2_cluster in levels(sub_meta$archive_L2_clusterName)){
print(L2_cluster)
L1_clusters <- sub_meta[sub_meta$archive_L2_clusterName==L2_cluster, "Cluster_chrombpnet"] %>% unique
bw_contrib_list <- list()
for (clust in L1_clusters){
# load contribution scores
print(paste0("loading contrib scores from ", clust))
bw_contrib_list[[clust]] <- rtracklayer::import.bw(glue(here::here("output/03-chrombpnet/01-models/contribs/bias_Heart_c0_thresh0.4/{clust}/average_shaps.counts.bw")), selection=BigWigSelection(GRanges(region))) %>% as.data.frame
}
tmp <- do.call(rbind, bw_contrib_list)
bw_contrib <- tmp %>% dplyr::group_by(seqnames,start,end, width, strand) %>% dplyr::summarise(score=mean(score)) %>% GRanges
track_contribs_list[[L2_cluster]] <- trackplot_contribs_BL(bw_contrib,
region = region,
track_label = NULL,
facet_label = L2_cluster,
genome = BSgenome.Hsapiens.UCSC.hg38,
rel_height = 1,
ymax = 0.25, ymin=-0.039,
) + xlab(NULL)
}## [1] "erythroblast"
## [1] "loading contrib scores from Liver_c2"
## [1] "loading contrib scores from Liver_c8"
## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.
## [1] "cycling erythroblast"
## [1] "loading contrib scores from Liver_c0"
## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.
## [1] "early erythroblast"
## [1] "loading contrib scores from Liver_c5"
## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.
## [1] "Kupffer"
## [1] "loading contrib scores from Liver_c7"
## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.
## [1] "B cell progenitor"
## [1] "loading contrib scores from Liver_c11"
## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.
## [1] "cholangiocyte"
## [1] "loading contrib scores from Liver_c13"
## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.
## [1] "endothelial"
## [1] "loading contrib scores from Liver_c10"
## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.
## [1] "hepatocyte"
## [1] "loading contrib scores from Liver_c1"
## [1] "loading contrib scores from Liver_c3"
## [1] "loading contrib scores from Liver_c4"
## [1] "loading contrib scores from Liver_c6"
## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.
## [1] "megakaryocyte"
## [1] "loading contrib scores from Liver_c12"
## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.
## [1] "stellate"
## [1] "loading contrib scores from Liver_c9"
## `summarise()` has grouped output by 'seqnames', 'start', 'end', 'width'. You
## can override using the `.groups` argument.
## @ plotting basepair-level contribs for width 121
## Scale for x is already present. Adding another scale for x, which will replace
## the existing scale.
region <- "chr16:105080-105200" # zoomed in mm101
track_genes <- trackplot_gene(global_bp_obj$transcripts, region) + ggplot2::guides(color = "none")
scale_plot <- trackplot_scalebar(region)
track_hits <- trackplot_genome_annotation(loci = global_bp_obj$hits[["Liver_c0"]],
region = region,
color_by = "score",
show_strand = T,
colors = c("white", "red"),
label_size = 3,
label_by = "name",
track_label = "Instances")
plot_list <- list(track_genes + highlight_region(region, alpha = 0.4, color = "yellow"), track_hits)
p <- trackplot_combine(c(list(scale_plot), track_contribs_list, plot_list),
title = str_to_pretty(region)) &
ggplot2::theme(legend.direction = "vertical")
p# ggsave(plot=p, paste0(figout, "contrib_scores_mm101.pdf"), width=8, height=6)