# **MODIFY THIS CHUNK**
library(here)
kundaje_dir <- trimws(readr::read_lines(here("code/AK_PROJ_DIR.txt")))
doc_id <- "04c"
out <- here( "output/03-chrombpnet/03-syntax/", doc_id); dir.create(out, recursive = TRUE)
figout <- here( "figures/03-chrombpnet/03-syntax", doc_id, "/"); dir.create(figout, recursive = TRUE)
chrombpnet_dir <- here( "output/03-chrombpnet")Here, we load the results of in silico marginalizations to assess motif cooperativity/synergy and syntax constraints.
Firstly, for each composite motif, we focus on the cluster with the most instances, and evaluate synergy in that cell type. We will assign motif pairs as having synergy or not, and having hard or soft syntax (or both or neither).
Secondly, we run the in silico marginalizations for every composite motif in every cell type, to more generally assess specificity of the results and investigate model predictions on motifs for TFs that are not expressed to be co-expressed.
library(dplyr)
library(tidyr)
library(ggplot2)
library(readr)
library(scales)
library(glue)
library(purrr)
library(stringr)
library(ggrepel)
library(cowplot)
library(pheatmap)
library(ggseqlogo)
library(universalmotif)
library(TFBSTools)
# for interactive stuff
library(plotly)
library(reactable)
library(shiny)
script_path <- here( "code/utils/")
source(file.path(script_path, "plotting_config.R"))
source(file.path(script_path, "hdma_palettes.R"))
source(file.path(script_path, "sj_scRNAseq_helpers.R"))
source(file.path(script_path, "chrombpnet_utils.R"))
ggplot2::theme_set(theme_BOR() + theme(strip.background = element_blank()))# load hits
hits_all <- readRDS(file.path(chrombpnet_dir, "03-syntax/01/hits.Rds")) %>% bind_rows()
# load intermediate objects
load(file.path(chrombpnet_dir, "03-syntax/01/intermediate_obj.Rda"))Load merged modisco reports:
cwm_means <- read_tsv(here("output/03-chrombpnet/02-compendium/modisco_compiled/cwm_sums_df.tsv"))## Rows: 6362 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): cluster, pattern_class, motif, component_pattern
## dbl (4): cwm_sum, abs_cwm_sum, cwm_mean, abs_cwm_mean
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
modisco_merged <- read_tsv(here("output/03-chrombpnet/02-compendium/modisco_compiled_anno/modisco_merged_reports.tsv")) %>%
# get short cluster ID
left_join(cluster_meta %>% dplyr::select(Cluster, Cluster_ChromBPNet),
by = c("component_celltype" = "Cluster_ChromBPNet")) %>%
left_join(cwm_means, by = "component_pattern")## Rows: 6362 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): merged_pattern, component_celltype, pattern_class, pattern, compone...
## dbl (1): n_seqlets
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(modisco_merged)## # A tibble: 6 × 14
## merged_pattern component_celltype pattern_class.x pattern n_seqlets
## <chr> <chr> <chr> <chr> <dbl>
## 1 neg.Average_12__merged_p… Spleen_c5 neg_patterns patter… 88
## 2 neg.Average_12__merged_p… Spleen_c0 neg_patterns patter… 461
## 3 neg.Average_12__merged_p… Skin_c5 neg_patterns patter… 302
## 4 neg.Average_12__merged_p… Heart_c13 neg_patterns patter… 128
## 5 neg.Average_12__merged_p… Skin_c7 neg_patterns patter… 708
## 6 neg.Average_12__merged_p… Heart_c5 neg_patterns patter… 47
## # ℹ 9 more variables: component_pattern <chr>, Cluster <chr>, cluster <chr>,
## # pattern_class.y <chr>, motif <chr>, cwm_sum <dbl>, abs_cwm_sum <dbl>,
## # cwm_mean <dbl>, abs_cwm_mean <dbl>
Path to the full pipeline for clusters of focus:
ism_dir <- file.path(chrombpnet_dir, "03-syntax/04b/in_silico_marginalization_full/")
fs::dir_exists(ism_dir)## /oak/stanford/groups/wjg/skim/projects/HDMA-public/output/03-chrombpnet/03-syntax/04b/in_silico_marginalization_full/
## TRUE
List of composites to test:
compo_to_test <- read_tsv(here("code/03-chrombpnet/03-syntax/04b-composites_to_test.tsv")) %>%
filter(test_spacing == "Y")## Rows: 157 Columns: 26
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (15): motif_name, Cluster, category, annotation_broad, component_motifA,...
## dbl (8): n_hits_per_cluster, idx_uniq, variant_idx, total_hits, start_A, en...
## lgl (3): composite_cwm_fwd, componentA_cwm_fwd, componentB_cwm_fwd
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
compo_to_test$motif_name_safe <- gsub("\\/", "\\.", compo_to_test$motif_name)
compo_to_test$done <- ifelse(file.exists(file.path(ism_dir, compo_to_test$motif_name_safe, compo_to_test$Cluster, ".done")),
TRUE,
FALSE)
glue("{sum(compo_to_test$done)/nrow(compo_to_test)*100}%")## 100%
compo_to_test <- compo_to_test %>%
dplyr::rename(seqA = test_seqA, seqB = test_seqB)Check for duplicates:
# sort the two motif sequences alphabetically so we can find duplicates even
# when the order is swapped
pairs <- map_dfr(1:nrow(compo_to_test), function(i) {
motifs <- compo_to_test[i, c("seqA", "seqB")] %>% unlist() %>% sort()
data.frame(motif_name = compo_to_test$motif_name[i],
A = motifs[[1]],
B = motifs[[2]])
})
# should be empty if no duplicates.
pairs %>% group_by(A, B) %>% mutate(n = n()) %>% filter(n > 1)## # A tibble: 0 × 4
## # Groups: A, B [0]
## # ℹ 4 variables: motif_name <chr>, A <chr>, B <chr>, n <int>
Outputs of the in silico marginalization experiments:
result_outs <- pmap_dfr(list(compo_to_test$motif_name_safe, compo_to_test$Cluster, compo_to_test$motif_name),
~ data.table::fread(file.path(ism_dir, ..1, ..2, "results.tsv"), data.table = FALSE) %>%
mutate(motif_name = ..3)) %>%
left_join(compo_to_test %>% dplyr::select(motif_name, category), by = "motif_name") %>%
dplyr::select(-seqA_palindrome, -seqB_palindrome)Z-scores on the marginal joint effects:
z_scores <- pmap_dfr(list(compo_to_test$motif_name_safe, compo_to_test$Cluster, compo_to_test$motif_name),
function(motif_safe, cluster, motif) {
df <- data.table::fread(file.path(ism_dir, motif_safe, cluster, "Z_scores.tsv"), data.table = FALSE) %>%
mutate(motif_name = motif)
df
}) %>%
left_join(compo_to_test %>% dplyr::select(motif_name, category), by = "motif_name")Summarized effects for solo motifs:
solo_df <- pmap_dfr(list(compo_to_test$motif_name_safe, compo_to_test$Cluster, compo_to_test$motif_name),
~ data.table::fread(file.path(ism_dir, ..1, ..2, "summarized_effects_solo.tsv"),
data.table = FALSE) %>%
mutate(motif_name = ..3)) %>%
left_join(compo_to_test %>% dplyr::select(motif_name, category), by = "motif_name") %>%
dplyr::rename(orientation = key)Summarized effects for spaced motifs:
spacing_df <- pmap_dfr(list(compo_to_test$motif_name_safe, compo_to_test$Cluster, compo_to_test$motif_name),
~ data.table::fread(file.path(ism_dir, ..1, ..2, "summarized_effects_per_spacing.tsv"), data.table = FALSE) %>%
mutate(motif_name = ..3)) %>%
left_join(compo_to_test %>% dplyr::select(motif_name, category, seqA, seqB), by = "motif_name") %>%
dplyr::rename(orientation = key) %>%
rowwise() %>%
mutate(dist_center_to_center = round(nchar(seqA)/2) + spacing + round(nchar(seqB)/2))All experiments in all cell types:
ism_dir_all <- file.path(chrombpnet_dir, "03-syntax/04b/in_silico_marginalization")
fs::dir_exists(ism_dir_all)## /oak/stanford/groups/wjg/skim/projects/HDMA-public/output/03-chrombpnet/03-syntax/04b/in_silico_marginalization
## TRUE
List of composites to test:
# get all tests performed (all motif pairs x all clusters)
all_tests <- expand_grid(
motif_name_safe = compo_to_test$motif_name_safe,
cluster = chrombpnet_models_keep$Cluster
)
all_tests$done <- ifelse(fs::file_exists(file.path(ism_dir_all, all_tests$motif_name_safe, all_tests$cluster, ".done")),
TRUE,
FALSE)
sum(all_tests$done)## [1] 26082
sum(all_tests$done)/nrow(all_tests)## [1] 1
nrow(all_tests)-sum(all_tests$done)## [1] 0
all_tests2 <- all_tests %>% filter(done) %>%
left_join(compo_to_test %>% dplyr::select(-Cluster, -done), by = "motif_name_safe")Outputs of the in silico marginalization experiments:
result_outs_allexp <- map2_dfr(all_tests2$motif_name_safe, all_tests2$cluster,
~ data.table::fread(file.path(ism_dir_all, .x, .y, "results.tsv"), data.table = FALSE)
%>% mutate(cluster = .y)) %>%
left_join(compo_to_test %>% dplyr::select(motif_name, category), by = "motif_name") %>%
dplyr::select(-seqA_palindrome, -seqB_palindrome)
nrow(result_outs_allexp)## [1] 26082
Outputs of the solo tests:
solo_df_allexp <- pmap_dfr(list(all_tests2$motif_name_safe, all_tests2$cluster, all_tests2$motif_name),
~ data.table::fread(file.path(ism_dir_all, ..1, ..2, "summarized_effects_solo.tsv"),
data.table = FALSE) %>%
mutate(motif_name = ..3, cluster = ..2))
head(solo_df_allexp)## key counts_after_mean counts_after_sd effect_mean effect_sd motif_name
## 1 A 2.405999 0.1453060 0.021411831 0.010908893 1|AP-2_BHLH
## 2 B 2.379369 0.1503053 -0.005217277 0.010656357 1|AP-2_BHLH
## 3 A + B 4.785368 0.2955811 0.016194545 0.020554082 1|AP-2_BHLH
## 4 A 2.436762 0.1218845 -0.001816668 0.001396795 1|AP-2_BHLH
## 5 B 2.429812 0.1221980 -0.008766225 0.003088963 1|AP-2_BHLH
## 6 A + B 4.866573 0.2440730 -0.010582877 0.004265720 1|AP-2_BHLH
## cluster
## 1 Adrenal_c0
## 2 Adrenal_c0
## 3 Adrenal_c0
## 4 Adrenal_c1
## 5 Adrenal_c1
## 6 Adrenal_c1
Save intermediates:
save(spacing_df, solo_df, z_scores, result_outs, compo_to_test, result_outs_allexp, solo_df_allexp,
file = glue("{out}/intermediate_obj.Rda"))blues <- RColorBrewer::brewer.pal(6, "Blues")
purples <- RColorBrewer::brewer.pal(6, "Purples")
palette_orientation_hetero <- c('A,B HT' = blues[3],
'B,A HT' = blues[4],
'HH' = blues[5],
'TT' = blues[6])
palette_orientation_homo <- c('HT' = purples[2],
'HH' = purples[4],
'TT' = purples[6])
palette_coop <- c("homo - not cooperative" = "gray70",
"hetero - not cooperative" = "gray70",
"homo - cooperative" = "#7696f5",
"hetero - cooperative" = "#ab7ec2",
"homo - specific" = "royalblue3",
"hetero - specific" = "darkorchid4")
shapemap_result <- c("no synergy" = 1,
"soft" = 24,
"hard & soft" = 23,
"hard" = 22)
cmap_category["not cooperative"] <- "gray80"
palette_motifs <- colorRampPalette(RColorBrewer::brewer.pal(9, "Blues")[3:9])(n=nrow(compo_to_test))We want to combine results into one dataframe:
# join results
all_results_best <- compo_to_test %>%
dplyr::select(motif_name, Cluster, n_hits_per_cluster, idx_uniq, annotation_broad, component_motifA, component_motifB, seqA, seqB) %>%
left_join(result_outs, by = "motif_name") %>%
left_join(z_scores %>% dplyr::select(-category),
by = c("motif_name", "best_orientation" = "orientation", "best_spacing" = "spacing")) %>%
left_join(spacing_df %>%
dplyr::select(-category, -effect_mean, -effect_sd, -seqA, -seqB) %>%
dplyr::rename(best_counts_after_mean = counts_after_mean,
best_counts_after_sd = counts_after_sd),
by = c("motif_name", "best_orientation" = "orientation", "best_spacing" = "spacing")) %>%
left_join(solo_df %>%
dplyr::filter(orientation == "A + B") %>%
dplyr::select(motif_name,
sum_counts_after_mean = counts_after_mean,
sum_counts_after_sd = counts_after_sd,
sum_effect = effect_mean)) %>%
# the maximum difference between joint vs independent effects across all arrangements
mutate(joint_vs_ind_max = joint_effect - sum_effect)## Joining with `by = join_by(motif_name)`
# adjust p-value
all_results_best$joint_vs_sum_p_value_adj <- p.adjust(all_results_best$joint_vs_sum_p_value, method = "BH")We want to calculate the difference in joint/independent effects for each arrangement;
all_results_spacing <- spacing_df %>%
dplyr::rename(joint_effect = effect_mean) %>%
left_join(solo_df %>% filter(orientation == "A + B") %>%
dplyr::select(-orientation, -counts_after_mean, -counts_after_sd, -effect_sd, sum_effect = effect_mean),
by = c("motif_name", "category")) %>%
mutate(joint_vs_ind_per_arrangement = joint_effect - sum_effect)Similarly, for the set of tests of all motifs in all cell types, we compute difference between joint and independent effects:
# adjust p-value
result_outs_allexp$joint_vs_sum_p_value_adj <- p.adjust(result_outs_allexp$joint_vs_sum_p_value, method = "BH")
all_results_allexp <- result_outs_allexp %>%
left_join(solo_df_allexp %>% filter(key == "A + B") %>%
mutate(sum_effect = effect_mean) %>%
dplyr::select(-key, -counts_after_mean, -counts_after_sd, -effect_sd, -effect_mean),
by = c("motif_name", "cluster")) %>%
mutate(joint_vs_ind_max = joint_effect - sum_effect) %>%
mutate(log10padj = -log10(joint_vs_sum_p_value_adj))Let’s annotate the best orientation for each motif:
all_results_spacing$is_best_orientation <- FALSE
get_best_orientation <- function(motif) {
all_results_best %>% filter(motif_name == motif) %>% pull(best_orientation)
}
motifs <- unique(all_results_spacing$motif_name)
print(motifs)## [1] "1|AP-2_BHLH"
## [2] "10|BHLH:ATOH/NEUROD_HD:DLX/LHX#1"
## [3] "104|BZIP:ATF/CREB_ETS"
## [4] "105|BZIP:ATF/CREB_ETS:ELF/ETV"
## [5] "106|BZIP:ATF/CREB_MEF2"
## [6] "110|BZIP:ATF/CREB_SP/KLF#2"
## [7] "112|BZIP:ATF/CREB_TEAD#2"
## [8] "12|BHLH:ATOH/NEUROD_MEF2"
## [9] "121|BZIP:CEBP_FOX#2"
## [10] "123|BZIP:CEBP_NFI"
## [11] "124|BZIP:CEBP_NR2/RXR"
## [12] "125|BZIP:CEBP_TEAD"
## [13] "128|BZIP:DBP/HLF_FOX"
## [14] "130|BZIP:DBP/HLF_NKX"
## [15] "139|BZIP:FOSL/JUND_ETS"
## [16] "14|BHLH:MXI1_BHLH:TWIST/ZBTB"
## [17] "140|BZIP:FOSL/JUND_FOX"
## [18] "141|BZIP:FOSL/JUND_IKZF1"
## [19] "142|BZIP:FOSL/JUND_NR:ESRRA/NR5A#1"
## [20] "144|BZIP:FOSL/JUND_TEAD"
## [21] "146|BZIP:MAFG/MAFF_BZIP:MAFG/MAFF"
## [22] "147|BZIP:MAFG/MAFF_TEAD"
## [23] "15|BHLH:NHLH/TFAP_NFY"
## [24] "152|CTCF_TBP"
## [25] "156|EBF_ETS:ELF/ETV"
## [26] "16|BHLH:OLIG/NEUROG_MEF2"
## [27] "162|ETS:ELF/ETV_ETS:ELF/ETV#1"
## [28] "163|ETS:ELF/ETV_FOX"
## [29] "164|ETS:ELF/ETV_NFY"
## [30] "165|ETS:ELF/ETV_SP/KLF#1"
## [31] "17|BHLH:OLIG/NEUROG_SOX"
## [32] "177|ETS:ELF/SPIB_ETS:ELF/SPIB#2"
## [33] "185|ETS:ELF/SPIB_FOX"
## [34] "187|ETS:ELF/SPIB_GATA#2"
## [35] "191|ETS:ELF/SPIB_NFI#4"
## [36] "193|ETS:ELF/SPIB_NFY"
## [37] "199|ETS:ELF/SPIB_NR:ESRRA/NR5A#5"
## [38] "201|ETS:ELF/SPIB_RUNX#1"
## [39] "208|ETS:ELF/SPIB_SOX#6"
## [40] "209|ETS:ELF/SPIB_SP/KLF"
## [41] "215|ETS_ETS#1"
## [42] "217|ETS_FOX#1"
## [43] "219|ETS_IKZF1"
## [44] "221|ETS_NFAT#2"
## [45] "224|ETS_NR:ESRRA/NR5A#2"
## [46] "225|ETS_SOX"
## [47] "226|ETS_SP/KLF#1"
## [48] "240|FOX_FOX#3"
## [49] "242|FOX_GATA#2"
## [50] "245|FOX_NFI"
## [51] "25|BHLH:TFAP/MYOD_BHLH:TFAP/MYOD#2"
## [52] "253|FOX_NKX#7"
## [53] "255|FOX_NR:HNF4A/HNF4G"
## [54] "256|FOX_NR:NR2F/RXRG"
## [55] "257|FOX_RFX"
## [56] "258|FOX_SOX"
## [57] "263|GATA1_TAL1"
## [58] "266|GATA_GATA#3"
## [59] "271|GATA_HD:MEIS/TGIF/TBX#1"
## [60] "273|GATA_MEF2#1"
## [61] "278|GATA_NFI"
## [62] "280|GATA_NR:ESRRA/NR5A#2"
## [63] "281|GATA_NR:NR4A/NR2C"
## [64] "283|GATA_SP/KLF#2"
## [65] "285|GATA_TEAD"
## [66] "288|GRHL_GRHL#1"
## [67] "290|GRHL_TEAD#1"
## [68] "30|BHLH:TFAP/MYOD_BHLH:TWIST/ZBTB"
## [69] "301|HD:DLX/LHX_HD:MEIS/TGIF/TBX"
## [70] "303|HD:DLX/LHX_POU"
## [71] "307|HD:MEIS/TGIF/TBX_HD:MEIS/TGIF/TBX#1"
## [72] "309|HD:MEIS/TGIF/TBX_HD:PAX/VSX"
## [73] "310|HD:MEIS/TGIF/TBX_HD:PITX/OTX"
## [74] "311|HD:MEIS/TGIF/TBX_MEF2"
## [75] "32|BHLH:TFAP/MYOD_BZIP:FOSL/JUND#2"
## [76] "321|HD:PITX/OTX_HD:PITX/OTX#4"
## [77] "322|HD:PITX/OTX_HD:SIX/ZNF"
## [78] "323|HD:PITX/OTX_SP/KLF"
## [79] "326|HD_HD#1"
## [80] "33|BHLH:TFAP/MYOD_ETS"
## [81] "330|HD_NFI#1"
## [82] "338|IKZF1_NR"
## [83] "339|IKZF1_RUNX"
## [84] "34|BHLH:TFAP/MYOD_ETS:ELF/SPIB"
## [85] "340|IRF/STAT_IRF/STAT#1"
## [86] "345|MEF2_NFI"
## [87] "349|MEF2_TBP"
## [88] "35|BHLH:TFAP/MYOD_FOX#1"
## [89] "351|NFAT_NFAT#1"
## [90] "354|NFAT_SOX#1"
## [91] "357|NFI_NR2E1"
## [92] "368|NFY_NFY#1"
## [93] "371|NFY_SP/KLF#2"
## [94] "38|BHLH:TFAP/MYOD_GATA"
## [95] "382|NKX_NKX#5"
## [96] "389|NKX_TEAD#2"
## [97] "39|BHLH:TFAP/MYOD_HD:PBX/PKNOX"
## [98] "393|NR:ESRRA/NR5A_SOX"
## [99] "395|NR:HNF4A/HNF4G_NR:HNF4A/HNF4G"
## [100] "40|BHLH:TFAP/MYOD_ROR"
## [101] "402|NR_NR#2"
## [102] "409|PAX_SOX"
## [103] "41|BHLH:TFAP/MYOD_SP/KLF"
## [104] "413|POU4_POU4"
## [105] "414|POU_POU#1"
## [106] "416|POU_RUNX"
## [107] "422|RFX_RUNX"
## [108] "423|RFX_SOX"
## [109] "424|RFX_SP/KLF"
## [110] "428|RUNX_RUNX#1"
## [111] "430|RUNX_TCF7L/LEF"
## [112] "434|SOX_SOX#1"
## [113] "439|SP/KLF_SP/KLF#3"
## [114] "446|SRF_TEAD"
## [115] "452|TEAD_TEAD"
## [116] "46|BHLH:TWIST/ZBTB_BHLH:TWIST/ZBTB"
## [117] "467|p53_p53#1"
## [118] "47|BHLH:TWIST/ZBTB_HD"
## [119] "48|BHLH:TWIST/ZBTB_HD:PAX/VSX"
## [120] "5|BHLH:ASCL/TCF_ETS:ELF/SPIB"
## [121] "55|BHLH:USF/BHLHE_ETS:ELF/SPIB"
## [122] "56|BHLH:USF/BHLHE_NFY"
## [123] "59|BHLH_BHLH:ATOH/NEUROD"
## [124] "6|BHLH:ASCL/TCF_TCF7L/LEF"
## [125] "60|BHLH_BHLH:MXI1"
## [126] "63|BHLH_BHLH:TWIST/ZBTB#2"
## [127] "65|BHLH_BZIP:ATF/CREB#1"
## [128] "69|BHLH_EBF"
## [129] "71|BHLH_HD#2"
## [130] "72|BHLH_HD:MEIS/TGIF/TBX#1"
## [131] "74|BHLH_HD:PBX1/PDX1"
## [132] "75|BHLH_HD:PITX/OTX#1"
## [133] "79|BHLH_IKZF1"
## [134] "80|BHLH_MEF2#1"
## [135] "84|BHLH_NFI"
## [136] "9|BHLH:ATOH/NEUROD_BHLH:ATOH/NEUROD"
## [137] "91|BHLH_RUNX#3"
## [138] "97|BHLH_p53"
for (i in seq_along(motifs)) {
best_ori <- get_best_orientation(motifs[i])
all_results_spacing[all_results_spacing$motif_name == motifs[i] & all_results_spacing$orientation == best_ori, ]$is_best_orientation <- TRUE
}Let’s compute the max difference in joint vs ind. effects at the best orientation, in the 20-150bp range:
tmp <- all_results_spacing %>%
filter(is_best_orientation) %>%
filter(spacing %in% 20:150) %>%
group_by(motif_name) %>%
slice_max(n = 1, order_by = joint_effect) %>%
mutate(joint_vs_ind_med_distance = joint_effect - sum_effect) %>%
dplyr::select(motif_name, joint_vs_ind_med_distance) %>%
ungroup()
all_results_best <- all_results_best %>% left_join(tmp, by = "motif_name")What’s the distribution of counts?
# first, get the mean
motif_order_mean <- spacing_df %>%
group_by(motif_name) %>%
summarize(mean_counts = mean(counts_after_mean)) %>%
arrange(desc(mean_counts)) %>%
pull(motif_name)
spacing_df %>%
mutate(motif_name = factor(motif_name, levels = motif_order_mean)) %>%
mutate(upper = counts_after_mean + counts_after_sd,
lower = counts_after_mean - counts_after_sd) %>%
ggplot(aes(x = motif_name, y = counts_after_mean)) +
geom_point(aes(fill = category), shape = 21, color = "white", size = 2, alpha = 0.5) +
# geom_errorbar(aes(color = category, ymin = lower, ymax = upper), width = 0.1) +
scale_fill_manual(values = cmap_category) +
# scale_color_manual(values = cmap_category) +
facet_grid(. ~ category, space = "free_x", scales = "free_x") +
theme(strip.background = element_blank(),
axis.text.x = element_text(size = 9)) +
rotate_x() +
ylab("Mean predicted log-counts for AB") +
ggtitle("Distribution of predicted counts for all spacings/orientations of each comp. motif") +
no_legend()We also manually verified that DeepSHAPs on the ISM confirm that the motifs we inserted are indeed the ones driving the predicted accessibility. We thus remove them from downstream analysis.
dim(all_results_best)## [1] 138 30
motifs_no_contrib_support <- as.numeric(c("33", "40", "56", "72", "97", "104", "105", "152",
"164", "187", "191", "217", "281", "310", "338",
"349", "357", "413"))p_val_thresh <- 0.001
z_thresh <- 4
delta_thresh <- 0.15
# decide if significant
all_results_best <- all_results_best %>%
# we exclude cases where the contributions at the optimal arrangement didn't
# reflect the motifs we inserted, since it tells us the models are not performing
# the experiment we intended/not responding to those sequences
filter(!(idx_uniq %in% motifs_no_contrib_support)) %>%
mutate(result = case_when(
joint_vs_sum_p_value_adj < p_val_thresh & joint_vs_ind_max > delta_thresh & joint_vs_ind_med_distance > delta_thresh & max_Z > z_thresh ~ "hard & soft",
joint_vs_sum_p_value_adj < p_val_thresh & joint_vs_ind_max > delta_thresh & max_Z > z_thresh ~ "hard",
joint_vs_sum_p_value_adj < p_val_thresh & joint_vs_ind_med_distance > delta_thresh ~ "soft",
TRUE ~ "no synergy"))
all_results_spacing <- all_results_spacing %>%
left_join(all_results_best %>% dplyr::select(motif_name, result), by = "motif_name") %>%
# remove filtered-out motifs
filter(!is.na(result)) %>%
mutate(orientation = ifelse(orientation == "HT", "A,B HT", orientation),
result = factor(result, levels = rev(names(shapemap_result))))
z_scores <- z_scores %>% mutate(orientation = ifelse(orientation == "HT", "A,B HT", orientation))Similarly for the analysis of all motifs x all cell types, we simply assign each result as synergistic or not:
all_results_allexp <- all_results_allexp %>%
mutate(result = case_when(
joint_vs_sum_p_value_adj < p_val_thresh & joint_vs_ind_max > delta_thresh ~ "synergy" ,
TRUE ~ "no synergy"))
table(all_results_allexp$result)##
## no synergy synergy
## 23765 2317
First, let’s basically score the similarity of each motif to its reverse complement. Importantly, since we are operating with specific sequences for each motif, we’ll quantify similarity on that sequence rather than the motif it originates from. We’ll look at the distribution to understand what are palindromes, quasi-palindromes, and non-palindromes.
seqs_tested <- unique(c(all_results_best$seqA, all_results_best$seqB))
length(seqs_tested)## [1] 80
# convert these to motifs -- basically the PWMs will be the one-hot encoded versions
seqs_tested_ppm <- map(seqs_tested, ~ universalmotif::create_motif(.x, name = .x))
names(seqs_tested_ppm) <- seqs_tested
# reverse complement
# revcomp returns just the RC'd motif, so we need create_motif to recreate a motif class
# in-place modification to set the name as the reverse complemented sequence
# which is just the consensus sequence from the reverse complemented PPM of the original motif
seqs_tested_ppm_rc <- map(seqs_tested_ppm, ~ universalmotif::create_motif(input = revcomp(.x@motif)) %>%
set_attr("name", .@consensus))
# iterate over the two lists and calculate the similarity of a sequence to its
# reverse complement
self_sim <- map2_dfr(seqs_tested_ppm, seqs_tested_ppm_rc, function(fw, rev) {
data.frame("motif_fw" = fw@name,
"motif_rev" = rev@name,
"similarity" = universalmotif::compare_motifs(motifs = list(fw, rev), tryRC = FALSE, use.type = "PPM") %>% .[1, 2])
})Note that even some motifs which have self-similarity == 1 are not perfectly palindromic, as they may differ by one nucleotide on one side, e.g. CTAATTA and its reverse complement TAATTAG.
p_sim <- self_sim %>%
arrange(similarity) %>%
mutate(motifs = paste0(motif_fw, ", ", motif_rev)) %>%
mutate(motifs = factor(motifs, levels = unique(.$motifs))) %>%
ggplot(aes(x = motifs, y = similarity, label = motifs)) +
geom_hline(yintercept = 0.7, color = "black") +
geom_point(color = "red") +
# scale_color_manual(values = cmap_category) +
coord_flip()
ggplotly(p_sim)Get categories which are palindromic:
anno_pal <- self_sim %>%
filter(motif_fw == motif_rev) %>%
dplyr::pull(motif_fw) %>%
unique()
anno_quasipal <- self_sim %>%
filter(similarity >= 0.7) %>%
dplyr::pull(motif_fw) %>%
unique()
anno_quasipal <- base::setdiff(anno_quasipal, anno_pal)
# sanity check, should be 0
base::intersect(anno_pal, anno_quasipal)## character(0)
all_results_best <- all_results_best %>%
mutate(
seqA_palindrome = case_when(
seqA %in% anno_pal ~ "P",
seqA %in% anno_quasipal ~ "Q",
grepl("BHLH|BZIP|NFI", component_motifA) ~ "Q",
TRUE ~ "-"),
seqB_palindrome = case_when(
seqB %in% anno_pal ~ "P",
seqB %in% anno_quasipal ~ "Q",
grepl("BHLH|BZIP|NFI", component_motifB) ~ "Q",
TRUE ~ "-")) %>%
# a heuristic way to sort motifs
mutate(palindromicity = case_when(
seqA_palindrome == "P" & seqB_palindrome == "P" ~ 5,
seqA_palindrome != "-" & seqB_palindrome != "-" ~ 4,
seqA_palindrome == "P" | seqB_palindrome == "P" ~ 3,
seqA_palindrome != "-" & seqB_palindrome == "-" ~ 2.5,
seqB_palindrome != "-" & seqA_palindrome == "-" ~ 2,
seqA_palindrome == "-" & seqB_palindrome == "-" ~ 1
)) %>%
mutate(result = factor(result, levels = c("hard", "hard & soft", "soft", "no synergy")))
all_results_best %>% write_tsv(glue("{out}/composite_motif_ISM_results.tsv"))Tally up the results:
dim(all_results_best)## [1] 120 34
table(all_results_best$result)##
## hard hard & soft soft no synergy
## 39 9 19 53
# proportions
table(all_results_best$result)/sum(table(all_results_best$result))##
## hard hard & soft soft no synergy
## 0.3250000 0.0750000 0.1583333 0.4416667
DT::datatable(all_results_best)all_results_best %>%
mutate(result = factor(result, levels = c("hard", "hard & soft", "soft", "no synergy"))) %>%
ggplot(aes(x = result)) + geom_bar(fill = "black") + rotate_x() +
scale_y_continuous(breaks = seq(5, 55, 5))all_results_best %>%
dplyr::select(motif_name, idx_uniq, category, result, Cluster_ChromBPNet = Cluster,
annotation_broad, component_motifA, component_motifB, seqA, seqB,
best_orientation,
best_distance_between_motifs = best_spacing,
best_dist_centers = dist_center_to_center, best_seq,
joint_effect_log_counts = joint_effect,
joint_vs_sum_p_value,
joint_vs_sum_p_value_adj,
Z_scored_joint_effect = Z,
independent_effect = sum_effect,
seqA_palindrome,
seqB_palindrome) %>%
mutate(result = factor(result, levels = c("hard", "hard & soft", "soft", "no synergy"))) %>%
arrange(result) %>%
write_csv(glue("{out}/TABLE_ism_synergy_results.csv"))Save results:
save(all_results_best, all_results_spacing, all_results_allexp, file = glue("{out}/all_results.Rda"))What’s the distribution of spacing we see for specific interactions?
df1 <- all_results_best %>%
filter(result %in% c("hard", "hard & soft")) %>%
group_by(result, best_spacing, category) %>%
count() %>%
dplyr::rename(distance = best_spacing) %>%
mutate(type = "dist_spacing")
df2 <- all_results_best %>%
filter(result %in% c("hard", "hard & soft")) %>%
group_by(result, dist_centers, category) %>%
count() %>%
mutate(n = -n) %>%
dplyr::rename(distance = dist_centers) %>%
mutate(type = "dist_centers")
bind_rows(df1, df2) %>%
ggplot(aes(x = distance, y = n)) +
geom_hline(yintercept = 0) +
geom_col(aes(fill = type)) +
scale_fill_manual(values = c("dist_spacing" = "black", "dist_centers" = "darkorchid4")) +
xlab("distance (bp)") + ylab("Number of motif pairs") +
scale_x_continuous(breaks = seq(0, 19, by = 1), limits = c(-1, 18)) +
scale_y_continuous(breaks = seq(-5, 10)) +
no_legend() +
theme(axis.text.x = element_text(size = 9))First, we can ask among all 508 motifs, where the do the motifs w/ syntax effects rank. However, motifs which are cell type specific may have their signal diluted here by motifs which are ubiquitous and frequent.
We will focus on positive patterns for this analysis.
cmap_synergy <- c("hard" = "red3",
"hard & soft" = "red3",
"soft" = "salmon",
"no synergy" = "gray40",
"not tested" = "gray90")
motifs_w_results <- all_results_best %>%
mutate(result = as.character(result)) %>%
dplyr::select(motif_name, result) %>%
# filter(result %in% c("hard", "hard & soft", "soft")) %>%
right_join(motifs_compiled_unique) %>%
mutate(result = ifelse(is.na(result), "not tested", result)) %>%
arrange(total_hits) %>%
filter(pattern_class == "pos_patterns") %>%
dplyr::select(motif_name, result, total_hits, category) %>%
mutate(rank = dense_rank(-total_hits))## Joining with `by = join_by(motif_name)`
table(motifs_w_results$result)##
## hard hard & soft no synergy not tested soft
## 39 9 53 373 19
print(dim(motifs_w_results))## [1] 493 5
hits_motifs_w_hard_syntax <- motifs_w_results %>% filter(result %in% c("hard", "hard & soft")) %>%
pull(total_hits)
hits_motifs_w_soft_syntax <- motifs_w_results %>% filter(result %in% c("soft")) %>%
pull(total_hits)
hits_motifs_w_no_syntax <- motifs_w_results %>% filter(result %in% c("no synergy", "not tested")) %>% pull(total_hits)
length(hits_motifs_w_hard_syntax)## [1] 48
length(hits_motifs_w_soft_syntax)## [1] 19
length(hits_motifs_w_no_syntax)## [1] 426
(ks_out <- ks.test(hits_motifs_w_hard_syntax, hits_motifs_w_no_syntax))##
## Two-sample Kolmogorov-Smirnov test
##
## data: hits_motifs_w_hard_syntax and hits_motifs_w_no_syntax
## D = 0.091843, p-value = 0.86
## alternative hypothesis: two-sided
(ks_out2 <- ks.test(hits_motifs_w_soft_syntax, hits_motifs_w_no_syntax))##
## Two-sample Kolmogorov-Smirnov test
##
## data: hits_motifs_w_soft_syntax and hits_motifs_w_no_syntax
## D = 0.20064, p-value = 0.4567
## alternative hypothesis: two-sided
motifs_w_results %>%
mutate(result = factor(result, levels = names(cmap_synergy))) %>%
ggplot(aes(x = rank, y = log10(total_hits))) +
geom_hline(yintercept = 0) +
geom_point(aes(color = result), alpha = 0.7, size = 3) +
geom_segment(data = motifs_w_results %>% filter(!(result %in% c("not tested", "no synergy"))),
mapping = aes(x = rank, y = 0,
xend = rank, yend = -1/3,
color = result),
size = 1) +
# geom_line(aes(group = result)) +
scale_color_manual(values = cmap_synergy) +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
annotate("text",
label = glue("KS test p-value for motifs \nw/ hard syntax vs others: \np={round(ks_out$p.value, 3)}"),
x = 250, y = 6, size = 4) +
xlab("motif rank")This means at the global level, motifs w/ hard/soft syntax don’t seem more prevalent based on number of hits.
Which cell types have at least one motif w/ hard/soft syntax?
# this includes all 189 clusters
length(unique(modisco_merged$Cluster))## [1] 189
# motifs with hard & soft syntax are being counted as hard here:
orig_motifs_joined_w_syntax <- motifs_compiled_unique %>%
filter(pattern_class == "pos_patterns") %>%
dplyr::select(motif_name, pattern) %>%
left_join(all_results_best %>% dplyr::select(motif_name, result), by = "motif_name") %>%
mutate(result = case_when(
is.na(result) ~ "not tested",
result == "hard & soft" ~ "hard",
TRUE ~ result)) %>%
left_join(modisco_merged %>% dplyr::select(merged_pattern, Cluster_ChromBPNet = component_celltype, cwm_mean, cwm_sum), by = c("pattern" = "merged_pattern")) %>%
left_join(cluster_meta %>% dplyr::select(Cluster_ChromBPNet, organ))## Joining with `by = join_by(Cluster_ChromBPNet)`
counts_motifs_per_type_per_celltype <- orig_motifs_joined_w_syntax %>%
filter(result %in% c("hard", "soft")) %>%
group_by(organ, Cluster_ChromBPNet, result) %>%
count()
head(counts_motifs_per_type_per_celltype)## # A tibble: 6 × 4
## # Groups: organ, Cluster_ChromBPNet, result [6]
## organ Cluster_ChromBPNet result n
## <chr> <chr> <chr> <int>
## 1 Adrenal Adrenal_c0 soft 2
## 2 Adrenal Adrenal_c1 soft 1
## 3 Adrenal Adrenal_c2 soft 2
## 4 Adrenal Adrenal_c3 soft 1
## 5 Brain Brain_c0 soft 1
## 6 Brain Brain_c1 hard 1
table(counts_motifs_per_type_per_celltype$result)##
## hard soft
## 115 113
(props <- table(counts_motifs_per_type_per_celltype$result)/189)##
## hard soft
## 0.6084656 0.5978836
length(unique(counts_motifs_per_type_per_celltype$Cluster_ChromBPNet))/189## [1] 0.8148148
counts_motifs_per_type_per_celltype %>%
mutate(n = ifelse(result == "soft", -n, n)) %>%
ggplot(aes(x = Cluster_ChromBPNet, y = n)) +
geom_bar(aes(fill = organ), stat = "identity") +
geom_hline(yintercept = 0) +
scale_fill_manual(values = cmap_organ) +
xlab("cluster") +
rotate_x() +
theme(axis.text.x = element_text(size = 11)) +
scale_y_continuous(breaks = seq(-4, 10, by = 2)) +
ylab("<--soft--- number of motifs w/ significant effect ----hard--->") +
ggtitle(glue("prop. of cell types w/ at least one hard syntax motif: {round(props['hard'], 2)}\nprop. of cell types w/ at least one soft syntax motif: {round(props['soft'], 2)}"))What about prevalence (in terms of number of instances) within cell types?
# these are the motifs from all original clusters, that did not get clustered
# into patterns that were exluded due to QC
orig_motifs_joined_w_syntax %>% dim## [1] 5334 7
orig_motifs_joined_w_syntax2 <- orig_motifs_joined_w_syntax %>%
left_join(hits_all_anno %>% dplyr::select(motif_name, Cluster_ChromBPNet = Cluster, n_hits = n), by = c("motif_name", "Cluster_ChromBPNet")) %>%
mutate(result = factor(result, levels = rev(names(cmap_synergy)))) %>%
filter(!is.na(n_hits))
# orig_motifs_joined_w_syntax2 %>%
# ggplot(aes(x = Cluster_ChromBPNet, y = log10(n_hits))) +
# geom_point(aes(color = result), alpha = 0.7) +
# scale_color_manual(values = cmap_synergy) +
# rotate_x() +
# xlab("cluster") + ylab("log10(number of hits)") +
# ggtitle("number of hits per motif per cluster \n(1 point = 1 motif in 1 cluster)")Representing these as normalized ranks:
motifs_ranked <- orig_motifs_joined_w_syntax2 %>%
group_by(Cluster_ChromBPNet) %>%
mutate(rank = dense_rank(n_hits)) %>%
mutate(norm_rank = rank/max(rank))
any(is.na(motifs_ranked$norm_rank))## [1] FALSE
motifs_ranked %>%
ggplot(aes(x = Cluster_ChromBPNet, y = norm_rank)) +
geom_point(aes(color = result), alpha = 0.7) +
scale_color_manual(values = cmap_synergy) +
rotate_x() +
xlab("cluster") + ylab("normalized rank within the cluster (rank / max_rank)") +
ggtitle("normalized rank by number of hits per motif per cluster \n(1 point = 1 motif in 1 cluster)")Now we can do a similar analysis ranking motifs by CWM mean:
# these are the motifs from all original clusters, that did not get clustered
# into patterns that were exluded due to QC
orig_motifs_joined_w_syntax %>% dim## [1] 5334 7
# orig_motifs_joined_w_syntax2 %>%
# ggplot(aes(x = Cluster_ChromBPNet, y = cwm_mean)) +
# geom_point(aes(color = result), alpha = 0.7) +
# scale_color_manual(values = cmap_synergy) +
# rotate_x() +
# xlab("cluster") + ylab("mean CWM importance") +
# ggtitle("mean importance per trimmed motif per cluster \n(1 point = 1 motif in 1 cluster)")Representing these as normalized ranks:
motifs_ranked2 <- orig_motifs_joined_w_syntax2 %>%
group_by(Cluster_ChromBPNet) %>%
mutate(rank = dense_rank(cwm_mean)) %>%
mutate(norm_rank = rank/max(rank))
motifs_ranked2 %>%
ggplot(aes(x = Cluster_ChromBPNet, y = norm_rank)) +
geom_point(aes(color = result), alpha = 0.7) +
scale_color_manual(values = cmap_synergy) +
rotate_x() +
xlab("cluster") + ylab("normalized rank within the cluster (rank / max_rank)") +
ggtitle("normalized rank by mean importance per motif per cluster \n(1 point = 1 motif in 1 cluster)")We can also rank by the CWM sum:
# these are the motifs from all original clusters, that did not get clustered
# into patterns that were exluded due to QC
orig_motifs_joined_w_syntax %>% dim## [1] 5334 7
orig_motifs_joined_w_syntax2 %>%
ggplot(aes(x = Cluster_ChromBPNet, y = cwm_sum)) +
geom_point(aes(color = result), alpha = 0.7) +
scale_color_manual(values = cmap_synergy) +
rotate_x() +
xlab("cluster") + ylab("total CWM importance") +
ggtitle("summed importance per trimmed motif per cluster \n(1 point = 1 motif in 1 cluster)")Representing these as normalized ranks:
motifs_ranked3 <- orig_motifs_joined_w_syntax2 %>%
group_by(Cluster_ChromBPNet) %>%
mutate(rank = dense_rank(cwm_sum)) %>%
mutate(norm_rank = rank/max(rank))
motifs_ranked3 %>%
ggplot(aes(x = Cluster_ChromBPNet, y = norm_rank)) +
geom_point(aes(color = result), alpha = 0.7) +
scale_color_manual(values = cmap_synergy) +
rotate_x() +
xlab("cluster") + ylab("normalized rank within the cluster (rank / max_rank)") +
ggtitle("normalized rank by total importance per motif per cluster \n(1 point = 1 motif in 1 cluster)")In cell types that have these types of motifs, what’s the mean normalized rank for number of hits or summed importance for motifs with a given type of synergy, among all motifs learned in that cell type?
motifs_ranked_mean <- motifs_ranked %>%
group_by(Cluster_ChromBPNet, result) %>%
summarize(mean_norm_rank = mean(norm_rank)) %>%
mutate(result = factor(result, levels = names(cmap_synergy)))## `summarise()` has grouped output by 'Cluster_ChromBPNet'. You can override
## using the `.groups` argument.
length(unique(motifs_ranked$Cluster_ChromBPNet))## [1] 189
table(motifs_ranked_mean$result)##
## hard hard & soft soft no synergy not tested
## 115 0 113 100 189
# get the ranks per group
ranks1 <- map(c("not tested", "hard", "soft", "no synergy"),
~ motifs_ranked_mean %>% filter(result == .x) %>% pull(mean_norm_rank)) %>% set_names(c("not tested", "hard", "soft", "no synergy"))
# Wilcoxon rank-sum tests
tibble::tribble(
~ "comparison", ~ "p_value",
"hard vs not tested", wilcox.test(ranks1$hard, ranks1$not_tested, paired = FALSE)$p.value,
"hard vs soft", wilcox.test(ranks1$hard, ranks1$soft, paired = FALSE)$p.value,
"hard vs no synergy", wilcox.test(ranks1$hard, ranks1$`no synergy`, paired = FALSE)$p.value,
"soft vs not tested", wilcox.test(ranks1$soft, ranks1$not_tested, paired = FALSE)$p.value,
"soft vs no synergy", wilcox.test(ranks1$soft, ranks1$`no synergy`, paired = FALSE)$p.value)## # A tibble: 5 × 2
## comparison p_value
## <chr> <dbl>
## 1 hard vs not tested 1.33e-20
## 2 hard vs soft 1.07e-26
## 3 hard vs no synergy 7.15e-13
## 4 soft vs not tested 2.83e-20
## 5 soft vs no synergy 3.17e- 4
And for the mean importance-based rank:
motifs_ranked_mean2 <- motifs_ranked2 %>%
group_by(Cluster_ChromBPNet, result) %>%
summarize(mean_norm_rank = mean(norm_rank)) %>%
mutate(result = factor(result, levels = names(cmap_synergy)))## `summarise()` has grouped output by 'Cluster_ChromBPNet'. You can override
## using the `.groups` argument.
length(unique(motifs_ranked2$Cluster_ChromBPNet))## [1] 189
table(motifs_ranked_mean2$result)##
## hard hard & soft soft no synergy not tested
## 115 0 113 100 189
# get the ranks per group
ranks2 <- map(c("not tested", "hard", "soft", "no synergy"),
~ motifs_ranked_mean2 %>% filter(result == .x) %>% pull(mean_norm_rank)) %>% set_names(c("not tested", "hard", "soft", "no synergy"))
# Wilcoxon rank-sum tests
tibble::tribble(
~ "comparison", ~ "p_value",
"hard vs not tested", wilcox.test(ranks2$hard, ranks2$not_tested, paired = FALSE)$p.value,
"hard vs soft", wilcox.test(ranks2$hard, ranks2$soft, paired = FALSE)$p.value,
"hard vs no synergy", wilcox.test(ranks2$hard, ranks2$`no synergy`, paired = FALSE)$p.value,
"soft vs not tested", wilcox.test(ranks2$soft, ranks2$not_tested, paired = FALSE)$p.value,
"soft vs no synergy", wilcox.test(ranks2$soft, ranks2$`no synergy`, paired = FALSE)$p.value)## # A tibble: 5 × 2
## comparison p_value
## <chr> <dbl>
## 1 hard vs not tested 1.33e-20
## 2 hard vs soft 7.32e- 9
## 3 hard vs no synergy 4.37e- 1
## 4 soft vs not tested 2.84e-20
## 5 soft vs no synergy 1.98e- 9
And for the summed importance-based rank:
motifs_ranked_mean3 <- motifs_ranked3 %>%
group_by(Cluster_ChromBPNet, result) %>%
summarize(mean_norm_rank = mean(norm_rank)) %>%
mutate(result = factor(result, levels = names(cmap_synergy)))## `summarise()` has grouped output by 'Cluster_ChromBPNet'. You can override
## using the `.groups` argument.
length(unique(motifs_ranked2$Cluster_ChromBPNet))## [1] 189
table(motifs_ranked_mean3$result)##
## hard hard & soft soft no synergy not tested
## 115 0 113 100 189
# get the ranks per group
ranks3 <- map(c("not tested", "hard", "soft", "no synergy"),
~ motifs_ranked_mean3 %>% filter(result == .x) %>% pull(mean_norm_rank)) %>% set_names(c("not tested", "hard", "soft", "no synergy"))
# Wilcoxon rank-sum test
tibble::tribble(
~ "comparison", ~ "p_value",
"hard vs not tested", wilcox.test(ranks3$hard, ranks3$not_tested, paired = FALSE)$p.value,
"hard vs soft", wilcox.test(ranks3$hard, ranks3$soft, paired = FALSE)$p.value,
"hard vs no synergy", wilcox.test(ranks3$hard, ranks3$`no synergy`, paired = FALSE)$p.value,
"soft vs not tested", wilcox.test(ranks3$soft, ranks3$not_tested, paired = FALSE)$p.value,
"soft vs no synergy", wilcox.test(ranks3$soft, ranks3$`no synergy`, paired = FALSE)$p.value,
"no synergy vs not tested", wilcox.test(ranks3$`no synergy`, ranks3$`not tested`, paired = FALSE)$p.value)## # A tibble: 6 × 2
## comparison p_value
## <chr> <dbl>
## 1 hard vs not tested 1.33e-20
## 2 hard vs soft 1.90e- 3
## 3 hard vs no synergy 5.34e- 7
## 4 soft vs not tested 2.83e-20
## 5 soft vs no synergy 8.00e-13
## 6 no synergy vs not tested 5.53e- 2
p1 <- motifs_ranked_mean %>%
ggplot(aes(x = result, y = mean_norm_rank)) +
geom_boxplot(aes(fill = result), outliers = FALSE, fill = "white") +
# scale_fill_manual(values = cmap_synergy) +
geom_jitter(width = 0.2, shape = 21, color = "gray60", alpha = 0.8) +
no_legend() +
xlab("type of motif") + ylab("mean normalized rank") +
ggtitle("normalized ranks for each type of motif, \nacross cell types", subtitle = "based on number of hits (1 point = 1 cell type)" ) +
ylim(c(0, 1))
p2 <- motifs_ranked_mean2 %>%
ggplot(aes(x = result, y = mean_norm_rank)) +
geom_boxplot(aes(fill = result), outliers = FALSE, fill = "white") +
# scale_fill_manual(values = cmap_synergy) +
geom_jitter(width = 0.2, shape = 21, color = "gray60", alpha = 0.8) +
no_legend() +
xlab("type of motif") + ylab("mean normalized rank") +
ggtitle("normalized ranks for each type of motif, \nacross cell types", subtitle = "based on mean trimmed CWM importance (1 point = 1 cell type)") +
ylim(c(0, 1))
p3 <- motifs_ranked_mean3 %>%
ggplot(aes(x = result, y = mean_norm_rank)) +
geom_boxplot(aes(fill = result), outliers = FALSE, fill = "white") +
# scale_fill_manual(values = cmap_synergy) +
geom_jitter(width = 0.2, shape = 21, color = "gray60", alpha = 0.8) +
no_legend() +
xlab("type of motif") + ylab("mean normalized rank") +
ggtitle("normalized ranks for each type of motif, \nacross cell types", subtitle = "based on total trimmed CWM importance (1 point = 1 cell type)") +
ylim(c(0, 1))
plot_grid(p1, #+ ggtitle(NULL, subtitle = NULL),
p2, #+ ggtitle(NULL, subtitle = NULL),
p3, #+ ggtitle(NULL, subtitle = NULL),
nrow = 1)p0 <- all_results_best %>%
ggplot(aes(x = sum_effect, y = joint_effect, label = motif_name)) +
geom_abline(slope = 1, linetype = "dashed", color = "black") +
geom_point(aes(size = -log10(joint_vs_sum_p_value_adj), fill = category, shape = result, color = category), alpha = 0.7, stroke = 1) +
scale_fill_manual(values = cmap_category) +
scale_color_manual(values = cmap_category) +
scale_shape_manual(values = shapemap_result) +
# scale_fill_manual(values = palette_coop) +
ylab("Joint effect") +
xlab("Independent effect") +
xlim(c(0, 2)) + ylim(c(0, 2)) +
square() +
theme(legend.position = "bottom")
shapemap_result2 <- shapemap_result
shapemap_result2["no synergy"] <- 21
p0_2 <- all_results_best %>%
mutate(max_Z = ifelse(result == "no synergy", NA, max_Z)) %>%
ggplot(aes(x = sum_effect, y = joint_effect, label = motif_name)) +
geom_abline(slope = 1, linetype = "dashed", color = "black") +
geom_point(aes(fill = max_Z, size = -log10(joint_vs_sum_p_value_adj), shape = result), alpha = 0.7, color = "black") +
scale_fill_gradientn(colors = ylrd, na.value = "gray70") +
scale_color_gradientn(colors = ylrd, na.value = "gray70") +
scale_shape_manual(values = shapemap_result2) +
ylab("AB marginal effect, ln(counts)") +
xlab("A+B marignal effect, ln(counts)") +
xlim(c(0, 2)) + ylim(c(0, 2)) +
square() +
theme(legend.position = "bottom")
plot_grid(p0, p0_2, nrow = 1, align = "h", axis = "tb")ggplotly(p0_2)This implies that our categories have different overall behavirous across 200 bp.
all_results_spacing %>%
filter(is_best_orientation) %>%
ggplot(aes(x = dist_center_to_center, y = joint_vs_ind_per_arrangement, group = motif_name, label = motif_name)) +
geom_hline(yintercept = 0, color = "black", alpha = 0.3, lwd = 1) +
geom_line(alpha = 0.3, aes(color = motif_name)) +
annotate('point', x = 10.5, y = 1.4,
shape = 25, size = 2, color = 'red', fill = 'red', alpha = 0.5) +
annotate('text', x = 35, y = 1.4, color = 'red', label = "10.5 bp") +
facet_wrap(~ result, ncol = 1) +
ylab("AB - (A+B) effects in log-counts") + xlab("center-to-center distance (bp)") +
scale_color_manual(values = palette_motifs) +
no_legend() +
ylim(c(-0.5, 1.5))# motif order - by result, then palindromes, then Z
motif_order <- all_results_best %>%
mutate(result = factor(result, levels = rev(names(shapemap_result)))) %>%
# sort by increasing z-score
arrange(result, max_Z) %>%
pull(motif_name) %>%
unique()
motif_order2 <- all_results_best %>%
mutate(result = factor(result, levels = rev(names(shapemap_result)))) %>%
# sort by increasing z-score
arrange(category, palindromicity, max_Z) %>%
pull(motif_name) %>%
unique()
results_w_spacing <- all_results_spacing %>%
mutate(result = factor(result, levels = rev(names(shapemap_result)))) %>%
arrange(result, joint_effect) %>%
mutate(motif_name = factor(motif_name, levels = motif_order),
category = factor(category, levels = c("homocomposite", "heterocomposite")))
results_w_zscore <- all_results_spacing %>%
left_join(z_scores, by = c("orientation", "spacing", "motif_name", "category")) %>%
mutate(result = factor(result, levels = rev(names(shapemap_result)))) %>%
arrange(result, Z) %>%
mutate(motif_name = factor(motif_name, levels = motif_order),
category = factor(category, levels = c("homocomposite", "heterocomposite")),
# cap the Z-score at 5 for better visualization of smaller Z-scores
# TODO: edit legend to indicate >=N
Z = ifelse(Z > 5, 5, Z))Helper function to plot the heatmap for one motif pair, similarly to Taipale lab 2015 paper:
#' @param motif character, motif namee
#' Z-scored effect
plot_heatmap_per_pair <- function(motif, n_bp = 20) {
orientations <- c("HH", "TT", "A,B HT", "B,A HT")
p1 <- results_w_spacing %>%
filter(motif_name == motif) %>%
filter(spacing < n_bp) %>%
mutate(orientation = factor(orientation, levels = rev(orientations))) %>%
ggplot(aes(x = spacing, y = orientation)) +
geom_tile(aes(fill = joint_effect), color = "white") +
scale_fill_gradientn(colors = viridis::magma(100)) +
theme(strip.background = element_blank(),
panel.border = element_blank(),
# axis.text.y = element_text(size = 9),
axis.ticks.y = element_blank(),
strip.text.y = element_blank()) +
xlab("spacing (bp)") + ylab("orientations") + labs(fill = "effect_mean") +
ggtitle(motif)
p2 <- results_w_zscore %>%
filter(motif_name == motif) %>%
filter(spacing < n_bp) %>%
mutate(orientation = factor(orientation, levels = rev(orientations))) %>%
ggplot(aes(x = spacing, y = orientation)) +
geom_tile(aes(fill = Z),color = "black") +
scale_fill_gradient2(low = "#2166AC", mid = "white", high = "#B2182B", midpoint = 0) +
theme(strip.background = element_blank(),
panel.border = element_blank(),
# axis.text.y = element_(),
axis.ticks.y = element_blank()) +
xlab("spacing (bp)") + ylab("orientations") + labs(fill = "Z-scored effect")
plot_grid(p1, p2, ncol = 1, align = "v", axis = "rl", rel_heights = c(1.2, 1))
}plot_heatmap_per_pair("467|p53_p53#1")plot_heatmap_per_pair("434|SOX_SOX#1")Let’s try a version where for each motif, we only select the best orientation and show the results at that one:
all_results_spacing %>%
mutate(motif_name = factor(motif_name, levels = motif_order2)) %>%
ggplot(aes(x = spacing, y = motif_name)) +
geom_tile(aes(fill = joint_vs_ind_per_arrangement)) +
facet_grid(result ~ ., scales = "free_y", space = "free_y") +
scale_fill_gradient2(low = "#2166AC", mid = "white", high = "#B2182B", midpoint = 0) +
theme(strip.background = element_blank(),
strip.text.y = element_text(angle = 0),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_text(size = 5)) +
ggtitle("Joint - independent effect") +
xlab("spacing (bp)") + ylab(NULL)We can reformat the data a bit to get the plots for all “specific” cooperative motifs, including both homocomposites and heterocomposites:
p1 <- results_w_spacing %>%
mutate(motif_name = factor(motif_name, levels = motif_order2)) %>%
filter(result %in% c("hard", "hard & soft")) %>%
filter(spacing < 20) %>%
ggplot(aes(x = spacing, y = motif_name)) +
geom_tile(aes(fill = joint_effect)) +
facet_grid(category ~ orientation, scales = "free_y", space = "free_y") +
scale_fill_gradientn(colors = viridis::magma(100)) +
theme(strip.background = element_blank(),
panel.border = element_blank(),
axis.text.y = element_text(size = 9),
axis.ticks.y = element_blank(),
strip.text.y = element_blank(),
legend.position = "bottom") +
ggtitle("Marginal joint effect") +
xlab("spacing (bp)") + ylab("motif pair")
p2 <- results_w_zscore %>%
mutate(motif_name = factor(motif_name, levels = motif_order2)) %>%
filter(result %in% c("hard", "hard & soft")) %>%
filter(spacing < 20) %>%
ggplot(aes(x = spacing, y = motif_name)) +
geom_tile(aes(fill = Z)) +
facet_grid(category ~ orientation, scales = "free_y", space = "free_y") +
scale_fill_gradient2(low = "#2166AC", mid = "white", high = "#B2182B", midpoint = 0) +
theme(strip.background = element_blank(),
panel.border = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "bottom") +
ggtitle("Z-scored effect") +
xlab("spacing (bp)") + ylab(NULL)
# plot_grid(p1, p2, nrow = 1, rel_widths = c(1.4, 1))Plot some features of these motifs
coop_subset <- motifs_compiled_unique %>%
right_join(all_results_best %>% dplyr::select(motif_name, result, Z, joint_effect), by = "motif_name") %>%
filter(result %in% c("hard", "hard & soft"))
dim(coop_subset)## [1] 48 49
motifs_filt <- coop_subset %>% mutate(annotation_broad = motif_name)
hits_annotated <- hits_all_anno %>% mutate(annotation_broad = motif_name) %>%
filter(motif_name %in% coop_subset$motif_name)
# group by broad annotation and calculate summary stats
motifs_to_plot <- motifs_filt %>%
group_by(annotation_broad) %>%
mutate(total_hits_broad = sum(total_hits),
n_variants = n()) %>%
# for visualization of the logo, use the motif w/ the most hits
slice_max(n = 1, order_by = total_hits) %>%
arrange(desc(total_hits_broad))
# plot breakdown of total hits based on cell type compartment
p4 <- hits_annotated %>%
dplyr::select(annotation_broad, compartment, category, n) %>%
group_by(annotation_broad, compartment, category) %>%
summarize(n_hits = sum(n)) %>%
mutate(annotation_broad = factor(annotation_broad, levels = motif_order2)) %>%
ggplot(aes(y = annotation_broad, x = n_hits)) +
geom_col(aes(fill = compartment), position = "fill") +
scale_fill_manual(values = cmap_compartment2) +
facet_grid(category ~ ., space = "free_y", scales = "free_y") +
ylab(NULL) + xlab(NULL) + ggtitle("% hits \nby compartment") +
theme(panel.grid = element_blank(),
panel.border = element_blank(),
strip.background = element_blank(),
# axis.text.y = element_blank(),
# axis.ticks.y = element_blank(),
axis.text.x = element_text(size = 10),
legend.position = "bottom") +
ggplot2::guides(fill=guide_legend(ncol=2))## `summarise()` has grouped output by 'annotation_broad', 'compartment'. You can
## override using the `.groups` argument.
# plot breakdown of total hits based on organ
p5 <- hits_annotated %>%
dplyr::select(annotation_broad, organ, n, category) %>%
group_by(annotation_broad, organ, category) %>%
summarize(n_hits = sum(n)) %>%
mutate(annotation_broad = factor(annotation_broad, levels = motif_order2)) %>%
ggplot(aes(y = annotation_broad, x = n_hits)) +
geom_col(aes(fill = organ), position = "fill") +
scale_fill_manual(values = cmap_organ) +
facet_grid(category ~ ., space = "free_y", scales = "free_y") +
ylab(NULL) + xlab(NULL) + ggtitle("% hits \nby compartment") +
theme(panel.grid = element_blank(),
panel.border = element_blank(),
strip.background = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.x = element_text(size = 10),
legend.position = "bottom") +
ggplot2::guides(fill=guide_legend(ncol=2))## `summarise()` has grouped output by 'annotation_broad', 'organ'. You can
## override using the `.groups` argument.
p6 <- motifs_compiled_unique %>%
filter(motif_name %in% motifs_filt$motif_name) %>%
mutate(motif_name = factor(motif_name, levels = motif_order2)) %>%
ggplot(aes(y = motif_name, x = "1")) +
# color scale is on log10
geom_tile(aes(fill = log10(total_hits)), alpha = 1) +
# labels are non log-transformed
geom_text(aes(label = scales::comma(total_hits)), color = "white", size = 3) +
# don't include the very lightest colors; too hard to read
scale_fill_gradientn(colors = viridis::plasma(100)[0:95], limits = c(0, 8)) +
facet_grid(category ~ ., space = "free_y", scales = "free_y") +
ylab(NULL) + xlab(NULL) + ggtitle("total # hits") +
rotate_x() +
hide_ticks() +
theme(panel.grid = element_blank(),
panel.border = element_blank(),
legend.position = "bottom")
# plot_grid(p4 + theme(strip.text = element_blank()), p5, p6, nrow = 1, align = "h", axis = "tb", rel_widths = c(2, 1, 1))Get logos:
# get motifs in the new order
cwm_list_subset <- cwm_list[motifs_to_plot$motif_name] %>%
map(trim_cwm, flank = 2)
# use the annotation_broad name instead of the motif name
# base_representative$motif_name_short <- gsub("^[^|]*\\|([^#]*)#.*$", "\\1", base_representative$motif_name)
names(cwm_list_subset) <- plyr::mapvalues(names(cwm_list_subset),
from = motifs_to_plot$motif_name,
to = as.character(motifs_to_plot$annotation_broad))
hetero_specific <- coop_subset %>% filter(category == "heterocomposite") %>% pull(motif_name)
homo_specific <- coop_subset %>% filter(category == "homocomposite") %>% pull(motif_name)
p_logos_homo <- ggplot() +
geom_logo(cwm_list_subset[rev(motif_order2[motif_order2 %in% homo_specific])], method = "custom", font = "roboto_bold") +
theme_logo() +
facet_grid(seq_group ~ ., scales = "free_y", switch = "both") +
theme(strip.text.y.left = element_text(angle = 0, hjust = 1),
# reduce margins between facets, so that the logos can take up
# a little bit more space
panel.spacing=unit(0, "lines")
) +
hide_ticks() + ylab(NULL)
p_logos_hetero <- ggplot() +
geom_logo(cwm_list_subset[rev(motif_order2[motif_order2 %in% hetero_specific])], method = "custom", font = "roboto_bold") +
theme_logo() +
facet_grid(seq_group ~ ., scales = "free_y", switch = "both") +
theme(strip.text.y.left = element_text(angle = 0, hjust = 1),
# reduce margins between facets, so that the logos can take up
# a little bit more space
panel.spacing=unit(0, "lines")
) +
hide_ticks() + ylab(NULL)
p_logos <- plot_grid(p_logos_homo, p_logos_hetero, ncol = 1, align = "v", axis = "rl", rel_heights = c(1, length(hetero_specific)/length(homo_specific)))p_palindromic <- all_results_best %>%
filter(result %in% c("hard", "hard & soft")) %>%
dplyr::select(motif_name, category, seqA_palindrome, seqB_palindrome) %>%
mutate(category = factor(category, levels = c("homocomposite", "heterocomposite", "not cooperative"))) %>%
gather(seq, palindrome, seqA_palindrome, seqB_palindrome) %>%
mutate(motif_name = factor(motif_name, levels = motif_order2)) %>%
ggplot(aes(x = seq, y = motif_name)) +
geom_tile(aes(fill = palindrome), alpha = 0.8) +
geom_text(aes(label = palindrome), fontface = "bold", size = 4, color = "black") +
facet_grid(category ~ ., scales = "free_y", space = "free_y") +
scale_fill_manual(values = c("P" = "#2bad0a", "Q" = "#8fe37b", "-" = "gray90")) + #, aesthetics = c("color", "fill")) +
ggtitle("palindromic") +
no_legend() +
ylab(NULL) +
xlab(NULL) +
theme(panel.grid = element_blank(),
panel.border = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
strip.background = element_blank())
plot_grid(p_logos,
p_palindromic + theme(strip.text = element_blank()),
p6 + theme(strip.text = element_blank()),
p2 + theme(strip.text.y = element_blank()),
p4 + theme(strip.text = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank()),
p5, nrow = 1, rel_widths = c(1.5, 0.2, 0.2, 2, 0.5, 0.5), rel_heights = c(1.5, 1, 1, 1, 1, 1, 1), align = "h", axis = "tb")plot_curves <- function(motif, facet = TRUE, limits = NULL, best_orientation = FALSE, palette = NULL) {
solo_stats <- solo_df %>% filter(orientation == "A + B" & motif_name == motif)
if (best_orientation) {
keep_orientation <- result_outs %>% filter(motif_name == motif) %>% pull(best_orientation)
plot_df <- spacing_df %>% filter(motif_name == motif & orientation == keep_orientation)
} else {
plot_df <- spacing_df %>% filter(motif_name == motif)
}
p1 <- plot_df %>%
ggplot(aes(x = spacing)) +
geom_hline(yintercept = solo_stats$effect_mean, color = "gray60", size = 2) +
geom_line(aes(y = effect_mean, color = orientation), alpha = 0.7, size = 2) +
geom_ribbon(aes(ymin = effect_mean - effect_sd, ymax = effect_mean + effect_sd, fill = orientation), alpha = 0.2) +
annotate("text", label = "A + B", x = 190, y = solo_stats$effect_mean + 0.05, size = 6) +
ylab("Marginal joint effect") + xlab("distance between motifs (bp)") +
ggtitle(motif)
# scale_color_manual(values = palette_orientation_hetero) +
# scale_fill_manual(values = palette_orientation_hetero) +
if (!is.null(limits)) p1 <- p1 + ylim(limits)
if (facet) p1 <- p1 + facet_wrap(~ orientation, ncol = 1) + no_legend()
if (!is.null(palette)) p1 <- p1 + scale_color_manual(values = palette)
p1
}This also lets us appreciate that true palindromes have identical outcomes because we’re literally doing the exact same experiment.
plot_curves("430|RUNX_TCF7L/LEF", limits = c(0, 0.6), facet = FALSE, best_orientation = TRUE, palette = "red")plot_curves("74|BHLH_HD:PBX1/PDX1", limits = c(0, 0.6), facet = FALSE, best_orientation = TRUE, palette = "red")plot_curves("80|BHLH_MEF2#1", limits = c(0, 0.6), facet = FALSE, best_orientation = TRUE, palette = "red")plot_curves("423|RFX_SOX", limits = c(0, 0.6), facet = FALSE, best_orientation = TRUE, palette = "red")pred_profiles_47 <- data.table::fread(
file.path(ism_dir, "47|BHLH:TWIST.ZBTB_HD/Skin_c0/profile_preds_A,B HT.tsv"),
data.table = FALSE)
pred_profiles_47 %>%
ggplot(aes(x = position, y = mean)) +
geom_line(color = "red") +
geom_ribbon(aes(ymin = mean - sd, ymax = mean + sd), fill = "red", alpha = 0.3) +
facet_grid(spacing ~ .) +
scale_y_continuous(breaks = c(0.1, 0.5)) +
scale_x_continuous(labels = c(-500, -250, 0, 250, 500)) +
theme(panel.spacing.y = unit(0.1, "lines"))spacing_df %>%
filter(motif_name == "47|BHLH:TWIST/ZBTB_HD") %>%
filter(spacing <= 20) %>%
ggplot(aes(x = spacing, y = effect_mean)) +
geom_bar(stat = "identity", color = "black", fill = "red") +
geom_errorbar(aes(ymin = effect_mean - effect_sd, ymax = effect_mean + effect_sd), width = 0.5) +
facet_wrap(~ orientation, ncol = 2)pred_profiles_74 <- data.table::fread(
file.path(ism_dir, "74|BHLH_HD:PBX1.PDX1/Muscle_c0/profile_preds_B,A HT.tsv"),
data.table = FALSE)
pred_profiles_74 %>%
ggplot(aes(x = position, y = mean)) +
geom_line(color = "red") +
geom_ribbon(aes(ymin = mean - sd, ymax = mean + sd), fill = "red", alpha = 0.3) +
facet_grid(spacing ~ .) +
scale_y_continuous(breaks = c(0.1, 0.5)) +
scale_x_continuous(labels = c(-500, -250, 0, 250, 500)) +
theme(panel.spacing.y = unit(0.1, "lines"))spacing_df %>%
filter(motif_name == "74|BHLH_HD:PBX1/PDX1") %>%
filter(spacing <= 20) %>%
ggplot(aes(x = spacing, y = effect_mean)) +
geom_bar(stat = "identity", color = "black", fill = "red") +
geom_errorbar(aes(ymin = effect_mean - effect_sd, ymax = effect_mean + effect_sd), width = 0.5) +
facet_wrap(~ orientation, ncol = 2)Let’s first look at the distribution of effects across all experiments:
all_results_best %>% filter(result != "no synergy") %>% nrow()## [1] 67
combined_df <- bind_rows(
all_results_allexp %>% mutate(set = "background (138 composites x 189 cell types, N=26082)") %>% dplyr::select(set, effect = joint_vs_ind_max),
all_results_best %>% mutate(set = "foreground (reported synergistic pairs, N=67)") %>% filter(result != "no synergy") %>% dplyr::select(set, effect = joint_vs_ind_max))
palette_test <- c("background (138 composites x 189 cell types, N=26082)" = "gray",
"foreground (reported synergistic pairs, N=67)" = "red")
combined_df %>%
ggplot(aes(x = effect)) +
geom_histogram(aes(fill = set), alpha = 0.7, bins = 50) +
scale_fill_manual(values = palette_test) +
facet_wrap(~ set, scales = "free_y", ncol = 1) +
xlab("Joint effect - independent effect (log counts)") +
ylab("Number of tests") +
no_legend()Make a dotplot summarizing all the significant tests:
motif_order <- read_lines(
here("code/03-chrombpnet/03-syntax/04c-motif_order.txt")) %>%
unique()
all(motif_order %in% unique(all_results_allexp$motif_name))## [1] TRUE
all(unique(all_results_allexp$motif_name) %in% motif_order)## [1] TRUE
cluster_order <- cluster_meta$Cluster_ChromBPNet
all_results_allexp_filt <- all_results_allexp %>%
mutate(motif_name = factor(motif_name, levels = rev(motif_order)),
cluster = factor(cluster, levels = cluster_order)) %>%
filter(result == "synergy")
nrow(all_results_allexp)## [1] 26082
nrow(all_results_allexp_filt)## [1] 2317
nrow(all_results_allexp_filt)/nrow(all_results_allexp)## [1] 0.08883521
all_results_allexp_filt$motif_name %>% unique %>% length## [1] 125
all_results_allexp_filt %>%
ggplot(aes(y = motif_name, x = cluster)) +
geom_point(aes(size = log10padj, color = joint_effect, stroke = 0)) +
scale_size(range = c(0, 3)) +
# to keep all clusters in the dendrogram
scale_x_discrete(drop = FALSE) +
scale_color_gradientn(colors = ylrd) +
rotate_x() +
theme(
axis.text.y = element_text(size = 5),
axis.text.x = element_text(size = 5)
)Plot an annotation for motifs indicating the number of cell types where the motif is predicted to be synergistic:
all_results_allexp_filt %>%
group_by(cluster, motif_name) %>%
count() %>%
left_join(cluster_meta %>% dplyr::select(Cluster_ChromBPNet, organ),
by = c("cluster" = "Cluster_ChromBPNet")) %>%
mutate(motif_name = factor(motif_name, levels = rev(motif_order))) %>%
ggplot(aes(x = motif_name, y = n)) +
geom_col(aes(fill = organ), position = "fill") +
scale_fill_manual(values = cmap_organ) +
theme(
axis.text.y = element_text(size = 5),
axis.text.x = element_text(size = 5)
) +
rotate_x()Plot an annotation for motifs indicating the distribution of effects across cell types per motif:
all_results_allexp %>%
dplyr::left_join(cluster_meta %>%
dplyr::select(Cluster_ChromBPNet, organ),
by = c("cluster" = "Cluster_ChromBPNet")) %>%
filter(motif_name %in% all_results_allexp_filt$motif_name) %>%
mutate(motif_name = factor(motif_name, levels = motif_order)) %>%
ggplot(aes(x = motif_name, y = joint_vs_ind_max)) +
geom_boxplot(fill = "gray90", outliers = FALSE) +
geom_hline(yintercept = delta_thresh, linetype = "dashed", color = "red") +
theme(
axis.text.x = element_text(size = 5)
) +
rotate_x()This section allows for exploring the outputs of the in silico experiments interactively.