# **MODIFY THIS CHUNK**
project_id <- "HDMA-public" # determines the name of the cache folder
doc_id <- "02-global_analysis/01" # determines name of the subfolder of `figures` where pngs/pdfs are saved
out <- here::here("output/", doc_id); dir.create(out, recursive = TRUE)
figout <- here::here("figures/", doc_id, "/")Global overview of the HDMA atlas, including QC stats and metadata at the tissue and sample levels.
# libraries
library(glue)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(purrr)
library(readr)
library(tibble)
library(stringr)
library(Seurat)
library(cowplot)
library(data.table)
library(ArchR)
library(here)
# shared project scripts/functions
source(here::here("code/utils/plotting_config.R"))
source(here::here("code/utils/matrix_helpers.R"))
source(here::here("code/utils/seurat_helpers.R"))
source(here::here("code/utils/sj_scRNAseq_helpers.R"))
source(here::here("code/utils/hdma_palettes.R"))
ggplot2::theme_set(theme_BOR())# TSV file to manually define which organs to keep
organs_keep <- read_tsv(here::here("code/02-global_analysis/01-organs_keep.tsv"))## Rows: 12 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): organcode, organ, iteration
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
organs_keep## # A tibble: 12 × 3
## organcode organ iteration
## <chr> <chr> <chr>
## 1 AG Adrenal v4
## 2 BR Brain v4
## 3 EY Eye v6
## 4 HT Heart v4
## 5 LI Liver v4
## 6 LU Lung v8
## 7 MU Muscle v4
## 8 SK Skin v4
## 9 SP Spleen v4
## 10 ST StomachEsophagus v2
## 11 TM Thymus v4
## 12 TR Thyroid v2
Set some paths:
preprocessing_out_dir <- here::here("output/01-preprocessing/02")
meta_dir <- file.path(out, "per_cell_meta")
dir.create(meta_dir, showWarnings = F, recursive = T)Extract RNA metadata:
organs <- organs_keep$organ
organ_codes <- organs_keep$organcode
seurat_filepaths <- map(organs, ~ file.path(preprocessing_out_dir, .x, "rna_preprocess_output/RNA_obj_clustered_final.rds")) %>%
set_names(organs)
# check all files exist
all(map_lgl(seurat_filepaths, file.exists))## [1] TRUE
# create a TSV file for metadata from each organ
for (organ in organs) {
filepath <- seurat_filepaths[[organ]]
# print(filepath)
rna_meta_path <- file.path(meta_dir, paste0("/", organ, "_rna_meta.tsv"))
# print(rna_meta_path)
# only process if the Seurat object exists, and the meta doesn't
if (file.exists(filepath) & !file.exists(rna_meta_path)) {
message(paste0("Loading Seurat object for ", organ, "..."))
so <- readRDS(filepath)
so_meta <- so@meta.data
write_tsv(so_meta, rna_meta_path)
} else message(organ, " meta already exists, skipping...")
}## Adrenal meta already exists, skipping...
## Brain meta already exists, skipping...
## Eye meta already exists, skipping...
## Heart meta already exists, skipping...
## Liver meta already exists, skipping...
## Lung meta already exists, skipping...
## Muscle meta already exists, skipping...
## Skin meta already exists, skipping...
## Spleen meta already exists, skipping...
## StomachEsophagus meta already exists, skipping...
## Thymus meta already exists, skipping...
## Thyroid meta already exists, skipping...
Extract ATAC metadata
archr_filepaths <- map(organs, ~ file.path(preprocessing_out_dir, .x, "atac_preprocess_output/ATAC_obj_clustered_peaks_final")) %>%
set_names(organs)
# check all files exist
all(map_lgl(archr_filepaths, file.exists))## [1] TRUE
# Create a TSV file for metadata from each organ
for (organ in organs) {
filepath <- archr_filepaths[[organ]]
atac_meta_path <- file.path(meta_dir, paste0("/", organ, "_atac_meta.tsv"))
# Only process if the Seurat object exists, and the meta doesn't
if (dir.exists(filepath) & !file.exists(atac_meta_path)) {
message(paste0("Loading ArchRProject for ", organ, "..."))
archr <- loadArchRProject(archr_filepaths[[organ]])
archr_meta <- archr@cellColData %>% as.data.frame()
write_tsv(archr_meta, atac_meta_path)
} else message(organ, " meta already exists, skipping...")
}## Adrenal meta already exists, skipping...
## Brain meta already exists, skipping...
## Eye meta already exists, skipping...
## Heart meta already exists, skipping...
## Liver meta already exists, skipping...
## Lung meta already exists, skipping...
## Muscle meta already exists, skipping...
## Skin meta already exists, skipping...
## Spleen meta already exists, skipping...
## StomachEsophagus meta already exists, skipping...
## Thymus meta already exists, skipping...
## Thyroid meta already exists, skipping...
per_sample_meta <- map_dfr(organ_codes, function(organ){
filepath <- here::here(glue("output/01-preprocessing/02/shared/meta/{organ}_meta_sample.txt"))
print(filepath)
df <- read_tsv(filepath)
print(dim(df))
if(df$sex %>% is.logical()) {
df$sex <- replace(df$sex, df$sex == FALSE, "F") # Specify character type when only female sex in sample
}
df
})## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/AG_meta_sample.txt"
## Rows: 4 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): sample, organ, organ_code, sex
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 4 13
## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/BR_meta_sample.txt"
## Rows: 8 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): sample, organ, organ_code, sex
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 8 13
## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/EY_meta_sample.txt"
## Rows: 7 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): sample, organ, organ_code, sex
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 7 13
## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/HT_meta_sample.txt"
## Rows: 8 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): sample, organ, organ_code, sex
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 8 13
## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/LI_meta_sample.txt"
## Rows: 7 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): sample, organ, organ_code, sex
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 7 13
## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/LU_meta_sample.txt"
## Rows: 15 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): sample, organ, organ_code, sex
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 15 13
## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/MU_meta_sample.txt"
## Rows: 8 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): sample, organ, organ_code, sex
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 8 13
## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/SK_meta_sample.txt"
## Rows: 3 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): sample, organ, organ_code
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
## lgl (1): sex
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 3 13
## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/SP_meta_sample.txt"
## Rows: 3 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): sample, organ, organ_code, sex
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 3 13
## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/ST_meta_sample.txt"
## Rows: 7 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): sample, organ, organ_code, sex
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 7 13
## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/TM_meta_sample.txt"
## Rows: 3 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): sample, organ, organ_code
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
## lgl (1): sex
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 3 13
## [1] "/oak/stanford/groups/wjg/skim/projects/HDMA-public/output/01-preprocessing/02/shared/meta/TR_meta_sample.txt"
## Rows: 3 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): sample, organ, organ_code, sex
## dbl (9): pcw, pcd, ncell, median_ngene, median_numi, median_pctmt, median_nf...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 3 13
DT::datatable(per_sample_meta)write_tsv(per_sample_meta, glue("{out}/all_organs_keep_sample.tsv"))
print(head(per_sample_meta))## # A tibble: 6 × 13
## sample organ organ_code pcw pcd sex ncell median_ngene median_numi
## <chr> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 T273_b16_Ad… Adre… AG 18 126 M 612 3525 9910.
## 2 T318_b16_Ad… Adre… AG 21 151 M 1381 2886 7778
## 3 T390_b16_Ad… Adre… AG 13 96 F 112 2407 4588.
## 4 T40_b16_Adr… Adre… AG 17 122 F 778 2992. 6408
## 5 T129_b4_Bra… Brain BR 22 159 F 6177 1935 3583
## 6 T155_b4_Bra… Brain BR 10 70 M 151 1438 1955
## # ℹ 4 more variables: median_pctmt <dbl>, median_nfrags <dbl>,
## # median_tss <dbl>, median_frip <dbl>
Plot frequency of each organ:
# timeline dotplot
per_sample_meta %>%
mutate(organ = factor(organ, levels = rev(sort(names(cmap_organ))))) %>%
ggplot(aes(x = pcw)) +
geom_dotplot(aes(fill = organ, color = sex),
binwidth = 0.1, binpositions = "all", stackgroups = TRUE, dotsize = 5, alpha = 0.8, width = 3, weight = 2) +
scale_fill_manual(values = cmap_organ) +
scale_color_manual(values = c("M" = "black", "F" = "white")) +
scale_x_continuous(breaks = seq(10, 23)) +
geom_hline(yintercept = 0) +
theme(panel.grid = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
panel.border = element_blank())Count samples per organ:
# Plot barplot for each sample
p1 <- per_sample_meta %>%
group_by(organ) %>%
dplyr::count() %>%
ggplot(aes(x = organ, y = n, fill = organ)) +
geom_bar(stat = "identity") +
theme_bw() +
labs(x = "",
y = "# Samples") +
scale_fill_manual(values = cmap_organ) +
scale_x_discrete(limits = rev) +
coord_flip() +
theme(
panel.grid = element_blank(),
axis.text = element_text(family = "sans", size = 10, color = "black"),
axis.title = element_text(family = "sans", size = 11, color = "black"),
legend.position = "none"
)
# plot number of cells for each organ:
p2 <- per_sample_meta %>%
dplyr::group_by(organ) %>%
dplyr::summarise(
total_ncells = sum(ncell)) %>%
ggplot(aes(x = organ, y = total_ncells, fill = organ)) +
geom_bar(stat = "identity") +
theme_bw() +
labs(x = "",
y = "Count") +
scale_fill_manual(values = cmap_organ) +
scale_x_discrete(limits = rev) +
scale_y_continuous(trans = scales::log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
coord_flip() +
theme(
panel.grid = element_blank(),
axis.text = element_text(family = "sans", size = 10, color = "black"),
axis.title = element_text(family = "sans", size = 11, color = "black"),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none"
)
# Plot PCDs of each organ as a dotplot:
p3 <- ggplot(per_sample_meta, aes(x = pcd, y = organ, color = organ, shape = factor(sex))) +
geom_point(size = 5, alpha = 0.8) +
theme_bw() +
labs(x = "Gestational age (days)",
y = "") +
scale_color_manual(values = cmap_organ) +
scale_shape_manual(values = c(19, 17)) +
scale_y_discrete(limits = rev) +
theme(
panel.grid = element_blank(),
axis.text = element_text(family = "sans", size = 10, color = "black"),
axis.title = element_text(family = "sans", size = 11, color = "black"),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "bottom"
)
# Plot RNA metrics as a box whisker plot:
p4 <- ggplot(per_sample_meta, aes(x = median_numi, y = organ, fill = organ)) +
geom_boxplot(alpha = 0.8) +
geom_jitter(height = 0.1) +
scale_fill_manual(values = cmap_organ) +
scale_x_log10() +
scale_y_discrete(limits = rev) +
theme_bw() +
labs(x = "median # UMIs",
y = "") +
theme(
panel.grid = element_blank(),
axis.text = element_text(family = "sans", size = 10, color = "black"),
axis.title = element_text(family = "sans", size = 11, color = "black"),
legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)
p5 <- ggplot(per_sample_meta, aes(x = median_ngene, y = organ, fill = organ)) +
geom_boxplot(alpha = 0.8) +
geom_jitter(height = 0.1) +
scale_fill_manual(values = cmap_organ) +
scale_x_log10() +
scale_y_discrete(limits = rev) +
theme_bw() +
labs(x = "median # genes",
y = "") +
theme(
panel.grid = element_blank(),
axis.text = element_text(family = "sans", size = 10, color = "black"),
axis.title = element_text(family = "sans", size = 11, color = "black"),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none"
)
# Plot ATAC metrics as a box whisker plot:
p6 <- ggplot(per_sample_meta, aes(x = median_nfrags, y = organ, fill = organ)) +
geom_boxplot(alpha = 0.8) +
geom_jitter(height = 0.1) +
scale_fill_manual(values = cmap_organ) +
scale_x_log10() +
scale_y_discrete(limits = rev) +
theme_bw() +
labs(x = "median # fragments",
y = NULL) +
theme(
panel.grid = element_blank(),
axis.text = element_text(family = "sans", size = 10, color = "black"),
axis.title = element_text(family = "sans", size = 11, color = "black"),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none"
)
p7 <- ggplot(per_sample_meta, aes(x = median_tss, y = organ, fill = organ)) +
geom_boxplot(alpha = 0.8) +
geom_jitter(height = 0.1) +
scale_fill_manual(values = cmap_organ) +
scale_y_discrete(limits = rev) +
theme_bw() +
labs(x = "median TSS enrichment",
y = NULL) +
theme(
panel.grid = element_blank(),
axis.text = element_text(family = "sans", size = 10, color = "black"),
axis.title = element_text(family = "sans", size = 11, color = "black"),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none"
)
p8 <- ggplot(per_sample_meta, aes(x = median_frip, y = organ, fill = organ)) +
geom_boxplot(alpha = 0.8) +
geom_jitter(height = 0.1) +
scale_fill_manual(values = cmap_organ) +
scale_y_discrete(limits = rev) +
theme_bw() +
labs(x = "medianFRiP",
y = NULL) +
theme(
panel.grid = element_blank(),
axis.text = element_text(family = "sans", size = 10, color = "black"),
axis.title = element_text(family = "sans", size = 11, color = "black"),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none"
)
plot_grid(p1, p2, p3, p4, p5, p6, p7, p8, nrow = 1, align = "h", axis = "tb")# ggsave(paste0(figout, "/barplot_freq_tissues.pdf"), width=20, height=6)Plot as 2D multiome metrics:
# nUMIs by nFrags
p1 <- ggplot(per_sample_meta, aes(x = median_numi, y = median_nfrags, color = organ)) +
geom_point(size = 3, alpha = 0.9) +
scale_x_continuous(trans = log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
scale_y_continuous(trans = log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
scale_color_manual(values = cmap_organ) +
theme_bw() +
theme(
panel.grid = element_blank(),
axis.text = element_text(family = "sans", size = 10, color = "black"),
axis.title = element_text(family = "sans", size = 11, color = "black"),
aspect.ratio = 1
#legend.position = "none"
)
# nGenes by FRiP
p2 <- ggplot(per_sample_meta, aes(x = median_ngene, y = median_frip, color = organ)) +
geom_point(size = 3, alpha = 0.9) +
scale_x_continuous(trans = log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
# scale_y_continuous(trans = log10_trans(),
# breaks = trans_breaks("log10", function(x) 10^x),
# labels = trans_format("log10", math_format(10^.x))) +
scale_color_manual(values = cmap_organ) +
theme_bw() +
theme(
panel.grid = element_blank(),
axis.text = element_text(family = "sans", size = 10, color = "black"),
axis.title = element_text(family = "sans", size = 11, color = "black"),
aspect.ratio = 1
#legend.position = "none"
)
# nGenes by TSS
p3 <- ggplot(per_sample_meta, aes(x = median_ngene, y = median_tss, color = organ)) +
geom_point(size = 3, alpha = 0.9) +
scale_x_continuous(trans = log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
# scale_y_continuous(trans = log10_trans(),
# breaks = trans_breaks("log10", function(x) 10^x),
# labels = trans_format("log10", math_format(10^.x))) +
scale_color_manual(values = cmap_organ) +
theme_bw() +
theme(
panel.grid = element_blank(),
axis.text = element_text(family = "sans", size = 10, color = "black"),
axis.title = element_text(family = "sans", size = 11, color = "black"),
aspect.ratio = 1
#legend.position = "none"
)
plot_grid(p1, p2, p3, nrow = 1, align = "h", axis = "tb")# ggsave(paste0(figout, "/dotPlot_multiome.pdf"), width=15, height=5)Get cell metadata:
# load all RNA cell meta into one table, add in the organ name
cell_meta_rna_paths <- list.files(here::here("output/02-global_analysis/01/per_cell_meta"), pattern = "*_rna_meta.tsv", full.names = TRUE)
cell_meta_rna <- map_dfr(cell_meta_rna_paths, ~ data.table::fread(.x, data.table = FALSE) %>%
add_column(Organ = str_split(basename(.x), "_", n = 3)[[1]][1], .before = 1))
# add organ code to cluster name
cell_meta_rna$Cluster <- map2_chr(cell_meta_rna$Organ, cell_meta_rna$Clusters,
~ paste0(organs_keep[organs_keep$organ == .x, ]$organcode, "_", .y))
# get a sample order by organ/age
sample_order <- cell_meta_rna %>%
distinct(Sample, Organ, PCW) %>%
arrange(Organ, PCW) %>%
pull(Sample)
# n samples per organ
cell_meta_rna %>%
distinct(Sample, Organ) %>%
group_by(Organ) %>%
dplyr::count()
# n cells per organ
cell_meta_rna %>%
group_by(Organ) %>%
dplyr::count()
dim(cell_meta_rna)
cell_meta_rna$Sample <- factor(cell_meta_rna$Sample, sample_order)# get cell barcodes in the df
# load all RNA cell meta into one table, add in the organ name
cell_meta_atac_paths <- list.files(here::here("output/02-global_analysis/01/per_cell_meta"), pattern = "*_atac_meta.tsv", full.names = TRUE)
cell_meta_atac <- map(cell_meta_atac_paths, ~ data.table::fread(.x, data.table = FALSE) %>%
tibble::rownames_to_column(var = "Barcode"))
cell_meta_atac <- map_dfr(cell_meta_atac, function(df) {
if ("Clusters" %in% colnames(df)) df %>% dplyr::rename(RNA_Clusters = Clusters) %>% mutate(RNA_Clusters = as.character(RNA_Clusters))
else df %>% mutate(RNA_Clusters = as.character(RNA_Clusters))
})
# get a cluster column matching RNA
per_sample_meta$Sample <- per_sample_meta$sample
cell_meta_atac <- cell_meta_atac %>%
left_join(per_sample_meta, by = c("Sample")) %>%
left_join(organs_keep, by = c("organ")) %>%
mutate(Cluster = as.numeric(gsub("c", "", RNA_Clusters))) %>%
mutate(Organ = organ) %>%
arrange(Organ, Cluster) %>%
mutate(Cluster = paste0(organ_code, "_", Cluster)) %>%
mutate(Cluster = factor(Cluster, levels = unique(.$Cluster)))Save metadata:
save(cell_meta_atac, cell_meta_rna, file = glue("{out}/cell_meta_all.Rda"))load(glue("{out}/cell_meta_all.Rda"))Basic stats:
# cells
dim(cell_meta_rna)## [1] 817740 27
dim(cell_meta_atac)## [1] 817740 46
# samples
unique(cell_meta_rna$Sample)## [1] T273_b16_Adr_PCW18 T318_b16_Adr_PCW21 T390_b16_Adr_PCW13
## [4] T40_b16_Adr_PCW17 T129_b4_Brain_PCW22 T155_b4_Brain_PCW10
## [7] T183_b4_Brain_PCW19 T382_b4_Brain_PCW15 T47_b4_Brain_PCW17
## [10] T63_b4_Brain_PCW20 T64_b4_Brain_PCW18 T94_b4_Brain_PCW17
## [13] T106_b13_Eye_PCW17 T180_b13_Eye_PCW19 T186_b13_Eye_PCW23
## [16] T410_b13_Eye_PCW13 T5_b13_Eye_PCW19 T51_b13_Eye_PCW20
## [19] T79_b13_Eye_PCW18 T014_b11_Heart_PCW18 T032_b11_Heart_PCW12
## [22] T104_b11_Heart_PCW17 T30_b11_Heart_PCW17 T361_b11_Heart_PCW20
## [25] T379_b11_Heart_PCW15 T407_b11_Heart_PCW19 T92_b11_Heart_PCW18
## [28] T166_b15_Liver_PCW19 T235_b15_Liver_PCW21 T275_b15_Liver_PCW18
## [31] T350_b15_Liver_PCW21 T375_b15_Liver_PCW15 T398_b15_Liver_PCW17
## [34] T71_b15_Liver_PCW18 T107_b12_Lung_PCW17 T132_b12_Lung_PCW21
## [37] T164_b12_Lung_PCW10 T175_b18_Lung_PCW19 T198_b18_Lung_PCW23
## [40] T226_b12_Lung_PCW21 T24_b18_Lung_PCW17 T256_b12_Lung_PCW18
## [43] T286_b12_Lung_PCW14 T304_b12_Lung_PCW21 T305_b18_Lung_PCW21
## [46] T333_b18_Lung_PCW21 T408L_b12_Lung_PCW19 T49_b18_Lung_PCW20
## [49] T77_b18_Lung_PCW18 T10_b9_Muscle_PCW19 T123_b9_Muscle_PCW22
## [52] T161_b9_Muscle_PCW10 T377_b9_Muscle_PCW15 T44_b9_Muscle_PCW17
## [55] T61_b9_Muscle_PCW20 T87_b9_Muscle_PCW18 T96_b9_Muscle_PCW17
## [58] T11_b19_Skin_PCW19 T408S_b19_Skin_PCW19 T45_b19_Skin_PCW17
## [61] T165_b17_Spleen_PCW19 T270_b17_Spleen_PCW18 T88_b17_Spleen_PCW18
## [64] T238_b10_Stomach_PCW21 T269_b10_Stomach_PCW18 T303_b10_Esophagus_PCW21
## [67] T322_b10_Stomach_PCW21 T35_b10_Stomach_PCW17 T399_b10_Stomach_PCW17
## [70] T53_b10_Stomach_PCW20 T23_b17_Thymus_PCW17 T406_b17_Thymus_PCW19
## [73] T90_b17_Thymus_PCW18 T136_b16_Thyroid_PCW21 T136_b22_Thyroid_PCW21
## [76] T176_b16_Thyroid_PCW19
## 76 Levels: T390_b16_Adr_PCW13 T40_b16_Adr_PCW17 ... T136_b22_Thyroid_PCW21
Let’s generate a few plots for per-sample distributions of QC metrics:
cell_meta_rna %>%
group_by(Sample, Organ) %>%
dplyr::count() %>%
ggplot(aes(x = Sample, y = n)) +
geom_col(aes(fill = Organ)) +
geom_text(aes(label = n), angle = 90, hjust = -0.2) +
ggtitle("Number of cells per sample") +
scale_fill_manual(values = cmap_organ) +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(0, 30000)) +
hide_legend() +
rotate_x()# ggsave(paste0(figout, "n_cells_per_sample.pdf"), width = 13, height = 5)cell_meta_rna %>%
ggplot(aes(x = Sample, y = nCount_RNA)) +
geom_boxplot(aes(fill = Organ), outlier.shape = NA) +
scale_fill_manual(values = cmap_organ) +
rotate_x() +
ggtitle("Number of UMIs per cell") +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(0, 20000)) +
hide_legend()# ggsave(paste0(figout, "n_UMIs_per_sample.pdf"), width = 13, height = 5)cell_meta_rna %>%
ggplot(aes(x = Sample, y = nFeature_RNA)) +
geom_boxplot(aes(fill = Organ), outlier.shape = NA) +
scale_fill_manual(values = cmap_organ) +
rotate_x() +
ggtitle("Number of genes per cell") +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(0, 7000)) +
scale_y_continuous(breaks = seq(0, 10000, by = 1000)) +
hide_legend()# ggsave(paste0(figout, "n_genes_per_sample.pdf"), width = 13, height = 5)cell_meta_atac %>%
ggplot(aes(x = Sample, y = TSSEnrichment)) +
geom_boxplot(aes(fill = Organ), outlier.shape = NA) +
geom_hline(yintercept = c(5, 10, 15), color = "red", linetype = 2) +
scale_fill_manual(values = cmap_organ) +
rotate_x() +
ggtitle("TSS enrichment per cell") +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(0, 25)) +
hide_legend()# ggsave(paste0(figout, "TSS_enrichment_ATAC_per_sample.pdf"), width = 13, height = 5)cell_meta_atac %>%
ggplot(aes(x = Sample, y = nFrags)) +
geom_boxplot(aes(fill = Organ), outlier.shape = NA) +
geom_hline(yintercept = c(0, 2500, 5000, 7500), color = "red", linetype = 2) +
scale_fill_manual(values = cmap_organ) +
rotate_x() +
ggtitle("Number of fragments per cell") +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(0, 30000)) +
scale_y_continuous(breaks = seq(0, 30000, by = 2500)) +
hide_legend()# ggsave(paste0(figout, "n_fragments_ATAC_per_sample.pdf"), width = 13, height = 5)cell_meta_rna %>%
ggplot(aes(x = Cluster)) +
geom_bar(aes(fill = PCW), position = "fill") +
scale_fill_manual(values = cmap_pcw) +
ggtitle("PCW") +
rotate_x() +
ggtitle("Cluster breakdown by age") +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
theme(legend.position = "bottom", panel.border = element_blank()) +
guides(fill = guide_legend(nrow = 1))# ggsave(paste0(figout, "breakdown_by_age_per_cluster.pdf"), width = 20, height = 6)# get a cluster order
cell_meta_rna <- cell_meta_rna %>%
arrange(Organ, Clusters) %>%
mutate(Cluster = factor(Cluster, levels = unique(.$Cluster)))
cell_meta_rna %>%
group_by(Cluster, Organ) %>%
dplyr::count() %>%
ggplot(aes(x = Cluster, y = n)) +
geom_col(aes(fill = Organ)) +
geom_text(aes(label = n), angle = 90, hjust = -0.2) +
ggtitle("Number of cells per cluster") +
scale_fill_manual(values = cmap_organ) +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(0, 30000)) +
hide_legend() +
rotate_x()# ggsave(paste0(figout, "n_cells_per_cluster.pdf"), width = 20, height = 5)cell_meta_rna %>%
ggplot(aes(x = Cluster, y = nCount_RNA)) +
geom_boxplot(aes(fill = Organ), outlier.shape = NA) +
geom_hline(yintercept = c(2500, 5000, 7500), color = "red", linetype = 2) +
scale_fill_manual(values = cmap_organ) +
rotate_x() +
ggtitle("Number of UMIs per cell") +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(0, 25000)) +
hide_legend()# ggsave(paste0(figout, "n_UMIs_RNA_per_cluster.pdf"), width = 20, height = 5)cell_meta_rna %>%
ggplot(aes(x = Cluster, y = nFeature_RNA)) +
geom_boxplot(aes(fill = Organ), outlier.shape = NA) +
geom_hline(yintercept = c(1000, 2000, 3000), color = "red", linetype = 2) +
scale_fill_manual(values = cmap_organ) +
rotate_x() +
ggtitle("Number of genes per cell") +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(0, 7000)) +
scale_y_continuous(breaks = seq(0, 10000, by = 1000)) +
hide_legend()# ggsave(paste0(figout, "n_genes_RNA_per_cluster.pdf"), width = 20, height = 5)cell_meta_atac %>%
ggplot(aes(x = Cluster, y = TSSEnrichment)) +
geom_boxplot(aes(fill = Organ), outlier.shape = NA) +
geom_hline(yintercept = c(5, 10, 15), color = "red", linetype = 2) +
scale_fill_manual(values = cmap_organ) +
rotate_x() +
ggtitle("TSS enrichment per cell") +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(0, 25)) +
hide_legend()# ggsave(paste0(figout, "TSS_enrichment_ATAC_per_cluster.pdf"), width = 20, height = 5)cell_meta_atac %>%
ggplot(aes(x = Cluster, y = nFrags)) +
geom_boxplot(aes(fill = Organ), outlier.shape = NA) +
geom_hline(yintercept = c(0, 2500, 5000, 7500), color = "red", linetype = 2) +
scale_fill_manual(values = cmap_organ) +
rotate_x() +
ggtitle("Number of fragments per cell") +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(0, 30000)) +
scale_y_continuous(breaks = seq(0, 30000, by = 2500)) +
hide_legend()# ggsave(paste0(figout, "n_fragments_ATAC_per_cluster.pdf"), width = 20, height = 5)Total fragments per cluster:
anno <- tibble::tribble(~Organ, ~y, ~Label,
"Adrenal", log10(2500000), "2.5M",
"Adrenal", log10(5000000), "5M",
"Adrenal", log10(10000000), "10M")
cell_meta_atac %>%
dplyr::group_by(Organ, Cluster) %>%
dplyr::summarize(Total_fragments = sum(nFrags)) %>%
ggplot(aes(x = Cluster, y = log10(Total_fragments))) +
geom_col(aes(fill = Organ)) +
# label 2.5M, 5M, and 10M fragments
geom_hline(yintercept = c(log10(10000000), log10(5000000), log10(2500000)), color = "red", linetype = 2) +
geom_text(data = anno, aes(x = 1, y = y, label = Label), hjust = -1) +
scale_fill_manual(values = cmap_organ) +
rotate_x() +
ggtitle("Log10 Total fragments per cluster") +
facet_grid(. ~ Organ, scales = "free_x", space = "free") +
hide_legend()## `summarise()` has grouped output by 'Organ'. You can override using the
## `.groups` argument.
# ggsave(paste0(figout, "total_n_fragments_ATAC_per_cluster.pdf"), width = 20, height = 5)Collect metadata from Shendure Lab Fetal atlases:
shendure_rna <- readr::read_csv(here::here("data/external/Domcke_Cao_metadata/ShendureLab_HumanFetal_RNA_Metadata.csv"))## Rows: 121 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Sample_ID, Fetus ID, Organ, Sex, Downstream_analysis, Assay, Comment
## dbl (8): Estimated age (days), Cell number, mean_mRNA_count, median_mRNA_cou...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
shendure_atac <- readr::read_csv(here::here("data/external/Domcke_Cao_metadata/ShendureLab_HumanFetal_ATAC_Metadata.csv"))## Rows: 72 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Sample, Donor_id, Sex, Tissue, Batch, Included_in_downstream_analy...
## dbl (19): Day_of_pregnancy, Cell_threshold, Fraction_HS, Fraction_TSS, Media...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# change case for ATAC metadata file to match
str_sub(shendure_atac$Tissue, 1, 1) <- str_sub(shendure_atac$Tissue, 1, 1) %>% str_to_upper()
shendure_rna$experiment <- "Cao et al."
shendure_atac$experiment <- "Domcke et al."
per_sample_meta$experiment <- "HDMA"
palette_experiment <- c("HDMA" = "forestgreen", "Cao et al." = "gray70", "Domcke et al." = "gray70")
# Filter organs
# Convert Cerebrum to Brain
shendure_rna$Organ[which(shendure_rna$Organ == "Cerebrum")] <- "Brain"
shendure_atac$Tissue[which(shendure_atac$Tissue == "Cerebrum")] <- "Brain"
# Convert Stomach to StomachEsophagus
shendure_rna$Organ[which(shendure_rna$Organ == "Stomach")] <- "StomachEsophagus"
shendure_atac$Tissue[which(shendure_atac$Tissue == "Stomach")] <- "StomachEsophagus"
filt.shendure_rna <- shendure_rna %>% filter(Organ %in% organs)
filt.shendure_atac <- shendure_atac %>% filter(Tissue %in% organs)
rna.meta.toPlot <- bind_rows(data.frame(Organ = per_sample_meta$organ,
median_ngenes = per_sample_meta$median_ngene,
median_numis = per_sample_meta$median_numi,
experiment = per_sample_meta$experiment),
data.frame(Organ = filt.shendure_rna$Organ,
median_ngenes = filt.shendure_rna$median_gene_count,
median_numis = filt.shendure_rna$median_mRNA_count,
experiment = filt.shendure_rna$experiment))
atac.meta.toPlot <- bind_rows(data.frame(Organ = per_sample_meta$organ,
median_nfrags = per_sample_meta$median_nfrags,
median_tss = per_sample_meta$median_tss,
median_frip = per_sample_meta$median_frip,
experiment = per_sample_meta$experiment),
data.frame(Organ = filt.shendure_atac$Tissue,
median_nfrags = filt.shendure_atac$Median_total_fragments,
median_tss = filt.shendure_atac$TSS_enrichment,
median_frip = filt.shendure_atac$Median_per_cell_FRiP,
experiment = filt.shendure_atac$experiment))Plot QC comparisons:
p1 <- ggplot(rna.meta.toPlot, aes(x = Organ, y = median_numis, fill = experiment)) +
geom_boxplot(outlier.shape = NA) +
scale_fill_manual(values = palette_experiment, name = "Study") +
scale_y_log10() +
theme_bw() +
theme(axis.text.x = element_blank()) +
xlab("") +
ylab("median # UMIs")
p2 <- ggplot(rna.meta.toPlot, aes(x = Organ, y = median_ngenes, fill = experiment)) +
geom_boxplot(outlier.shape = NA) +
scale_fill_manual(values = palette_experiment, name = "Study") +
scale_y_log10() +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust=1)) +
xlab("") +
ylab("median # genes")
plot_grid(p1, p2, ncol = 1, rel_heights = c(0.45, 0.55), align = "v", axis = "rl")# ggsave(paste0(figout, "BoxPlot_HDMA_vs_Cao_RNA.pdf"), width = 6, height = 5)p1 <- ggplot(atac.meta.toPlot, aes(x = Organ, y = median_nfrags, fill = experiment)) +
geom_boxplot(outlier.shape = NA) +
scale_fill_manual(values = palette_experiment, name = "Study") +
scale_y_log10() +
theme_bw() +
theme(axis.text.x = element_blank()) +
xlab("") +
ylab("median # fragments")
# p2 <- ggplot(atac.meta.toPlot, aes(x = Organ, y = median_frip, fill = experiment)) +
# geom_boxplot(outlier.shape = NA) +
# scale_fill_manual(values = palette_experiment, name = "Study") +
# scale_y_log10() +
# theme_bw() +
# theme(axis.text.x = element_blank()) +
# xlab("") +
# ylab("median FRiP")
p3 <- ggplot(atac.meta.toPlot, aes(x = Organ, y = median_tss, fill = experiment)) +
geom_boxplot(outlier.shape = NA) +
scale_fill_manual(values = palette_experiment, name = "Study") +
scale_y_log10() +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust=1)) +
xlab("") +
ylab("median TSS enrichment")
plot_grid(p1, p3, ncol = 1, rel_heights = c(0.44, 0.55), align = "v", axis = "rl")# ggsave(paste0(figout, "boxPlot_HDMA_vs_Cao_ATAC.pdf"), width = 6, height = 5).libPaths()## [1] "/oak/stanford/groups/wjg/bliu/software/R_lib"
## [2] "/share/software/user/open/R/4.1.2/lib64/R/library"
sessionInfo()## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /share/software/user/open/openblas/0.3.10/lib/libopenblas_haswellp-r0.3.10.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggthemes_4.2.4 RColorBrewer_1.1-3
## [3] DoubletFinder_2.0.3 BPCells_0.2.0
## [5] RcppAlgos_2.7.2 ggrastr_1.0.1
## [7] GenomicFeatures_1.46.5 AnnotationDbi_1.56.2
## [9] here_1.0.1 rhdf5_2.38.1
## [11] SummarizedExperiment_1.24.0 Biobase_2.54.0
## [13] MatrixGenerics_1.6.0 Rcpp_1.0.10
## [15] Matrix_1.5-4 GenomicRanges_1.46.1
## [17] GenomeInfoDb_1.30.1 IRanges_2.28.0
## [19] S4Vectors_0.32.4 BiocGenerics_0.40.0
## [21] matrixStats_0.63.0 plyr_1.8.8
## [23] magrittr_2.0.3 gtable_0.3.3
## [25] gtools_3.9.4 gridExtra_2.3
## [27] ArchR_1.0.2 data.table_1.14.8
## [29] cowplot_1.1.1 SeuratObject_4.1.3
## [31] Seurat_4.3.0 stringr_1.5.0
## [33] tibble_3.2.1 readr_2.1.4
## [35] purrr_1.0.2 scales_1.3.0
## [37] ggplot2_3.5.0 tidyr_1.3.1
## [39] dplyr_1.1.4 glue_1.6.2
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 spatstat.explore_3.1-0 reticulate_1.28
## [4] tidyselect_1.2.0 RSQLite_2.3.1 htmlwidgets_1.6.2
## [7] gmp_0.7-1 BiocParallel_1.28.3 Rtsne_0.16
## [10] munsell_0.5.0 codetools_0.2-18 ica_1.0-3
## [13] DT_0.27 future_1.33.2 miniUI_0.1.1.1
## [16] withr_3.0.0 spatstat.random_3.1-4 colorspace_2.1-0
## [19] progressr_0.13.0 filelock_1.0.2 highr_0.10
## [22] knitr_1.42 ROCR_1.0-11 tensor_1.5
## [25] listenv_0.9.0 labeling_0.4.2 GenomeInfoDbData_1.2.7
## [28] polyclip_1.10-4 farver_2.1.1 bit64_4.0.5
## [31] rprojroot_2.0.3 parallelly_1.35.0 vctrs_0.6.5
## [34] generics_0.1.3 xfun_0.39 BiocFileCache_2.2.1
## [37] R6_2.5.1 ggbeeswarm_0.7.2 bitops_1.0-7
## [40] rhdf5filters_1.6.0 spatstat.utils_3.0-2 cachem_1.0.8
## [43] DelayedArray_0.20.0 vroom_1.6.3 promises_1.2.0.1
## [46] BiocIO_1.4.0 beeswarm_0.4.0 globals_0.16.2
## [49] goftest_1.2-3 rlang_1.1.3 splines_4.1.2
## [52] rtracklayer_1.54.0 lazyeval_0.2.2 spatstat.geom_3.1-0
## [55] yaml_2.3.7 reshape2_1.4.4 abind_1.4-5
## [58] crosstalk_1.2.0 httpuv_1.6.10 tools_4.1.2
## [61] ellipsis_0.3.2 jquerylib_0.1.4 ggridges_0.5.4
## [64] progress_1.2.2 zlibbioc_1.40.0 RCurl_1.98-1.12
## [67] prettyunits_1.1.1 deldir_1.0-6 pbapply_1.7-0
## [70] zoo_1.8-12 ggrepel_0.9.5 cluster_2.1.2
## [73] scattermore_1.0 lmtest_0.9-40 RANN_2.6.1
## [76] fitdistrplus_1.1-11 hms_1.1.3 patchwork_1.2.0
## [79] mime_0.12 evaluate_0.21 xtable_1.8-4
## [82] XML_3.99-0.14 compiler_4.1.2 biomaRt_2.50.3
## [85] KernSmooth_2.23-20 crayon_1.5.2 htmltools_0.5.5
## [88] later_1.3.1 tzdb_0.4.0 DBI_1.1.3
## [91] dbplyr_2.3.2 MASS_7.3-54 rappdirs_0.3.3
## [94] cli_3.6.2 parallel_4.1.2 igraph_1.4.2
## [97] pkgconfig_2.0.3 GenomicAlignments_1.30.0 sp_1.6-0
## [100] plotly_4.10.1 spatstat.sparse_3.0-1 xml2_1.3.4
## [103] vipor_0.4.5 bslib_0.4.2 XVector_0.34.0
## [106] digest_0.6.31 sctransform_0.3.5 RcppAnnoy_0.0.20
## [109] spatstat.data_3.0-1 Biostrings_2.62.0 rmarkdown_2.21
## [112] leiden_0.4.2 uwot_0.1.14 restfulr_0.0.15
## [115] curl_5.0.0 shiny_1.7.4 Rsamtools_2.10.0
## [118] rjson_0.2.21 lifecycle_1.0.3 nlme_3.1-153
## [121] jsonlite_1.8.4 Rhdf5lib_1.16.0 viridisLite_0.4.2
## [124] fansi_1.0.4 pillar_1.9.0 lattice_0.20-45
## [127] KEGGREST_1.34.0 fastmap_1.1.1 httr_1.4.6
## [130] survival_3.2-13 png_0.1-8 bit_4.0.5
## [133] stringi_1.7.12 sass_0.4.6 blob_1.2.4
## [136] memoise_2.0.1 irlba_2.3.5.1 future.apply_1.10.0