1. Establish a working directory.
2. Create a sub-directory in that working directory named "pbmc_10k".
3. Download the three required input files from the following links into the pbmc_10k subdirectory:
https://cf.10xgenomics.com/samples/cell-arc/2.0.0/pbmc_unsorted_10k/pbmc_unsorted_10k_filtered_feature_bc_matrix.h5
https://cf.10xgenomics.com/samples/cell-arc/2.0.0/pbmc_unsorted_10k/pbmc_unsorted_10k_atac_fragments.tsv.gz
https://cf.10xgenomics.com/samples/cell-arc/2.0.0/pbmc_unsorted_10k/pbmc_unsorted_10k_atac_fragments.tsv.gz.tbi
4. Rename the "pbmc_unsorted_10k_filtered_feature_bc_matrix.h5" file to "filtered_feature_bc_matrix.h5"
5. From the working directory established in Step 1, run the following tutorial.
suppressPackageStartupMessages(library(ArchR))
addArchRGenome("hg38")
## Setting default genome to Hg38.
addArchRThreads(10)
## Setting default number of Parallel threads to 10.
#Get Input Fragment Files
inputFiles <- getInputFiles("pbmc_10k")[1]
names(inputFiles) <- "PBMC_10k"
#Create Arrow Files (disabled here)
ArrowFiles <- "PBMC_10k.arrow" #createArrowFiles(inputFiles, force = TRUE)
#ArchRProject
proj <- ArchRProject(ArrowFiles)
## Using GeneAnnotation set by addArchRGenome(Hg38)!
## Using GeneAnnotation set by addArchRGenome(Hg38)!
## Validating Arrows...
## Getting SampleNames...
## 1
## Copying ArrowFiles to Ouptut Directory! If you want to save disk space set copyArrows = FALSE
## 1
## Getting Cell Metadata...
## 1
## Merging Cell Metadata...
## Initializing ArchRProject...
##
## / |
## / \
## . / |.
## \\\ / |.
## \\\ / `|.
## \\\ / |.
## \ / |\
## \\#####\ / ||
## ==###########> / ||
## \\##==......\ / ||
## ______ = =|__ /__ || \\\
## ,--' ,----`-,__ ___/' --,-`-===================##========>
## \ ' ##_______ _____ ,--,__,=##,__ ///
## , __== ___,-,__,--'#' ===' `-' | ##,-/
## -,____,---' \\####\\________________,--\\_##,/
## ___ .______ ______ __ __ .______
## / \ | _ \ / || | | | | _ \
## / ^ \ | |_) | | ,----'| |__| | | |_) |
## / /_\ \ | / | | | __ | | /
## / _____ \ | |\ \\___ | `----.| | | | | |\ \\___.
## /__/ \__\ | _| `._____| \______||__| |__| | _| `._____|
##
#Import scRNA
seRNA <- import10xFeatureMatrix(
input = c("pbmc_10k/filtered_feature_bc_matrix.h5"),
names = c("PBMC_10k")
)
## Importing Feature Matrix 1 of 1
seRNA
## class: RangedSummarizedExperiment
## dim: 36578 11909
## metadata(0):
## assays(1): counts
## rownames(36578): MIR1302-2HG FAM138A ... AC007325.4 AC007325.2
## rowData names(5): feature_type genome id interval name
## colnames(11909): PBMC_10k#AAACAGCCAAGGAATC-1
## PBMC_10k#AAACAGCCAATCCCTT-1 ... PBMC_10k#TTTGTTGGTTGGTTAG-1
## PBMC_10k#TTTGTTGGTTTGCAGA-1
## colData names(0):
#Add scRNA
proj <- addGeneExpressionMatrix(input = proj, seRNA = seRNA, force = TRUE)
## ArchR logging to : ArchRLogs/ArchR-addGeneExpressionMatrix-2741a36d7911b-Date-2020-10-01_Time-13-54-58.log
## If there is an issue, please report to github with logFile!
## Overlap w/ scATAC = 0.981
## 2020-10-01 13:54:59 :
## Overlap Per Sample w/ scATAC : PBMC_10k=11361
## 2020-10-01 13:54:59 :
## 2020-10-01 13:55:02 : Batch Execution w/ safelapply!, 0 mins elapsed.
## 2020-10-01 13:55:03 : Adding PBMC_10k to GeneExpressionMatrix for Chr (1 of 23)!, 0.022 mins elapsed.
## 2020-10-01 13:55:06 : Adding PBMC_10k to GeneExpressionMatrix for Chr (2 of 23)!, 0.068 mins elapsed.
## 2020-10-01 13:55:08 : Adding PBMC_10k to GeneExpressionMatrix for Chr (3 of 23)!, 0.099 mins elapsed.
## 2020-10-01 13:55:10 : Adding PBMC_10k to GeneExpressionMatrix for Chr (4 of 23)!, 0.137 mins elapsed.
## 2020-10-01 13:55:11 : Adding PBMC_10k to GeneExpressionMatrix for Chr (5 of 23)!, 0.164 mins elapsed.
## 2020-10-01 13:55:13 : Adding PBMC_10k to GeneExpressionMatrix for Chr (6 of 23)!, 0.192 mins elapsed.
## 2020-10-01 13:55:15 : Adding PBMC_10k to GeneExpressionMatrix for Chr (7 of 23)!, 0.23 mins elapsed.
## 2020-10-01 13:55:17 : Adding PBMC_10k to GeneExpressionMatrix for Chr (8 of 23)!, 0.259 mins elapsed.
## 2020-10-01 13:55:19 : Adding PBMC_10k to GeneExpressionMatrix for Chr (9 of 23)!, 0.292 mins elapsed.
## 2020-10-01 13:55:21 : Adding PBMC_10k to GeneExpressionMatrix for Chr (10 of 23)!, 0.329 mins elapsed.
## 2020-10-01 13:55:23 : Adding PBMC_10k to GeneExpressionMatrix for Chr (11 of 23)!, 0.363 mins elapsed.
## 2020-10-01 13:55:25 : Adding PBMC_10k to GeneExpressionMatrix for Chr (12 of 23)!, 0.394 mins elapsed.
## 2020-10-01 13:55:28 : Adding PBMC_10k to GeneExpressionMatrix for Chr (13 of 23)!, 0.433 mins elapsed.
## 2020-10-01 13:55:29 : Adding PBMC_10k to GeneExpressionMatrix for Chr (14 of 23)!, 0.459 mins elapsed.
## 2020-10-01 13:55:31 : Adding PBMC_10k to GeneExpressionMatrix for Chr (15 of 23)!, 0.485 mins elapsed.
## 2020-10-01 13:55:33 : Adding PBMC_10k to GeneExpressionMatrix for Chr (16 of 23)!, 0.523 mins elapsed.
## 2020-10-01 13:55:35 : Adding PBMC_10k to GeneExpressionMatrix for Chr (17 of 23)!, 0.552 mins elapsed.
## 2020-10-01 13:55:36 : Adding PBMC_10k to GeneExpressionMatrix for Chr (18 of 23)!, 0.58 mins elapsed.
## 2020-10-01 13:55:39 : Adding PBMC_10k to GeneExpressionMatrix for Chr (19 of 23)!, 0.615 mins elapsed.
## 2020-10-01 13:55:40 : Adding PBMC_10k to GeneExpressionMatrix for Chr (20 of 23)!, 0.646 mins elapsed.
## 2020-10-01 13:55:42 : Adding PBMC_10k to GeneExpressionMatrix for Chr (21 of 23)!, 0.677 mins elapsed.
## 2020-10-01 13:55:44 : Adding PBMC_10k to GeneExpressionMatrix for Chr (22 of 23)!, 0.712 mins elapsed.
## 2020-10-01 13:55:46 : Adding PBMC_10k to GeneExpressionMatrix for Chr (23 of 23)!, 0.737 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addGeneExpressionMatrix-2741a36d7911b-Date-2020-10-01_Time-13-54-58.log
#Filter Cells
proj <- proj[proj$TSSEnrichment > 6 & proj$nFrags > 2500 & !is.na(proj$Gex_nUMI)]
#Doublet Filtration. Currently disabled just for tutorial. If you want to filter doublets uncomment below.
#proj <- addDoubletScores(proj)
#proj <- filterDoublets(proj)
#LSI-ATAC
proj <- addIterativeLSI(
ArchRProj = proj,
clusterParams = list(
resolution = 0.2,
sampleCells = 10000,
n.start = 10
),
saveIterations = FALSE,
useMatrix = "TileMatrix",
depthCol = "nFrags",
name = "LSI_ATAC"
)
## Checking Inputs...
## ArchR logging to : ArchRLogs/ArchR-addIterativeLSI-2741a3e47b68d-Date-2020-10-01_Time-13-55-48.log
## If there is an issue, please report to github with logFile!
## 2020-10-01 13:55:49 : Computing Total Across All Features, 0.001 mins elapsed.
## 2020-10-01 13:55:50 : Computing Top Features, 0.023 mins elapsed.
## ###########
## 2020-10-01 13:55:51 : Running LSI (1 of 2) on Top Features, 0.046 mins elapsed.
## ###########
## 2020-10-01 13:55:51 : Sampling Cells (N = 10000) for Estimated LSI, 0.048 mins elapsed.
## 2020-10-01 13:55:51 : Creating Sampled Partial Matrix, 0.048 mins elapsed.
## 2020-10-01 13:56:27 : Computing Estimated LSI (projectAll = FALSE), 0.64 mins elapsed.
## 2020-10-01 13:57:50 : Identifying Clusters, 2.027 mins elapsed.
## 2020-10-01 13:58:15 : Identified 9 Clusters, 2.44 mins elapsed.
## 2020-10-01 13:58:15 : Creating Cluster Matrix on the total Group Features, 2.44 mins elapsed.
## 2020-10-01 13:58:36 : Computing Variable Features, 2.787 mins elapsed.
## ###########
## 2020-10-01 13:58:36 : Running LSI (2 of 2) on Variable Features, 2.792 mins elapsed.
## ###########
## 2020-10-01 13:58:36 : Creating Partial Matrix, 2.792 mins elapsed.
## 2020-10-01 13:59:16 : Computing LSI, 3.459 mins elapsed.
## 2020-10-01 13:59:56 : Finished Running IterativeLSI, 4.123 mins elapsed.
#LSI-RNA
proj <- addIterativeLSI(
ArchRProj = proj,
clusterParams = list(
resolution = 0.2,
sampleCells = 10000,
n.start = 10
),
saveIterations = FALSE,
useMatrix = "GeneExpressionMatrix",
depthCol = "Gex_nUMI",
varFeatures = 2500,
firstSelection = "variable",
binarize = FALSE,
name = "LSI_RNA"
)
## Checking Inputs...
## ArchR logging to : ArchRLogs/ArchR-addIterativeLSI-2741a11b90423-Date-2020-10-01_Time-13-59-56.log
## If there is an issue, please report to github with logFile!
## 2020-10-01 13:59:57 : Computing Variability Across All Features, 0.004 mins elapsed.
## 2020-10-01 13:59:58 : Computing Variable Features, 0.022 mins elapsed.
## ###########
## 2020-10-01 13:59:59 : Running LSI (1 of 2) on Top Features, 0.046 mins elapsed.
## ###########
## 2020-10-01 14:00:00 : Sampling Cells (N = 10000) for Estimated LSI, 0.047 mins elapsed.
## 2020-10-01 14:00:00 : Creating Sampled Partial Matrix, 0.047 mins elapsed.
## 2020-10-01 14:00:10 : Computing Estimated LSI (projectAll = FALSE), 0.215 mins elapsed.
## 2020-10-01 14:00:22 : Identifying Clusters, 0.415 mins elapsed.
## 2020-10-01 14:00:38 : Identified 10 Clusters, 0.695 mins elapsed.
## 2020-10-01 14:00:38 : Creating Cluster Matrix on the total Group Features, 0.695 mins elapsed.
## 2020-10-01 14:00:49 : Computing Variable Features, 0.878 mins elapsed.
## ###########
## 2020-10-01 14:00:49 : Running LSI (2 of 2) on Variable Features, 0.879 mins elapsed.
## ###########
## 2020-10-01 14:00:49 : Creating Partial Matrix, 0.879 mins elapsed.
## 2020-10-01 14:00:58 : Computing LSI, 1.023 mins elapsed.
## 2020-10-01 14:01:08 : Finished Running IterativeLSI, 1.181 mins elapsed.
#Combined Dims
proj <- addCombinedDims(proj, reducedDims = c("LSI_ATAC", "LSI_RNA"), name = "LSI_Combined")
#UMAPs
proj <- addUMAP(proj, reducedDims = "LSI_ATAC", name = "UMAP_ATAC", minDist = 0.8, force = TRUE)
## 14:01:08 UMAP embedding parameters a = 0.2321 b = 1.681
## 14:01:08 Read 10887 rows and found 30 numeric columns
## 14:01:08 Using Annoy for neighbor search, n_neighbors = 40
## 14:01:08 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:01:10 Writing NN index file to temp file /tmp/RtmpCRJ6Ub/file2741a222963ac
## 14:01:10 Searching Annoy index using 5 threads, search_k = 4000
## 14:01:12 Annoy recall = 100%
## 14:01:13 Commencing smooth kNN distance calibration using 5 threads
## 14:01:14 Initializing from normalized Laplacian + noise
## 14:01:15 Commencing optimization for 200 epochs, with 628910 positive edges
## 14:01:27 Optimization finished
proj <- addUMAP(proj, reducedDims = "LSI_RNA", name = "UMAP_RNA", minDist = 0.8, force = TRUE)
## 14:01:29 UMAP embedding parameters a = 0.2321 b = 1.681
## 14:01:29 Read 10887 rows and found 30 numeric columns
## 14:01:29 Using Annoy for neighbor search, n_neighbors = 40
## 14:01:29 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:01:32 Writing NN index file to temp file /tmp/RtmpCRJ6Ub/file2741a6f02b180
## 14:01:32 Searching Annoy index using 5 threads, search_k = 4000
## 14:01:34 Annoy recall = 100%
## 14:01:35 Commencing smooth kNN distance calibration using 5 threads
## 14:01:36 Initializing from normalized Laplacian + noise
## 14:01:37 Commencing optimization for 200 epochs, with 609940 positive edges
## 14:01:50 Optimization finished
proj <- addUMAP(proj, reducedDims = "LSI_Combined", name = "UMAP_Combined", minDist = 0.8, force = TRUE)
## 14:01:51 UMAP embedding parameters a = 0.2321 b = 1.681
## 14:01:51 Read 10887 rows and found 60 numeric columns
## 14:01:51 Using Annoy for neighbor search, n_neighbors = 40
## 14:01:51 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:01:54 Writing NN index file to temp file /tmp/RtmpCRJ6Ub/file2741a549681d4
## 14:01:54 Searching Annoy index using 5 threads, search_k = 4000
## 14:01:56 Annoy recall = 100%
## 14:01:57 Commencing smooth kNN distance calibration using 5 threads
## 14:01:59 Initializing from normalized Laplacian + noise
## 14:02:00 Commencing optimization for 200 epochs, with 623338 positive edges
## 14:02:12 Optimization finished
#Add Clusters
proj <- addClusters(proj, reducedDims = "LSI_Combined", name = "Clusters", resolution = 0.4, force = TRUE)
## ArchR logging to : ArchRLogs/ArchR-addClusters-2741a3e3618eb-Date-2020-10-01_Time-14-02-14.log
## If there is an issue, please report to github with logFile!
## 2020-10-01 14:02:15 : Running Seurats FindClusters (Stuart et al. Cell 2019), 0.003 mins elapsed.
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10887
## Number of edges: 456672
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9356
## Number of communities: 20
## Elapsed time: 1 seconds
## 2020-10-01 14:02:38 : Testing Biased Clusters, 0.394 mins elapsed.
## 2020-10-01 14:02:38 : Identified Biased Clusters (n = 1), set filterBias = TRUE to re-assign these cells: , 0.4 mins elapsed.
## Biased Clusters : Cluster1
## 2020-10-01 14:02:38 : Testing Outlier Clusters, 0.4 mins elapsed.
## 2020-10-01 14:02:38 : Assigning Cluster Names to 20 Clusters, 0.4 mins elapsed.
## 2020-10-01 14:02:38 : Finished addClusters, 0.402 mins elapsed.
#Plot Embedding
p1 <- plotEmbedding(proj, name = "Clusters", embedding = "UMAP_ATAC", size = 1.5, labelAsFactors=F, labelMeans=F)
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-2741a252bdc25-Date-2020-10-01_Time-14-02-38.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-2741a252bdc25-Date-2020-10-01_Time-14-02-38.log
p2 <- plotEmbedding(proj, name = "Clusters", embedding = "UMAP_RNA", size = 1.5, labelAsFactors=F, labelMeans=F)
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-2741a1cb61f99-Date-2020-10-01_Time-14-02-39.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-2741a1cb61f99-Date-2020-10-01_Time-14-02-39.log
p3 <- plotEmbedding(proj, name = "Clusters", embedding = "UMAP_Combined", size = 1.5, labelAsFactors=F, labelMeans=F)
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-2741a59425da1-Date-2020-10-01_Time-14-02-40.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-2741a59425da1-Date-2020-10-01_Time-14-02-40.log
#Print Plots
p1
p2
p3
#Save Plot
plotPDF(p1, p2, p3, name = "UMAP-scATAC-scRNA-Combined", addDOC = FALSE)
## Plotting Ggplot!
## Plotting Ggplot!
## Plotting Ggplot!
#Print Session Info
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /share/software/user/open/openblas/0.2.19/lib/libopenblasp-r0.2.19.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] gridExtra_2.3 Seurat_3.1.2
## [3] BSgenome.Hsapiens.UCSC.hg38_1.4.1 BSgenome_1.54.0
## [5] rtracklayer_1.46.0 Biostrings_2.54.0
## [7] XVector_0.26.0 ArchR_1.0.0
## [9] magrittr_1.5 rhdf5_2.30.1
## [11] Matrix_1.2-17 data.table_1.12.8
## [13] SummarizedExperiment_1.16.1 DelayedArray_0.12.3
## [15] BiocParallel_1.20.1 matrixStats_0.56.0
## [17] Biobase_2.46.0 GenomicRanges_1.38.0
## [19] GenomeInfoDb_1.22.1 IRanges_2.20.2
## [21] S4Vectors_0.24.4 BiocGenerics_0.32.0
## [23] ggplot2_3.3.2
##
## loaded via a namespace (and not attached):
## [1] sn_1.5-5 plyr_1.8.6 igraph_1.2.4.2
## [4] lazyeval_0.2.2 splines_3.6.1 listenv_0.8.0
## [7] TH.data_1.0-10 digest_0.6.25 htmltools_0.4.0
## [10] gdata_2.18.0 cluster_2.1.0 ROCR_1.0-7
## [13] globals_0.12.5 RcppParallel_4.4.4 R.utils_2.9.2
## [16] sandwich_2.5-1 colorspace_1.4-1 rappdirs_0.3.1
## [19] ggrepel_0.8.1 xfun_0.12 dplyr_1.0.1
## [22] crayon_1.3.4 RCurl_1.98-1.1 jsonlite_1.6.1
## [25] survival_2.44-1.1 zoo_1.8-7 ape_5.3
## [28] glue_1.4.0 gtable_0.3.0 zlibbioc_1.32.0
## [31] leiden_0.3.2 Rhdf5lib_1.8.0 future.apply_1.4.0
## [34] scales_1.1.0 mvtnorm_1.0-12 bibtex_0.4.2.2
## [37] Rcpp_1.0.4 metap_1.3 plotrix_3.7-7
## [40] viridisLite_0.3.0 reticulate_1.16 rsvd_1.0.2
## [43] SDMTools_1.1-221.2 tsne_0.1-3 htmlwidgets_1.5.1
## [46] httr_1.4.1 gplots_3.0.1.2 RColorBrewer_1.1-2
## [49] TFisher_0.2.0 ellipsis_0.3.0 ica_1.0-2
## [52] farver_2.0.3 pkgconfig_2.0.3 XML_3.99-0.3
## [55] R.methodsS3_1.8.0 uwot_0.1.5 labeling_0.3
## [58] tidyselect_1.1.0 rlang_0.4.7 reshape2_1.4.3
## [61] munsell_0.5.0 tools_3.6.1 generics_0.0.2
## [64] ggridges_0.5.2 evaluate_0.14 stringr_1.4.0
## [67] yaml_2.2.1 npsurv_0.4-0 knitr_1.27
## [70] fitdistrplus_1.0-14 caTools_1.18.0 purrr_0.3.3
## [73] RANN_2.6.1 pbapply_1.4-2 future_1.16.0
## [76] nlme_3.1-140 R.oo_1.23.0 compiler_3.6.1
## [79] plotly_4.9.1 png_0.1-7 lsei_1.2-0
## [82] tibble_3.0.3 stringi_1.4.6 RSpectra_0.16-0
## [85] lattice_0.20-38 multtest_2.42.0 vctrs_0.3.2
## [88] mutoss_0.1-12 pillar_1.4.3 lifecycle_0.2.0
## [91] Rdpack_0.11-1 lmtest_0.9-37 RcppAnnoy_0.0.14
## [94] cowplot_1.0.0 bitops_1.0-6 irlba_2.3.3
## [97] gbRd_0.4-11 R6_2.4.1 KernSmooth_2.23-15
## [100] codetools_0.2-16 MASS_7.3-51.4 gtools_3.8.2
## [103] withr_2.1.2 GenomicAlignments_1.22.1 sctransform_0.2.1
## [106] Rsamtools_2.2.3 mnormt_1.5-5 multcomp_1.4-12
## [109] GenomeInfoDbData_1.2.2 tidyr_1.0.2 rmarkdown_2.1
## [112] Cairo_1.5-10 Rtsne_0.15 numDeriv_2016.8-1.1