Chapter 9 Appendix

9.1 Protocols

The following protocols describe how this package can be used to perform the data analysis shown in the original CellTrails article.

9.1.1 Chicken E15 Utricle Data

# Load expression data
bundle <- readRDS(system.file("exdata", "bundle.rds", package="CellTrails"))

# Manifold Learning
se <- embedSamples(bundle)
d <- findSpectrum(se$eigenvalues, frac=100) #Similar to Figure 1E
latentSpace(bundle) <- se$components[, d]

# Clustering
states(bundle) <- findStates(bundle, min_size=0.01, 
                             min_feat=5, max_pval=1e-04, min_fc=2)

# Sample Ordering
bundle <- connectStates(bundle, l=10)
showTrajInfo(bundle)

bundle <- selectTrajectory(bundle, component=1)
bundle <- fitTrajectory(bundle)

# CellTrails maps
# Please note: For illustration purposes, the layout was 
# computed in yEd using functions write.ygraphml and 
# read.ygraphml, and is part of the CellTrails package
tl <- read.ygraphml(system.file("exdata", "bundle.graphml", 
                                package="CellTrails"))
trajLayout(bundle, adjust=TRUE) <- tl

# Define subtrail by adding a user-defined landmark
userLandmarks(bundle) <- "Cell-8-57"

# Analysis of Expression Dynamics
bundle <- addTrail(bundle, from="B3", to="H9", name="TrS")
bundle <- addTrail(bundle, from="B3", to="H1", name="TrES")
bundle <- addTrail(bundle, from="B3", to="U1", name="TrES*")

# Inter-trail comparison (similar to Figure 5B)
rmsd_all <- contrastTrailExpr(bundle, trail_names=c("TrS", "TrES"))
rmsd_all <- rmsd_all[rmsd_all > 0]
sort(rmsd_all[scale(log(rmsd_all)) > 1.65])

# -------------------------------
# Visualizations
# -------------------------------
# Plot size of clusters (similar to Figure 2E)
plotStateSize(bundle)

# Plot expression distribution
plotStateExpression(bundle, feature_name="OTOA")
plotStateExpression(bundle, feature_name="ATOH1")
plotStateExpression(bundle, feature_name="CALB2")
plotStateExpression(bundle, feature_name="ATP2B2")

# Plot manifold (similar to Figure S4F)
set.seed(1101)
gp <- plotManifold(bundle, color_by="phenoName", name="state")
manifold2D(bundle) <- gp
plotManifold(bundle, color_by="phenoName", name="fm143")
plotManifold(bundle, color_by="featureName", name="OTOA")
plotManifold(bundle, color_by="featureName", name="CALB2")

# Plot state trajectory graph (similar to Figure 1G)
plotStateTrajectory(bundle, color_by="phenoName", 
                    name="fm143", point_size=1.5, 
                    label_offset=4, component=1)
plotStateTrajectory(bundle, color_by="phenoName", 
                    name="origin", point_size=1.5, 
                    label_offset=4, component=1)
plotStateTrajectory(bundle, color_by="featureName", 
                    name="OTOA", point_size=5, 
                    label_offset=4, component=1)
plotStateTrajectory(bundle, color_by="featureName", 
                    name="ATOH1", point_size=5, 
                    label_offset=4, component=1)
plotStateTrajectory(bundle, color_by="featureName", 
                    name="CALB2", point_size=5, 
                    label_offset=4, component=1)

# Plot trajectory fit (similar to Figure 2E)
plotTrajectoryFit(bundle)

# Plot CellTrails maps (similar to Figure 3 and Table S2)
plotMap(bundle, color_by="phenoName", name="fm143")
plotMap(bundle, color_by="featureName", 
        name="ATOH1", type="raw")
plotMap(bundle, color_by="featureName", 
        name="ATOH1", type="surface.fit")
plotMap(bundle, color_by="featureName", 
        name="ATOH1", type="surface.se")
plotMap(bundle, color_by="featureName", 
        name="ATOH1", type="surface.fit", samples_only=TRUE)

# Plot landmarks
plotMap(bundle, color_by="phenoName", name="landmark")

# Plot trails (similar to Figure 4K)
plotTrail(bundle, name="TrS")
plotTrail(bundle, name="TrES")
plotTrail(bundle, name="TrES*")

# Plot single dynamics (similar to Figure 4B,G and Table S2)
plotDynamic(bundle, feature_name="CALB2", trail_name="TrES")
plotDynamic(bundle, feature_name="ATP2B2", trail_name="TrES")

# Compare dynamics (similar to Figure 6A)
plotDynamic(bundle, 
            feature_name=c("TECTA", "OTOA", "ATOH1", "POU4F3", 
                           "MYO7A", "CALB2", "SYN3", "SKOR2",
                           "ATP2B2", "LOXHD1", "MYO3A", "TMC2", 
                           "TNNC2"), trail_name="TrES")

plotDynamic(bundle, 
            feature_name=c("TECTA", "OTOA", "ATOH1", "POU4F3", 
                           "MYO7A", "CALB2", "SYN3", "SKOR2",
                           "ATP2B2", "LOXHD1", "MYO3A", "TMC2", 
                           "TNNC2"), trail_name="TrS")

9.1.2 Mouse P1 Utricle Data

In the following, we provide a protocol to analyze the publicly-available dataset containing single-cell RNASeq measurements of 14,313 genes in 120 cells from P1 mouse utricles (GEO accession code: GSE71982). Experimental metadata was generated during cell sorting (GFP and tdTomato fluorescence indicating major cell types). The processed data (mmu_p1_utricle.rda) can be downloaded as SingleCellExperiment object from here. The trajectory layout (mmu_p1_utricle.graphml) can be downloaded from here.

If you use this dataset for your research, please cite the original work by Burns et al. (2015).

# Load expression data
p1utricle <- readRDS("mmu_p1_utricle.rds")

# Feature Selection
trajFeatureNames(p1utricle) <- filterTrajFeaturesByDL(p1utricle, threshold=2)
trajFeatureNames(p1utricle) <- filterTrajFeaturesByCOV(p1utricle, threshold=.5)
trajFeatureNames(p1utricle) <- filterTrajFeaturesByFF(p1utricle)

showTrajInfo(p1utricle)

# Manifold Learning
se <- embedSamples(p1utricle)
d <- findSpectrum(se$eigenvalues)
latentSpace(p1utricle) <- se$components[, d]

# Clustering (parameters account for low sample size)
states(p1utricle) <- findStates(p1utricle, max_pval=1e-3, min_feat=2)

# Sample Ordering
p1utricle <- connectStates(p1utricle)
p1utricle <- fitTrajectory(p1utricle)
showTrajInfo(p1utricle)

# CellTrails maps
tl <- read.ygraphml("mmu_p1_utricle.graphml")
trajLayout(p1utricle) <- tl

# Analysis of Expression Dynamics
p1utricle <- addTrail(p1utricle, from="H1", to="H3", name="Tr1")
p1utricle <- addTrail(p1utricle, from="H1", to="H2", name="Tr2")

# Inter-trail comparison
rmsd_all <- contrastTrailExpr(p1utricle, trail_names=c("Tr1", "Tr2"))
rmsd_all <- rmsd_all[rmsd_all > 0]
sort(rmsd_all[scale(log(rmsd_all)) > 1.65])

# Alternative: using parallel computing using doSNOW
library(doSNOW)
cpu.cl <- makeCluster(parallel::detectCores() * 2)
registerDoSNOW(cpu.cl)

fnames <- featureNames(p1utricle)
all_rmsd <- foreach(i=seq_along(fnames), .combine=rbind)  %dopar%  {
  g <- fnames[i]
  CellTrails::contrastTrailExpr(p1utricle, feature_name=g, 
                                trail_names=c("Tr1", "Tr2"), score="RMSD")
}
stopCluster(cpu.cl)
all_rmsd <- all_rmsd[, 1]
names(all_rmsd) <- fnames
all_rmsd <- all_rmsd[all_rmsd > 0]
zscores <- scale(log(all_rmsd))
sort(all_rmsd[zscores > 1.65])

# -------------------------------
# Visualizations
# -------------------------------
# Plot size of clusters (similar to Figure 7A)
plotStateSize(p1utricle)

# Plot expression distribution
plotStateExpression(p1utricle, feature_name="Otoa")
plotStateExpression(p1utricle, feature_name="Atoh1")
plotStateExpression(p1utricle, feature_name="Sox2")

# Plot manifold
set.seed(1101)
gp <- plotManifold(p1utricle, color_by="phenoName", name="state")
manifold2D(p1utricle) <- gp
plotManifold(p1utricle, color_by="phenoName", name="gate")
plotManifold(p1utricle, color_by="featureName", name="Otoa")
plotManifold(p1utricle, color_by="featureName", name="Fscn2")

# Plot state trajectory graph (similar to Figure 1G)
plotStateTrajectory(p1utricle, color_by="phenoName", 
                    name="gate", point_size=1.5, 
                    label_offset=4, component=1)
plotStateTrajectory(p1utricle, color_by="featureName", 
                    name="Otoa", point_size=5, 
                    label_offset=4, component=1)

# Plot trajectory fit (similar to Figure 7A)
plotTrajectoryFit(p1utricle)

# Plot CellTrails maps (similar to Figure 7C-E)
plotMap(p1utricle, color_by="phenoName", name="gate")
plotMap(p1utricle, color_by="featureName", 
        name="Atoh1", type="raw")
plotMap(p1utricle, color_by="featureName", 
        name="Atoh1", type="surface.fit")
plotMap(p1utricle, color_by="featureName", 
        name="Atoh1", type="surface.se")
plotMap(p1utricle, color_by="featureName", 
        name="Atoh1", type="surface.fit", 
        samples_only=TRUE)

# Plot landmarks
plotMap(p1utricle, color_by="phenoName", name="landmark")

# Plot trails (similar to Figure 7F)
plotTrail(p1utricle, name="Tr1")
plotTrail(p1utricle, name="Tr2")

# Plot single dynamics (similar to Figure 7I)
plotDynamic(p1utricle, feature_name="Fgf21", trail_name="Tr2")
plotDynamic(p1utricle, feature_name="Fgf21", trail_name="Tr1")

# Compare dynamics
plotDynamic(p1utricle, feature_name=c("Fscn1", "Fscn2"), trail_name="Tr2")

9.2 Runtime

In this section, we illustrate that a CellTrails analysis can be performed in a reasonable period of time. The elapsed computational runtime of each function was measured on a MacBook Pro (Early 2015) with a 3.1 GHz Intel Core i7 processor, 16 GB 1867 MHz DDR3 RAM, and an Intel Iris Graphics 6100 1536 MB graphics card.

9.2.1 Protocol: Chicken E15 Utricle Data

This dataset consists of 183 features and 1,008 samples. The computation time of the whole protocol as listed above took less then two minutes. Let’s assume that computing the layout takes about two minutes (starting yEd, running the layouter, saving the file), then the total runtime is up to five minutes.

9.2.2 Protocol: Mouse P1 Utricle Data

This dataset consists of 14,313 features and 120 samples. The computation time of the whole protocol as listed above (with parallelization of the inter-trail dynamics comparison) took less then seven minutes. Let’s assume that computing the layout takes about two minutes (starting yEd, running the layouter, saving the file), then the total runtime is up to 10 minutes.

9.3 Session Info

This manual was created with yEd version 3.14.4.
The R session and the system used to compile this document are listed below.

sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Sierra 10.12.4
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] CellTrails_0.99.14         SingleCellExperiment_1.0.0
##  [3] SummarizedExperiment_1.8.0 DelayedArray_0.4.1        
##  [5] matrixStats_0.52.2         Biobase_2.38.0            
##  [7] GenomicRanges_1.30.0       GenomeInfoDb_1.14.0       
##  [9] IRanges_2.12.0             S4Vectors_0.16.0          
## [11] BiocGenerics_0.24.0        BiocStyle_2.6.1           
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.1.2         Hmisc_4.0-3            
##   [3] RcppEigen_0.3.3.3.1     plyr_1.8.4             
##   [5] igraph_1.1.2            lazyeval_0.2.1         
##   [7] sp_1.2-5                shinydashboard_0.6.1   
##   [9] splines_3.4.3           BiocParallel_1.12.0    
##  [11] ggplot2_3.0.0           scater_1.6.1           
##  [13] digest_0.6.12           htmltools_0.3.6        
##  [15] viridis_0.4.0           magrittr_1.5           
##  [17] checkmate_1.8.5         memoise_1.1.0          
##  [19] cluster_2.0.6           limma_3.34.3           
##  [21] xts_0.10-0              prettyunits_1.0.2      
##  [23] colorspace_1.3-2        blob_1.1.0             
##  [25] ggrepel_0.7.0           xfun_0.1               
##  [27] dplyr_0.7.4             RCurl_1.95-4.8         
##  [29] tximport_1.6.0          EnvStats_2.3.0         
##  [31] lme4_1.1-14             bindr_0.1              
##  [33] survival_2.41-3         zoo_1.8-0              
##  [35] glue_1.2.0              gtable_0.2.0           
##  [37] zlibbioc_1.24.0         XVector_0.18.0         
##  [39] MatrixModels_0.4-1      cba_0.2-19             
##  [41] car_2.1-6               kernlab_0.9-25         
##  [43] prabclus_2.2-6          DEoptimR_1.0-8         
##  [45] SparseM_1.77            VIM_4.7.0              
##  [47] scales_0.5.0            mvtnorm_1.0-6          
##  [49] DBI_0.7                 edgeR_3.20.1           
##  [51] Rcpp_0.12.14            dtw_1.18-1             
##  [53] laeken_0.4.6            viridisLite_0.2.0      
##  [55] xtable_1.8-2            progress_1.1.2         
##  [57] htmlTable_1.11.0        foreign_0.8-69         
##  [59] bit_1.1-12              proxy_0.4-20           
##  [61] mclust_5.4              Formula_1.2-2          
##  [63] DT_0.2                  vcd_1.4-4              
##  [65] htmlwidgets_0.9         FNN_1.1                
##  [67] RColorBrewer_1.1-2      fpc_2.1-10             
##  [69] acepack_1.4.1           modeltools_0.2-21      
##  [71] pkgconfig_2.0.1         XML_3.98-1.9           
##  [73] flexmix_2.3-14          nnet_7.3-12            
##  [75] locfit_1.5-9.1          dynamicTreeCut_1.63-1  
##  [77] labeling_0.3            rlang_0.2.1            
##  [79] reshape2_1.4.3          later_0.7.3            
##  [81] AnnotationDbi_1.40.0    munsell_0.4.3          
##  [83] tools_3.4.3             RSQLite_2.0            
##  [85] evaluate_0.10.1         stringr_1.2.0          
##  [87] maptree_1.4-7           yaml_2.1.16            
##  [89] knitr_1.20              bit64_0.9-7            
##  [91] robustbase_0.92-8       purrr_0.2.4            
##  [93] dendextend_1.6.0        bindrcpp_0.2           
##  [95] nlme_3.1-131            quantreg_5.34          
##  [97] whisker_0.3-2           mime_0.5               
##  [99] scran_1.6.6             biomaRt_2.34.0         
## [101] pbkrtest_0.4-7          compiler_3.4.3         
## [103] rstudioapi_0.7          curl_3.1               
## [105] beeswarm_0.2.3          e1071_1.6-8            
## [107] smoother_1.1            tibble_1.3.4           
## [109] statmod_1.4.30          stringi_1.1.6          
## [111] lattice_0.20-35         trimcluster_0.1-2      
## [113] Matrix_1.2-12           nloptr_1.0.4           
## [115] lmtest_0.9-35           data.table_1.10.4-3    
## [117] bitops_1.0-6            httpuv_1.4.5           
## [119] R6_2.2.2                latticeExtra_0.6-28    
## [121] bookdown_0.7            promises_1.0.1         
## [123] gridExtra_2.3           vipor_0.4.5            
## [125] boot_1.3-20             MASS_7.3-47            
## [127] assertthat_0.2.0        destiny_2.6.1          
## [129] rhdf5_2.22.0            rprojroot_1.2          
## [131] rjson_0.2.15            withr_2.1.0            
## [133] GenomeInfoDbData_0.99.1 diptest_0.75-7         
## [135] mgcv_1.8-22             grid_3.4.3             
## [137] rpart_4.1-11            minqa_1.2.4            
## [139] tidyr_0.7.2             class_7.3-14           
## [141] rmarkdown_1.8           Rtsne_0.13             
## [143] TTR_0.23-2              shiny_1.1.0            
## [145] base64enc_0.1-3         ggbeeswarm_0.6.0

References

Burns, JC, MC Kelly, M Hoa, RJ Morell, and MW Kelley. 2015. “Single-Cell Rna-Seq Resolves Cellular Complexity in Sensory Organs from the Neonatal Inner Ear.” Nature Communications 15 (6): 8557.