Recovery and inconsistency rates

Load libraries

> library(limma)
> library(edgeR)
> library(DESeq2)
> library(baySeq)
> library(RColorBrewer)
> library(VennDiagram)

Load saved DE results

> files <- list.files(path="./Output", full.name=TRUE)
> keep <- intersect(grep("genes_removed", files, invert=TRUE), grep("fitmixture", files, invert=TRUE))
> files <- files[keep]
> methods <- gsub(".RData", "", strsplit2(files, split="/")[,3])
> for (i in 1:length(files)) {
+   load(files[i])
+   assign(methods[i], table)
+ }

Create sets of reference genes

> cutoff <- 0.05
> reference <- as.list(rep(NA, length(methods)))
> names(reference) <- methods
> for (m in methods) {
+   this.reference <- get(m)[["100vs000"]]
+   this.reference <- this.reference$GeneID[this.reference$FDR<cutoff]
+   reference[[m]] <- this.reference
+ } 

Calculate recovery and inconsistency rates

> comparisons <- c("100vs000", "075vs025", "050vs025", "075vs050", "100vs000.D", "075vs025.D", "050vs025.D", "075vs050.D")
> recovery <- matrix(NA, nrow=length(comparisons), ncol=length(methods))
> rownames(recovery) <- comparisons
> colnames(recovery) <- methods
> inconsistency <- truepositives <- falsediscoveries <- recovery
> for (m in methods) {
+   this.reference <- reference[[m]]
+   for (c in comparisons) {
+       results <- get(m)[[c]]
+       results.detected <- results$GeneID[results$FDR<cutoff]
+       tp <- sum(results.detected %in% this.reference)
+       recovery[c,m] <- tp/length(this.reference)
+       nd <- length(results.detected)
+       fd <- sum(!results.detected %in% this.reference)
+       if (nd==0) {
+         inconsistency[c,m] <- 0
+       } else {
+         inconsistency[c,m] <- fd/nd  
+       }
+       falsediscoveries[c,m] <- fd
+       truepositives[c,m] <- tp    
+   }
+ } 
> recovery
           bayseq_nb bayseq_norm deseq2 edger_classic edger_glm voom_qw   voom
100vs000      1.0000       1.000   1.00         1.000     1.000    1.00 1.0000
075vs025      0.6244       0.615   0.80         0.795     0.791    0.82 0.8005
050vs025      0.1015       0.000   0.33         0.316     0.313    0.39 0.2998
075vs050      0.1201       0.000   0.35         0.368     0.367    0.37 0.3413
100vs000.D    0.6805       0.992   0.81         0.774     0.776    0.82 0.7642
075vs025.D    0.1973       0.520   0.47         0.383     0.387    0.57 0.3710
050vs025.D    0.0065       0.118   0.18         0.040     0.041    0.15 0.0000
075vs050.D    0.0081       0.002   0.13         0.035     0.036    0.13 0.0067
> inconsistency
           bayseq_nb bayseq_norm  deseq2 edger_classic edger_glm voom_qw    voom
100vs000      0.0000        0.00 0.00000       0.00000   0.00000  0.0000 0.00000
075vs025      0.0038        0.22 0.00954       0.00949   0.00860  0.0170 0.01307
050vs025      0.0000        0.00 0.00000       0.00030   0.00000  0.0026 0.00031
075vs050      0.0000        0.00 0.00055       0.00128   0.00051  0.0012 0.00163
100vs000.D    0.0031        0.45 0.00736       0.00399   0.00398  0.0091 0.00496
075vs025.D    0.0000        0.22 0.00143       0.00098   0.00097  0.0092 0.00125
050vs025.D    0.0000        0.17 0.00000       0.00000   0.00000  0.0012 0.00000
075vs050.D    0.0000        0.25 0.00000       0.00000   0.00000  0.0013 0.00000
> falsediscoveries
           bayseq_nb bayseq_norm deseq2 edger_classic edger_glm voom_qw voom
100vs000           0           0      0             0         0       0    0
075vs025          25         260     81            81        73     165  114
050vs025           0           0      0             1         0      12    1
075vs050           0           0      2             5         2       5    6
100vs000.D        22        1244     63            33        33      88   41
075vs025.D         0         226      7             4         4      62    5
050vs025.D         0          37      0             0         0       2    0
075vs050.D         0           1      0             0         0       2    0
> truepositives
           bayseq_nb bayseq_norm deseq2 edger_classic edger_glm voom_qw  voom
100vs000       10435        1532  10513         10631     10641   11606 10757
075vs025        6516         942   8413          8452      8420    9532  8611
050vs025        1059           0   3504          3362      3334    4553  3225
075vs050        1253           0   3657          3915      3900    4313  3671
100vs000.D      7101        1520   8492          8230      8260    9554  8220
075vs025.D      2059         796   4902          4076      4116    6643  3991
050vs025.D        68         181   1871           425       436    1703     0
075vs050.D        85           3   1356           374       379    1493    72
> totaldetected <- falsediscoveries + truepositives
> totaldetected
           bayseq_nb bayseq_norm deseq2 edger_classic edger_glm voom_qw  voom
100vs000       10435        1532  10513         10631     10641   11606 10757
075vs025        6541        1202   8494          8533      8493    9697  8725
050vs025        1059           0   3504          3363      3334    4565  3226
075vs050        1253           0   3659          3920      3902    4318  3677
100vs000.D      7123        2764   8555          8263      8293    9642  8261
075vs025.D      2059        1022   4909          4080      4120    6705  3996
050vs025.D        68         218   1871           425       436    1705     0
075vs050.D        85           4   1356           374       379    1495    72

Plot of recovery rates

> name.methods <- 
+   c("baySeq", "baySeq-norm", "DESeq2", "edgeR-classic", "edgeR-glm", "voom-qw", "voom")
> names(name.methods) <- methods
> name.methods
      bayseq_nb     bayseq_norm          deseq2   edger_classic       edger_glm         voom_qw 
       "baySeq"   "baySeq-norm"        "DESeq2" "edgeR-classic"     "edgeR-glm"       "voom-qw" 
           voom 
         "voom" 
> name.comparisons <- gsub(".D", "d", comparisons)
> name.comparisons
[1] "100vs000"  "075vs025"  "050vs025"  "075vs050"  "100vs000d" "075vs025d" "050vs025d" "075vs050d"
> col <- brewer.pal(length(methods)+3, "Paired")
> col <- col[c(1,2,9,4,3,5,6)]
> names(col) <- methods
> lty <- c(1,1,1,1,3,2,2) 
> par(mfrow=c(1,2))
> par(mar=c(6,5,2,0))
> matplot(recovery[1:4,], main=" ",
+   type="l", lty=lty, lwd=2, col=col, 
+   ylim=c(0,1), ylab="", xaxt="n", yaxt="n")
> title(ylab="Recovery rate", line=3.5)
> axis(1, at=1:4, name.comparisons[1:4], tick=FALSE, las=2, font=1)
> axis(2, at=c(0,0.25,0.5,0.75,1), paste0(100*c(0,0.25,0.5,0.75,1), "%"), las=1)
> par(mar=c(6,3,2,2))
> matplot(recovery[5:8,], main=" ",
+   type="l", lty=lty, lwd=2, col=col,
+   ylim=c(0,1), ylab="", xaxt="n", yaxt="n")
> axis(1, at=1:4, name.comparisons[5:8], tick=FALSE, las=2, font=1)
> legend("topright", text.col=col, col=col, lty=lty, legend=name.methods, cex=0.7, bty="n")

Overlap in reference genes

> reference.all <- unique(unlist(reference))
> names <- c("bayseq_nb", "deseq2", "edger_glm", "voom_qw")
> venncounts <- matrix(0, nrow=length(reference.all), ncol=length(names))
> colnames(venncounts) <- names
> for (i in names) venncounts[,i] <- as.numeric(reference.all %in% reference[[i]])
> venn.plot <- draw.quad.venn(
+   area1 = sum(venncounts[,1]),
+   area2 = sum(venncounts[,2]),
+   area3 = sum(venncounts[,3]),
+   area4 = sum(venncounts[,4]),
+   n12 = sum(venncounts[,1]==1 & venncounts[,2]==1),
+   n13 = sum(venncounts[,1]==1 & venncounts[,3]==1),
+   n14 = sum(venncounts[,1]==1 & venncounts[,4]==1),
+   n23 = sum(venncounts[,2]==1 & venncounts[,3]==1),
+   n24 = sum(venncounts[,2]==1 & venncounts[,4]==1),
+   n34 = sum(venncounts[,3]==1 & venncounts[,4]==1),
+   n123 = sum(venncounts[,1]==1 & venncounts[,2]==1 & venncounts[,3]==1),
+   n124 = sum(venncounts[,1]==1 & venncounts[,2]==1 & venncounts[,4]==1),
+   n134 = sum(venncounts[,1]==1 & venncounts[,3]==1 & venncounts[,4]==1),
+   n234 = sum(venncounts[,2]==1 & venncounts[,3]==1 & venncounts[,4]==1),
+   n1234 = sum(rowSums(venncounts)==4),
+   category=name.methods[names], fill=col[names], lty=1, cex=2, cat.cex=2, cat.col="black"
+ )

Predicted and estimated logFCs

> load("RSCE.mRNA.unstranded.rda")
> data <- RSCE.mRNA.unstranded
> data <- calcNormFactors(data)
> batch <- strsplit2(colnames(data), split="\\.")[,1]
> mixes <- as.numeric(strsplit2(strsplit2(colnames(data), split="\\.")[,2], split="_")[,1])/100
> design <- model.matrix(~data$samples$group-1)
> v <- voom(data, design, plot=FALSE)
> remove <- batch=="R2D"
> fitmixgood <- fitmixture(v$E[,!remove], mixes[!remove])
> c12 <- 2^(fitmixgood$M/2+fitmixgood$A)
> c22 <- 2^(fitmixgood$A-fitmixgood$M/2)
> save(c12, c22, file="./Output/fitmixture_concentrations.RData")
> for (i in 1:length(deseq2)) colnames(deseq2[[i]])[colnames(deseq2[[i]])=="log2FoldChange"] <- "logFC"
> par(mfrow=c(4,4))
> par(mar=c(4,4,0,0))
> for (i in c("100vs000","100vs000.D","075vs050","075vs050.D")) {
+   if (i %in% c("100vs000", "100vs000.D")) xmax <- 10
+   if (i %in% c("075vs050", "075vs050.D")) xmax <- 1
+   conc <- strsplit2(i, split="vs")
+   conc <- as.numeric(gsub(".D", "", conc))
+   p <- conc[1]/100
+   q <- conc[2]/100
+   logfc.adjusted <- log2(p*c12+(1-p)*c22)-log2(q*c12+(1-q)*c22)  
+   for (m in c("deseq2", "edger_glm", "voom_qw", "voom")) {
+     table <- get(m)[[i]]
+     mm <- match(table$GeneID, names(logfc.adjusted))
+     logfc.adjusted2 <- logfc.adjusted[mm]
+     n <- nrow(table)
+     dev <- table$logFC-logfc.adjusted2
+     rmse <- sqrt(sum(dev^2)/(n-1))
+     smoothScatter(table$logFC, logfc.adjusted2, ylab="", xlab="", xlim=c(-xmax, xmax), ylim=c(-xmax, xmax), las=2)
+     legend("topleft", legend=paste0("RMSE=", round(rmse, 4)), pch="", bty="n", cex=1.2)
+     abline(0,1, col="red")
+   }
+ }

Session time and information

Total run time for this report: 0.32 minutes 
> sessionInfo()
R version 3.3.1 (2016-06-21)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: OS X 10.11.3 (El Capitan)

locale:
[1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8

attached base packages:
 [1] grid      parallel  stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] VennDiagram_1.6.17         futile.logger_1.4.1        RColorBrewer_1.1-2        
 [4] baySeq_2.6.0               perm_1.0-0.0               abind_1.4-5               
 [7] DESeq2_1.12.3              SummarizedExperiment_1.2.3 Biobase_2.32.0            
[10] GenomicRanges_1.24.2       GenomeInfoDb_1.8.1         IRanges_2.6.1             
[13] S4Vectors_0.10.1           BiocGenerics_0.18.0        edgeR_3.14.0              
[16] limma_3.28.14             

loaded via a namespace (and not attached):
 [1] genefilter_1.54.2    locfit_1.5-9.1       splines_3.3.1        lattice_0.20-33     
 [5] colorspace_1.2-6     htmltools_0.3.5      yaml_2.1.13          chron_2.3-47        
 [9] survival_2.39-5      XML_3.98-1.4         foreign_0.8-66       DBI_0.4-1           
[13] BiocParallel_1.6.2   lambda.r_1.1.7       plyr_1.8.4           stringr_1.0.0       
[17] zlibbioc_1.18.0      munsell_0.4.3        gtable_0.2.0         evaluate_0.9        
[21] latticeExtra_0.6-28  knitr_1.13           geneplotter_1.50.0   AnnotationDbi_1.34.3
[25] Rcpp_0.12.5          KernSmooth_2.23-15   acepack_1.3-3.3      xtable_1.8-2        
[29] scales_0.4.0         formatR_1.4          Hmisc_3.17-4         annotate_1.50.0     
[33] XVector_0.12.0       gridExtra_2.2.1      ggplot2_2.1.0        digest_0.6.9        
[37] stringi_1.1.1        tools_3.3.1          magrittr_1.5         RSQLite_1.0.0       
[41] Formula_1.2-1        cluster_2.0.4        futile.options_1.0.0 Matrix_1.2-6        
[45] data.table_1.9.6     rmarkdown_1.0        rpart_4.1-10         nnet_7.3-12