> library(limma)
> library(edgeR)
> library(DESeq2)
> library(baySeq)
> library(RColorBrewer)
> library(VennDiagram)
> files <- list.files(path="./Output", full.name=TRUE)
> keep <- intersect(grep("genes_removed", files, invert=TRUE), grep("fitmixture", files, invert=TRUE))
> files <- files[keep]
> methods <- gsub(".RData", "", strsplit2(files, split="/")[,3])
> for (i in 1:length(files)) {
+ load(files[i])
+ assign(methods[i], table)
+ }
> cutoff <- 0.05
> reference <- as.list(rep(NA, length(methods)))
> names(reference) <- methods
> for (m in methods) {
+ this.reference <- get(m)[["100vs000"]]
+ this.reference <- this.reference$GeneID[this.reference$FDR<cutoff]
+ reference[[m]] <- this.reference
+ }
> comparisons <- c("100vs000", "075vs025", "050vs025", "075vs050", "100vs000.D", "075vs025.D", "050vs025.D", "075vs050.D")
> recovery <- matrix(NA, nrow=length(comparisons), ncol=length(methods))
> rownames(recovery) <- comparisons
> colnames(recovery) <- methods
> inconsistency <- truepositives <- falsediscoveries <- recovery
> for (m in methods) {
+ this.reference <- reference[[m]]
+ for (c in comparisons) {
+ results <- get(m)[[c]]
+ results.detected <- results$GeneID[results$FDR<cutoff]
+ tp <- sum(results.detected %in% this.reference)
+ recovery[c,m] <- tp/length(this.reference)
+ nd <- length(results.detected)
+ fd <- sum(!results.detected %in% this.reference)
+ if (nd==0) {
+ inconsistency[c,m] <- 0
+ } else {
+ inconsistency[c,m] <- fd/nd
+ }
+ falsediscoveries[c,m] <- fd
+ truepositives[c,m] <- tp
+ }
+ }
> recovery
bayseq_nb bayseq_norm deseq2 edger_classic edger_glm voom_qw voom
100vs000 1.0000 1.000 1.00 1.000 1.000 1.00 1.0000
075vs025 0.6244 0.615 0.80 0.795 0.791 0.82 0.8005
050vs025 0.1015 0.000 0.33 0.316 0.313 0.39 0.2998
075vs050 0.1201 0.000 0.35 0.368 0.367 0.37 0.3413
100vs000.D 0.6805 0.992 0.81 0.774 0.776 0.82 0.7642
075vs025.D 0.1973 0.520 0.47 0.383 0.387 0.57 0.3710
050vs025.D 0.0065 0.118 0.18 0.040 0.041 0.15 0.0000
075vs050.D 0.0081 0.002 0.13 0.035 0.036 0.13 0.0067
> inconsistency
bayseq_nb bayseq_norm deseq2 edger_classic edger_glm voom_qw voom
100vs000 0.0000 0.00 0.00000 0.00000 0.00000 0.0000 0.00000
075vs025 0.0038 0.22 0.00954 0.00949 0.00860 0.0170 0.01307
050vs025 0.0000 0.00 0.00000 0.00030 0.00000 0.0026 0.00031
075vs050 0.0000 0.00 0.00055 0.00128 0.00051 0.0012 0.00163
100vs000.D 0.0031 0.45 0.00736 0.00399 0.00398 0.0091 0.00496
075vs025.D 0.0000 0.22 0.00143 0.00098 0.00097 0.0092 0.00125
050vs025.D 0.0000 0.17 0.00000 0.00000 0.00000 0.0012 0.00000
075vs050.D 0.0000 0.25 0.00000 0.00000 0.00000 0.0013 0.00000
> falsediscoveries
bayseq_nb bayseq_norm deseq2 edger_classic edger_glm voom_qw voom
100vs000 0 0 0 0 0 0 0
075vs025 25 260 81 81 73 165 114
050vs025 0 0 0 1 0 12 1
075vs050 0 0 2 5 2 5 6
100vs000.D 22 1244 63 33 33 88 41
075vs025.D 0 226 7 4 4 62 5
050vs025.D 0 37 0 0 0 2 0
075vs050.D 0 1 0 0 0 2 0
> truepositives
bayseq_nb bayseq_norm deseq2 edger_classic edger_glm voom_qw voom
100vs000 10435 1532 10513 10631 10641 11606 10757
075vs025 6516 942 8413 8452 8420 9532 8611
050vs025 1059 0 3504 3362 3334 4553 3225
075vs050 1253 0 3657 3915 3900 4313 3671
100vs000.D 7101 1520 8492 8230 8260 9554 8220
075vs025.D 2059 796 4902 4076 4116 6643 3991
050vs025.D 68 181 1871 425 436 1703 0
075vs050.D 85 3 1356 374 379 1493 72
> totaldetected <- falsediscoveries + truepositives
> totaldetected
bayseq_nb bayseq_norm deseq2 edger_classic edger_glm voom_qw voom
100vs000 10435 1532 10513 10631 10641 11606 10757
075vs025 6541 1202 8494 8533 8493 9697 8725
050vs025 1059 0 3504 3363 3334 4565 3226
075vs050 1253 0 3659 3920 3902 4318 3677
100vs000.D 7123 2764 8555 8263 8293 9642 8261
075vs025.D 2059 1022 4909 4080 4120 6705 3996
050vs025.D 68 218 1871 425 436 1705 0
075vs050.D 85 4 1356 374 379 1495 72
> name.methods <-
+ c("baySeq", "baySeq-norm", "DESeq2", "edgeR-classic", "edgeR-glm", "voom-qw", "voom")
> names(name.methods) <- methods
> name.methods
bayseq_nb bayseq_norm deseq2 edger_classic edger_glm voom_qw
"baySeq" "baySeq-norm" "DESeq2" "edgeR-classic" "edgeR-glm" "voom-qw"
voom
"voom"
> name.comparisons <- gsub(".D", "d", comparisons)
> name.comparisons
[1] "100vs000" "075vs025" "050vs025" "075vs050" "100vs000d" "075vs025d" "050vs025d" "075vs050d"
> col <- brewer.pal(length(methods)+3, "Paired")
> col <- col[c(1,2,9,4,3,5,6)]
> names(col) <- methods
> lty <- c(1,1,1,1,3,2,2)
> par(mfrow=c(1,2))
> par(mar=c(6,5,2,0))
> matplot(recovery[1:4,], main=" ",
+ type="l", lty=lty, lwd=2, col=col,
+ ylim=c(0,1), ylab="", xaxt="n", yaxt="n")
> title(ylab="Recovery rate", line=3.5)
> axis(1, at=1:4, name.comparisons[1:4], tick=FALSE, las=2, font=1)
> axis(2, at=c(0,0.25,0.5,0.75,1), paste0(100*c(0,0.25,0.5,0.75,1), "%"), las=1)
> par(mar=c(6,3,2,2))
> matplot(recovery[5:8,], main=" ",
+ type="l", lty=lty, lwd=2, col=col,
+ ylim=c(0,1), ylab="", xaxt="n", yaxt="n")
> axis(1, at=1:4, name.comparisons[5:8], tick=FALSE, las=2, font=1)
> legend("topright", text.col=col, col=col, lty=lty, legend=name.methods, cex=0.7, bty="n")
> reference.all <- unique(unlist(reference))
> names <- c("bayseq_nb", "deseq2", "edger_glm", "voom_qw")
> venncounts <- matrix(0, nrow=length(reference.all), ncol=length(names))
> colnames(venncounts) <- names
> for (i in names) venncounts[,i] <- as.numeric(reference.all %in% reference[[i]])
> venn.plot <- draw.quad.venn(
+ area1 = sum(venncounts[,1]),
+ area2 = sum(venncounts[,2]),
+ area3 = sum(venncounts[,3]),
+ area4 = sum(venncounts[,4]),
+ n12 = sum(venncounts[,1]==1 & venncounts[,2]==1),
+ n13 = sum(venncounts[,1]==1 & venncounts[,3]==1),
+ n14 = sum(venncounts[,1]==1 & venncounts[,4]==1),
+ n23 = sum(venncounts[,2]==1 & venncounts[,3]==1),
+ n24 = sum(venncounts[,2]==1 & venncounts[,4]==1),
+ n34 = sum(venncounts[,3]==1 & venncounts[,4]==1),
+ n123 = sum(venncounts[,1]==1 & venncounts[,2]==1 & venncounts[,3]==1),
+ n124 = sum(venncounts[,1]==1 & venncounts[,2]==1 & venncounts[,4]==1),
+ n134 = sum(venncounts[,1]==1 & venncounts[,3]==1 & venncounts[,4]==1),
+ n234 = sum(venncounts[,2]==1 & venncounts[,3]==1 & venncounts[,4]==1),
+ n1234 = sum(rowSums(venncounts)==4),
+ category=name.methods[names], fill=col[names], lty=1, cex=2, cat.cex=2, cat.col="black"
+ )
> load("RSCE.mRNA.unstranded.rda")
> data <- RSCE.mRNA.unstranded
> data <- calcNormFactors(data)
> batch <- strsplit2(colnames(data), split="\\.")[,1]
> mixes <- as.numeric(strsplit2(strsplit2(colnames(data), split="\\.")[,2], split="_")[,1])/100
> design <- model.matrix(~data$samples$group-1)
> v <- voom(data, design, plot=FALSE)
> remove <- batch=="R2D"
> fitmixgood <- fitmixture(v$E[,!remove], mixes[!remove])
> c12 <- 2^(fitmixgood$M/2+fitmixgood$A)
> c22 <- 2^(fitmixgood$A-fitmixgood$M/2)
> save(c12, c22, file="./Output/fitmixture_concentrations.RData")
> for (i in 1:length(deseq2)) colnames(deseq2[[i]])[colnames(deseq2[[i]])=="log2FoldChange"] <- "logFC"
> par(mfrow=c(4,4))
> par(mar=c(4,4,0,0))
> for (i in c("100vs000","100vs000.D","075vs050","075vs050.D")) {
+ if (i %in% c("100vs000", "100vs000.D")) xmax <- 10
+ if (i %in% c("075vs050", "075vs050.D")) xmax <- 1
+ conc <- strsplit2(i, split="vs")
+ conc <- as.numeric(gsub(".D", "", conc))
+ p <- conc[1]/100
+ q <- conc[2]/100
+ logfc.adjusted <- log2(p*c12+(1-p)*c22)-log2(q*c12+(1-q)*c22)
+ for (m in c("deseq2", "edger_glm", "voom_qw", "voom")) {
+ table <- get(m)[[i]]
+ mm <- match(table$GeneID, names(logfc.adjusted))
+ logfc.adjusted2 <- logfc.adjusted[mm]
+ n <- nrow(table)
+ dev <- table$logFC-logfc.adjusted2
+ rmse <- sqrt(sum(dev^2)/(n-1))
+ smoothScatter(table$logFC, logfc.adjusted2, ylab="", xlab="", xlim=c(-xmax, xmax), ylim=c(-xmax, xmax), las=2)
+ legend("topleft", legend=paste0("RMSE=", round(rmse, 4)), pch="", bty="n", cex=1.2)
+ abline(0,1, col="red")
+ }
+ }
Total run time for this report: 0.32 minutes
> sessionInfo()
R version 3.3.1 (2016-06-21)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: OS X 10.11.3 (El Capitan)
locale:
[1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
attached base packages:
[1] grid parallel stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] VennDiagram_1.6.17 futile.logger_1.4.1 RColorBrewer_1.1-2
[4] baySeq_2.6.0 perm_1.0-0.0 abind_1.4-5
[7] DESeq2_1.12.3 SummarizedExperiment_1.2.3 Biobase_2.32.0
[10] GenomicRanges_1.24.2 GenomeInfoDb_1.8.1 IRanges_2.6.1
[13] S4Vectors_0.10.1 BiocGenerics_0.18.0 edgeR_3.14.0
[16] limma_3.28.14
loaded via a namespace (and not attached):
[1] genefilter_1.54.2 locfit_1.5-9.1 splines_3.3.1 lattice_0.20-33
[5] colorspace_1.2-6 htmltools_0.3.5 yaml_2.1.13 chron_2.3-47
[9] survival_2.39-5 XML_3.98-1.4 foreign_0.8-66 DBI_0.4-1
[13] BiocParallel_1.6.2 lambda.r_1.1.7 plyr_1.8.4 stringr_1.0.0
[17] zlibbioc_1.18.0 munsell_0.4.3 gtable_0.2.0 evaluate_0.9
[21] latticeExtra_0.6-28 knitr_1.13 geneplotter_1.50.0 AnnotationDbi_1.34.3
[25] Rcpp_0.12.5 KernSmooth_2.23-15 acepack_1.3-3.3 xtable_1.8-2
[29] scales_0.4.0 formatR_1.4 Hmisc_3.17-4 annotate_1.50.0
[33] XVector_0.12.0 gridExtra_2.2.1 ggplot2_2.1.0 digest_0.6.9
[37] stringi_1.1.1 tools_3.3.1 magrittr_1.5 RSQLite_1.0.0
[41] Formula_1.2-1 cluster_2.0.4 futile.options_1.0.0 Matrix_1.2-6
[45] data.table_1.9.6 rmarkdown_1.0 rpart_4.1-10 nnet_7.3-12