> library(limma)
> library(edgeR)
> library(DEXSeq)
> library(RColorBrewer)
> library(VennDiagram)
> library(BiocParallel)
> dir.create("Output_using_degraded_samples", showWarnings=FALSE)
> load("RSCE.mRNA.unstranded.exon.rda")
> keep.exprs <- rowSums(cpm(RSCE.mRNA.unstranded)>0.125)>=10
> table(keep.exprs)
keep.exprs
FALSE TRUE
61169 138042
> RSCE.mRNA.unstranded <- RSCE.mRNA.unstranded[keep.exprs,]
> RSCE.mRNA.unstranded <- calcNormFactors(RSCE.mRNA.unstranded)
> comparisons <- c("100vs000.D", "075vs025.D", "050vs025.D", "075vs050.D")
> sample <- strsplit2(RSCE.mRNA.unstranded$samples$Sample, split="\\.")
> colnames(sample) <- c("Batch", "Mixture")
> sample <- as.data.frame(sample)
> table <- as.list(rep(NA, length(comparisons)))
> names(table) <- comparisons
> for (c in comparisons){
+ cat("== Running comparison", c, "== \n")
+ batch <- paste0("R2", substr(c, start=10, stop=10))
+ batch <- c(batch, "R1", "R3")
+ name <- gsub(".D", "", c)
+ select <- c(strsplit2(name, split="vs"))
+ sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch)
+ data <- RSCE.mRNA.unstranded[,sample.select]
+ group <- data$samples$group
+ design <- model.matrix(~group)
+ v <- voom(data, design)
+ fit <- lmFit(v, design)
+ ds <- diffSplice(fit, geneid="GeneID")
+ tt <- topSplice(ds, test="F", n=Inf)
+ table[[c]] <- tt
+ save(comparisons, table, file="./Output_using_degraded_samples/diffsplice.f.RData")
+ }
== Running comparison 100vs000.D ==
Total number of exons: 138042
Total number of genes: 16500
Number of genes with 1 exon: 3113
Mean number of exons in a gene: 8
Max number of exons in a gene: 111
== Running comparison 075vs025.D ==
Total number of exons: 138042
Total number of genes: 16500
Number of genes with 1 exon: 3113
Mean number of exons in a gene: 8
Max number of exons in a gene: 111
== Running comparison 050vs025.D ==
Total number of exons: 138042
Total number of genes: 16500
Number of genes with 1 exon: 3113
Mean number of exons in a gene: 8
Max number of exons in a gene: 111
== Running comparison 075vs050.D ==
Total number of exons: 138042
Total number of genes: 16500
Number of genes with 1 exon: 3113
Mean number of exons in a gene: 8
Max number of exons in a gene: 111
> table <- as.list(rep(NA, length(comparisons)))
> names(table) <- comparisons
> for (c in comparisons){
+ cat("== Running comparison", c, "== \n")
+ batch <- paste0("R2", substr(c, start=10, stop=10))
+ batch <- c(batch, "R1", "R3")
+ name <- gsub(".D", "", c)
+ select <- c(strsplit2(name, split="vs"))
+ sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch)
+ data <- RSCE.mRNA.unstranded[,sample.select]
+ group <- data$samples$group
+ design <- model.matrix(~group)
+ y <- estimateDisp(data, design=design, robust=TRUE)
+ fit <- glmQLFit(y, design=design, robust=TRUE)
+ ds <- diffSpliceDGE(fit, geneid="GeneID")
+ tt <- topSpliceDGE(ds, test="gene", n=Inf)
+ table[[c]] <- tt
+ save(comparisons, table, file="./Output_using_degraded_samples/edger.RData")
+ }
== Running comparison 100vs000.D ==
Total number of exons: 138042
Total number of genes: 16500
Number of genes with 1 exon: 3113
Mean number of exons in a gene: 8
Max number of exons in a gene: 111
== Running comparison 075vs025.D ==
Total number of exons: 138042
Total number of genes: 16500
Number of genes with 1 exon: 3113
Mean number of exons in a gene: 8
Max number of exons in a gene: 111
== Running comparison 050vs025.D ==
Total number of exons: 138042
Total number of genes: 16500
Number of genes with 1 exon: 3113
Mean number of exons in a gene: 8
Max number of exons in a gene: 111
== Running comparison 075vs050.D ==
Total number of exons: 138042
Total number of genes: 16500
Number of genes with 1 exon: 3113
Mean number of exons in a gene: 8
Max number of exons in a gene: 111
> ncores <- MulticoreParam(workers=4)
> table <- as.list(rep(NA, length(comparisons)))
> names(table) <- comparisons
> for (c in comparisons){
+ cat("== Running comparison", c, "== \n")
+ batch <- paste0("R2", substr(c, start=10, stop=10))
+ batch <- c(batch, "R1", "R3")
+ name <- gsub(".D", "", c)
+ select <- c(strsplit2(name, split="vs"))
+ sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch)
+ data <- RSCE.mRNA.unstranded[,sample.select]
+ group <- data$samples$group
+ dex.samples <- data.frame(cbind(condition=group))
+ dex.samples$condition <- as.factor(dex.samples$condition)
+ dexds <- DEXSeqDataSet(countData=data$counts, sampleData=dex.samples,
+ design=~sample+exon+condition:exon, featureID=as.character(1:nrow(data)),
+ groupID=as.character(data$genes$GeneID))
+ dexds <- estimateSizeFactors(dexds)
+ dexds <- estimateDispersions(dexds, BPPARAM=ncores)
+ dexds <- testForDEU(dexds, BPPARAM=ncores)
+ dexds <- estimateExonFoldChanges(dexds, BPPARAM=ncores)
+ dex.exon <- DEXSeqResults(dexds)
+ dex.gene <- perGeneQValue(dex.exon, p="pvalue")
+ dex.gene <- as.data.frame(dex.gene)
+ dex.gene <- data.frame(rownames(dex.gene), dex.gene)
+ colnames(dex.gene) <- c("GeneID","FDR")
+ o <- order(dex.gene$FDR)
+ tt <- dex.gene[o,]
+ table[[c]] <- tt
+ save(comparisons, table, file="./Output_using_degraded_samples/dexseq.RData")
+ }
== Running comparison 100vs000.D ==
== Running comparison 075vs025.D ==
== Running comparison 050vs025.D ==
== Running comparison 075vs050.D ==
> files <- list.files(path="./Output", full.name=TRUE)
> methods <- gsub(".RData", "", strsplit2(files, split="/")[,3])
> methods.label <- c("DEXSeq", "voom-ds", "edgeR-ds")
> names(methods.label) <- methods
> methods.label
dexseq diffsplice.f edger
"DEXSeq" "voom-ds" "edgeR-ds"
> for (i in 1:length(methods)) {
+ load(files[i])
+ assign(methods[i], table)
+ }
> cutoff <- 0.05
> reference <- as.list(rep(NA, length(methods)))
> names(reference) <- methods
> for (m in methods) {
+ this.reference <- get(m)[["100vs000"]]
+ this.reference <- this.reference$GeneID[this.reference$FDR<cutoff]
+ reference[[m]] <- as.character(this.reference)
+ }
> files <- list.files(path="./Output_using_degraded_samples", full.name=TRUE)
> methods <- gsub(".RData", "", strsplit2(files, split="/")[,3])
> name.comparisons <- gsub(".D", "d", comparisons)
> for (i in 1:length(methods)) {
+ load(files[i])
+ assign(methods[i], table)
+ }
> recovery <- matrix(NA, nrow=length(comparisons), ncol=length(methods))
> rownames(recovery) <- comparisons
> colnames(recovery) <- methods
> inconsistency <- truepositives <- falsediscoveries <- recovery
> for (m in methods) {
+ this.reference <- reference[[m]]
+ for (c in comparisons) {
+ results <- get(m)[[c]]
+ results.detected <- results$GeneID[results$FDR<cutoff]
+ tp <- sum(results.detected %in% this.reference)
+ recovery[c,m] <- tp/length(this.reference)
+ nd <- length(results.detected)
+ fd <- sum(!results.detected %in% this.reference)
+ inconsistency[c,m] <- fd/nd
+ truepositives[c,m] <- tp
+ falsediscoveries[c,m] <- fd
+ }
+ }
> recovery
dexseq diffsplice.f edger
100vs000.D 0.3669 0.3453 0.33
075vs025.D 0.0469 0.0067 0.01
050vs025.D 0.0063 0.0000 0.00
075vs050.D 0.0063 0.0000 0.00
> inconsistency
dexseq diffsplice.f edger
100vs000.D 0.066 0.048 0.044
075vs025.D 0.000 0.000 0.000
050vs025.D 0.000 NaN NaN
075vs050.D 0.000 NaN NaN
> falsediscoveries
dexseq diffsplice.f edger
100vs000.D 37 13 12
075vs025.D 0 0 0
050vs025.D 0 0 0
075vs050.D 0 0 0
> truepositives
dexseq diffsplice.f edger
100vs000.D 524 259 260
075vs025.D 67 5 8
050vs025.D 9 0 0
075vs050.D 9 0 0
> totaldetected <- falsediscoveries + truepositives
> totaldetected
dexseq diffsplice.f edger
100vs000.D 561 272 272
075vs025.D 67 5 8
050vs025.D 9 0 0
075vs050.D 9 0 0
> col <- brewer.pal(9, "Paired")[c(9,5,4)]
> names(col) <- methods
> lty <- c(1,1,3)
> par(mfrow=c(1,2))
> par(mar=c(6,4.5,2,0.5))
> matplot(recovery, type="l", lty=lty, lwd=2, col=col, ylim=c(0,1), ylab="", xaxt="n", yaxt="n")
> title(ylab="Recovery rate", line=3.5)
> axis(1, at=1:4, comparisons, tick=FALSE, las=2, font=1)
> axis(2, at=c(0,0.25,0.5,0.75,1), paste0(100*c(0,0.25,0.5,0.75,1), "%"), las=1)
> legend("topright", text.col=col, lty=lty, col=col, legend=methods.label, cex=0.7, bty="n")
> par(mar=c(6,4.9,2,0.1))
> matplot(inconsistency, type="l", lty=lty, lwd=2, col=col, ylim=c(0,0.4), ylab="", xaxt="n", yaxt="n")
> title(ylab="Inconsistency rate", line=3.5)
> axis(1, at=1:4, comparisons, tick=FALSE, las=2, font=1)
> axis(2, at=c(0,0.1,0.2,0.3,0.4), paste0(100*c(0,0.1,0.2,0.3,0.4), "%"), las=1)
Total run time for this report: 39 minutes
> sessionInfo()
R version 3.3.1 (2016-06-21)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: OS X 10.11.3 (El Capitan)
locale:
[1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
attached base packages:
[1] grid stats4 parallel stats graphics grDevices utils datasets methods base
other attached packages:
[1] VennDiagram_1.6.17 futile.logger_1.4.1 DEXSeq_1.18.4
[4] RColorBrewer_1.1-2 AnnotationDbi_1.34.3 DESeq2_1.12.3
[7] SummarizedExperiment_1.2.3 GenomicRanges_1.24.2 GenomeInfoDb_1.8.1
[10] IRanges_2.6.1 S4Vectors_0.10.1 Biobase_2.32.0
[13] BiocGenerics_0.18.0 BiocParallel_1.6.2 edgeR_3.14.0
[16] limma_3.28.14
loaded via a namespace (and not attached):
[1] genefilter_1.54.2 statmod_1.4.24 locfit_1.5-9.1 splines_3.3.1
[5] lattice_0.20-33 colorspace_1.2-6 htmltools_0.3.5 yaml_2.1.13
[9] chron_2.3-47 survival_2.39-5 XML_3.98-1.4 foreign_0.8-66
[13] DBI_0.4-1 lambda.r_1.1.7 plyr_1.8.4 stringr_1.0.0
[17] zlibbioc_1.18.0 Biostrings_2.40.2 munsell_0.4.3 gtable_0.2.0
[21] hwriter_1.3.2 evaluate_0.9 latticeExtra_0.6-28 knitr_1.13
[25] geneplotter_1.50.0 biomaRt_2.28.0 Rcpp_0.12.5 acepack_1.3-3.3
[29] xtable_1.8-2 scales_0.4.0 formatR_1.4 Hmisc_3.17-4
[33] annotate_1.50.0 XVector_0.12.0 Rsamtools_1.24.0 gridExtra_2.2.1
[37] ggplot2_2.1.0 digest_0.6.9 stringi_1.1.1 bitops_1.0-6
[41] tools_3.3.1 magrittr_1.5 RCurl_1.95-4.8 RSQLite_1.0.0
[45] futile.options_1.0.0 Formula_1.2-1 cluster_2.0.4 Matrix_1.2-6
[49] data.table_1.9.6 rmarkdown_1.0 rpart_4.1-10 nnet_7.3-12