--- title: "Differential splicing analyses on mixology data" author: "Charity Law" date: "30 August 2016" output: html_document --- ```{r global_options, include=FALSE} knitr::opts_chunk$set(prompt=TRUE, comment="", message=FALSE, warning=FALSE, fig.width=5, fig.align="center") options(digits=2) options(width=105) starttime <- proc.time() ``` # Preparing the data for analysis ### Setting up ```{r setup} library(limma) library(edgeR) library(DEXSeq) library(RColorBrewer) library(VennDiagram) library(BiocParallel) dir.create("Output", showWarnings=FALSE) ``` ### Load data ```{r changedisplay, include=FALSE} options(digits=10) ``` ```{r data} load("RSCE.mRNA.unstranded.exon.rda") RSCE.mRNA.unstranded$samples[,1:6] ``` ```{r changedisplayback, include=FALSE} options(digits=2) ``` ### Multi-dimensional scaling plot ```{r mdsplot} lcpm <- cpm(RSCE.mRNA.unstranded, log=TRUE) pchby <- as.numeric(grepl("D", RSCE.mRNA.unstranded$samples$Sample))+1 colby <- as.factor(RSCE.mRNA.unstranded$samples$group) labelby.group <- paste0(as.numeric(levels(colby)), ":", 100-as.numeric(levels(colby))) palette(rainbow(5)) plotMDS(lcpm, pch=as.numeric(pchby)+15, col=as.numeric(colby), main="Poly-A") legend("topleft", text.col=unique(as.numeric(colby)), legend=labelby.group, bty="n") ``` ### Filter out lowly expressed genes ```{r filtering} keep.exprs <- rowSums(cpm(RSCE.mRNA.unstranded)>0.125)>=10 table(keep.exprs) RSCE.mRNA.unstranded <- RSCE.mRNA.unstranded[keep.exprs,] ``` ### Normalisation ```{r normalisation} RSCE.mRNA.unstranded <- calcNormFactors(RSCE.mRNA.unstranded) ``` ### Set comparisons to be carried out downstream ```{r comparisons} comparisons <- c("100vs000", "075vs025", "050vs025", "075vs050") sample <- strsplit2(RSCE.mRNA.unstranded$samples$Sample, split="\\.") colnames(sample) <- c("Batch", "Mixture") sample <- as.data.frame(sample) sample ``` # Differential splicing analysis ### Voom-diffSplice using method of F-tests (voom-ds) ```{r voom} table <- as.list(rep(NA, length(comparisons))) names(table) <- comparisons for (c in comparisons){ cat("== Running comparison", c, "== \n") batch <- paste0("R2", substr(c, start=10, stop=10)) batch <- c(batch, "R1", "R3") name <- gsub(".D", "", c) select <- c(strsplit2(name, split="vs")) sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch) data <- RSCE.mRNA.unstranded[,sample.select] group <- data$samples$group design <- model.matrix(~group) v <- voom(data, design) fit <- lmFit(v, design) ds <- diffSplice(fit, geneid="GeneID") tt <- topSplice(ds, test="F", n=Inf) table[[c]] <- tt save(comparisons, table, file="./Output/diffsplice.f.RData") } ``` ### EdgeR-diffSplice using method of F-tests (edgeR-ds) ```{r edger} table <- as.list(rep(NA, length(comparisons))) names(table) <- comparisons for (c in comparisons){ cat("== Running comparison", c, "== \n") batch <- paste0("R2", substr(c, start=10, stop=10)) batch <- c(batch, "R1", "R3") name <- gsub(".D", "", c) select <- c(strsplit2(name, split="vs")) sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch) data <- RSCE.mRNA.unstranded[,sample.select] group <- data$samples$group design <- model.matrix(~group) y <- estimateDisp(data, design=design, robust=TRUE) fit <- glmQLFit(y, design=design, robust=TRUE) ds <- diffSpliceDGE(fit, geneid="GeneID") tt <- topSpliceDGE(ds, test="gene", n=Inf) table[[c]] <- tt save(comparisons, table, file="./Output/edger.RData") } ``` ### DEXSeq ```{r dexseq} ncores <- MulticoreParam(workers=4) table <- as.list(rep(NA, length(comparisons))) names(table) <- comparisons for (c in comparisons){ cat("== Running comparison", c, "== \n") batch <- paste0("R2", substr(c, start=10, stop=10)) batch <- c(batch, "R1", "R3") name <- gsub(".D", "", c) select <- c(strsplit2(name, split="vs")) sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch) data <- RSCE.mRNA.unstranded[,sample.select] group <- data$samples$group dex.samples <- data.frame(cbind(condition=group)) dex.samples$condition <- as.factor(dex.samples$condition) dexds <- DEXSeqDataSet(countData=data$counts, sampleData=dex.samples, design=~sample+exon+condition:exon, featureID=as.character(1:nrow(data)), groupID=as.character(data$genes$GeneID)) dexds <- estimateSizeFactors(dexds) dexds <- estimateDispersions(dexds, BPPARAM=ncores) dexds <- testForDEU(dexds, BPPARAM=ncores) dexds <- estimateExonFoldChanges(dexds, BPPARAM=ncores) dex.exon <- DEXSeqResults(dexds) dex.gene <- perGeneQValue(dex.exon, p="pvalue") dex.gene <- as.data.frame(dex.gene) dex.gene <- data.frame(rownames(dex.gene), dex.gene) colnames(dex.gene) <- c("GeneID","FDR") o <- order(dex.gene$FDR) tt <- dex.gene[o,] table[[c]] <- tt save(comparisons, table, file="./Output/dexseq.RData") } ``` # Recovery and inconsistency rates ### Load saved DE results ```{r loadresults} files <- list.files(path="./Output", full.name=TRUE) methods <- gsub(".RData", "", strsplit2(files, split="/")[,3]) methods.label <- c("DEXSeq", "voom-ds", "edgeR-ds") names(methods.label) <- methods methods.label for (i in 1:length(methods)) { load(files[i]) assign(methods[i], table) } ``` ### Create sets of reference genes ```{r refgenes} cutoff <- 0.05 reference <- as.list(rep(NA, length(methods))) names(reference) <- methods for (m in methods) { this.reference <- get(m)[["100vs000"]] this.reference <- this.reference$GeneID[this.reference$FDR