--- title: "Differential expression analyses on mixology data (using conservative filter)" author: "Charity Law" date: "7 October 2016" output: html_document --- ```{r global_options, include=FALSE} knitr::opts_chunk$set(prompt=TRUE, comment="", message=FALSE, warning=FALSE, fig.width=5, fig.align="center") options(digits=2) options(width=105) starttime <- proc.time() ``` # Preparing the data for analysis ### Setting up ```{r setup} library(limma) library(edgeR) library(DESeq2) library(baySeq) library(RColorBrewer) library(VennDiagram) dir.create("Output_using_conservative_filter", showWarnings=FALSE) ``` ### Load data ```{r changedisplay, include=FALSE} options(digits=10) ``` ```{r data} load("RSCE.mRNA.unstranded.rda") RSCE.mRNA.unstranded$samples[,1:6] ``` ```{r changedisplayback, include=FALSE} options(digits=2) ``` ### Multi-dimensional scaling plot ```{r mdsplot} lcpm <- cpm(RSCE.mRNA.unstranded, log=TRUE) pchby <- as.numeric(grepl("D", RSCE.mRNA.unstranded$samples$Sample))+1 colby <- as.factor(RSCE.mRNA.unstranded$samples$group) labelby.group <- paste0(as.numeric(levels(colby)), ":", 100-as.numeric(levels(colby))) palette(rainbow(5)) plotMDS(lcpm, pch=as.numeric(pchby)+15, col=as.numeric(colby), main="Poly-A") legend("bottomleft", text.col=unique(as.numeric(colby)), legend=labelby.group, bty="n") ``` ### Clean up gene annotations ```{r geneannotation} RSCE.mRNA.unstranded$genes <- RSCE.mRNA.unstranded$genes[,c("GeneID", "Symbols")] ``` ### Filter out lowly expressed genes ```{r filtering} keep.exprs <- rowSums(cpm(RSCE.mRNA.unstranded)>0.5)>=3 table(keep.exprs) genes.removed <- RSCE.mRNA.unstranded$genes$GeneID[!keep.exprs] save(genes.removed, file="./Output_using_conservative_filter/genes_removed_from_analysis.RData") RSCE.mRNA.unstranded <- RSCE.mRNA.unstranded[keep.exprs,] ``` ### Normalisation ```{r normalisation} RSCE.mRNA.unstranded <- calcNormFactors(RSCE.mRNA.unstranded) ``` ### Set comparisons to be carried out downstream ```{r comparisons} comparisons <- c("100vs000", "075vs025", "050vs025", "075vs050", "100vs000.D", "075vs025.D", "050vs025.D", "075vs050.D") sample <- strsplit2(RSCE.mRNA.unstranded$samples$Sample, split="\\.") colnames(sample) <- c("Batch", "Mixture") sample <- as.data.frame(sample) sample ``` # DE methods ### voom ```{r voom} table <- as.list(rep(NA, length(comparisons))) names(table) <- comparisons for (c in comparisons){ cat("== Running comparison", c, "== \n") batch <- paste0("R2", substr(c, start=10, stop=10)) batch <- c(batch, "R1", "R3") name <- gsub(".D", "", c) select <- c(strsplit2(name, split="vs")) sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch) data <- RSCE.mRNA.unstranded[,sample.select] group <- data$samples$group design <- model.matrix(~group) v <- voom(data, design) fit <- lmFit(v, design) fit <- eBayes(fit) tt <- topTable(fit, coef=ncol(design), n=Inf) colnames(tt)[colnames(tt)=="adj.P.Val"] <- "FDR" table[[c]] <- tt save(table, file="./Output_using_conservative_filter/voom.RData") } ``` ### voom with quality weights (voom-qw) ```{r voomqw} table <- as.list(rep(NA, length(comparisons))) names(table) <- comparisons for (c in comparisons){ cat("== Running comparison", c, "== \n") batch <- paste0("R2", substr(c, start=10, stop=10)) batch <- c(batch, "R1", "R3") name <- gsub(".D", "", c) select <- c(strsplit2(name, split="vs")) sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch) data <- RSCE.mRNA.unstranded[,sample.select] group <- data$samples$group design <- model.matrix(~group) v <- voomWithQualityWeights(data, design) fit <- lmFit(v, design) fit <- eBayes(fit) tt <- topTable(fit, coef=ncol(design), n=Inf) colnames(tt)[colnames(tt)=="adj.P.Val"] <- "FDR" table[[c]] <- tt save(table, file="./Output_using_conservative_filter/voom_qw.RData") } ``` ### edgeR with generalised linear models (edgeR-glm) ```{r edgerglm} table <- as.list(rep(NA, length(comparisons))) names(table) <- comparisons for (c in comparisons){ cat("== Running comparison", c, "== \n") batch <- paste0("R2", substr(c, start=10, stop=10)) batch <- c(batch, "R1", "R3") name <- gsub(".D", "", c) select <- c(strsplit2(name, split="vs")) sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch) data <- RSCE.mRNA.unstranded[,sample.select] group <- data$samples$group design <- model.matrix(~group) data <- estimateGLMCommonDisp(data, design) data <- estimateGLMTrendedDisp(data, design) data <- estimateGLMTagwiseDisp(data, design) fit <- glmFit(data, design) lrt <- glmLRT(fit) tt <- topTags(lrt, n=Inf) tt <- as.data.frame(tt) table[[c]] <- tt save(table, file="./Output_using_conservative_filter/edger_glm.RData") } ``` ### edgeR using exact tests (edgeR-classic) ```{r edgerclassic} table <- as.list(rep(NA, length(comparisons))) names(table) <- comparisons for (c in comparisons){ cat("== Running comparison", c, "== \n") batch <- paste0("R2", substr(c, start=10, stop=10)) batch <- c(batch, "R1", "R3") name <- gsub(".D", "", c) select <- c(strsplit2(name, split="vs")) sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch) data <- RSCE.mRNA.unstranded[,sample.select] group <- data$samples$group design <- model.matrix(~group) data <- estimateCommonDisp(data) data <- estimateTagwiseDisp(data) et <- exactTest(data) tt <- topTags(et, n=Inf) tt <- as.data.frame(tt) table[[c]] <- tt save(table, file="./Output_using_conservative_filter/edger_classic.RData") } ``` ### DESeq2 ```{r deseq2} table <- as.list(rep(NA, length(comparisons))) names(table) <- comparisons for (c in comparisons){ cat("== Running comparison", c, "== \n") batch <- paste0("R2", substr(c, start=10, stop=10)) batch <- c(batch, "R1", "R3") name <- gsub(".D", "", c) select <- c(strsplit2(name, split="vs")) sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch) data <- RSCE.mRNA.unstranded[,sample.select] levels(data$samples$group) <- c("low", "high") dds <- DESeqDataSetFromMatrix(data$counts, colData=data$samples, design=~group) mcols(dds) <- DataFrame(mcols(dds), data$genes) dds <- DESeq(dds) res <- results(dds, contrast=c("group", "high", "low")) tt <- as.data.frame(res) tt$GeneID <- rownames(tt) colnames(tt)[colnames(tt)=="padj"] <- "FDR" table[[c]] <- tt save(table, file="./Output_using_conservative_filter/deseq2.RData") } ``` ### baySeq on a normal distribution (bayseq-norm) ```{r bayseqnorm} table <- as.list(rep(NA, length(comparisons))) names(table) <- comparisons for (c in comparisons){ cat("== Running comparison", c, "== \n") batch <- paste0("R2", substr(c, start=10, stop=10)) batch <- c(batch, "R1", "R3") name <- gsub(".D", "", c) select <- c(strsplit2(name, split="vs")) sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch) data <- RSCE.mRNA.unstranded[,sample.select] group <- data$samples$group CD <- new("countData", data=data$counts, replicates=group, groups=list(NDE=rep(1, 6), DE=as.numeric(group))) CD@annotation <- data$genes libsizes(CD) <- getLibsizes(CD) densityFunction(CD) <- normDensity cl <- makeCluster(8) normCD <- getPriors(CD, cl=cl) normCD <- getLikelihoods(normCD, cl=cl) tt <- topCounts(normCD, group="DE", number=nrow(data)) colnames(tt)[colnames(tt)=="FDR.DE"] <- "FDR" table[[c]] <- tt save(table, file="./Output_using_conservative_filter/bayseq_norm.RData") } ``` ### baySeq on a negative binomial distribution (bayseq-nb) ```{r bayseqnb} table <- as.list(rep(NA, length(comparisons))) names(table) <- comparisons for (c in comparisons){ cat("== Running comparison", c, "== \n") batch <- paste0("R2", substr(c, start=10, stop=10)) batch <- c(batch, "R1", "R3") name <- gsub(".D", "", c) select <- c(strsplit2(name, split="vs")) sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch) data <- RSCE.mRNA.unstranded[,sample.select] group <- data$samples$group CD <- new("countData", data=data$counts, replicates=group, groups=list(NDE=rep(1, 6), DE=as.numeric(group))) CD@annotation <- data$genes libsizes(CD) <- getLibsizes(CD) cl <- makeCluster(8) CD <- getPriors.NB(CD, cl=cl) CD <- getLikelihoods(CD, cl=cl) tt <- topCounts(CD, group="DE", number=nrow(data)) colnames(tt)[colnames(tt)=="FDR.DE"] <- "FDR" table[[c]] <- tt save(table, file="./Output_using_conservative_filter/bayseq_nb.RData") } ``` # Session time and information ```{r runtime, echo=FALSE} totaltime <- proc.time()-starttime cat("Total run time for this report:", round(totaltime[3]/60, 2), "minutes \n") ``` ```{r sessioninfo} sessionInfo() ```