---
title: "Differential expression analyses on mixology data (using conservative filter)"
author: "Charity Law"
date: "7 October 2016"
output: html_document
---

```{r global_options, include=FALSE}
knitr::opts_chunk$set(prompt=TRUE, comment="", message=FALSE, warning=FALSE, fig.width=5, fig.align="center")
options(digits=2)
options(width=105)
starttime <- proc.time()
```
	
	
# Preparing the data for analysis

### Setting up
```{r setup}
library(limma)
library(edgeR)
library(DESeq2)
library(baySeq)
library(RColorBrewer)
library(VennDiagram)
dir.create("Output_using_conservative_filter", showWarnings=FALSE)
```

### Load data
```{r changedisplay, include=FALSE}
options(digits=10)
```

```{r data}
load("RSCE.mRNA.unstranded.rda") 
RSCE.mRNA.unstranded$samples[,1:6]
```

```{r changedisplayback, include=FALSE}
options(digits=2)
```

### Multi-dimensional scaling plot
```{r mdsplot}
lcpm <- cpm(RSCE.mRNA.unstranded, log=TRUE)
pchby <- as.numeric(grepl("D", RSCE.mRNA.unstranded$samples$Sample))+1
colby <- as.factor(RSCE.mRNA.unstranded$samples$group)
labelby.group <- paste0(as.numeric(levels(colby)), ":", 100-as.numeric(levels(colby)))
palette(rainbow(5))
plotMDS(lcpm, pch=as.numeric(pchby)+15, col=as.numeric(colby), main="Poly-A")
legend("bottomleft", text.col=unique(as.numeric(colby)), legend=labelby.group, bty="n")  
```

### Clean up gene annotations
```{r geneannotation}
RSCE.mRNA.unstranded$genes <- RSCE.mRNA.unstranded$genes[,c("GeneID", "Symbols")]
```  

### Filter out lowly expressed genes
```{r filtering}
keep.exprs <- rowSums(cpm(RSCE.mRNA.unstranded)>0.5)>=3
table(keep.exprs)
genes.removed <- RSCE.mRNA.unstranded$genes$GeneID[!keep.exprs]
save(genes.removed, file="./Output_using_conservative_filter/genes_removed_from_analysis.RData")
RSCE.mRNA.unstranded <- RSCE.mRNA.unstranded[keep.exprs,]
```    
  
### Normalisation
```{r normalisation}  
RSCE.mRNA.unstranded <- calcNormFactors(RSCE.mRNA.unstranded)
``` 
  
### Set comparisons to be carried out downstream
```{r comparisons}  
comparisons <- c("100vs000", "075vs025", "050vs025", "075vs050", "100vs000.D", "075vs025.D", "050vs025.D", "075vs050.D")
sample <- strsplit2(RSCE.mRNA.unstranded$samples$Sample, split="\\.")
colnames(sample) <- c("Batch", "Mixture")
sample <- as.data.frame(sample)
sample
```



# DE methods

### voom
```{r voom}
table <- as.list(rep(NA, length(comparisons)))
names(table) <- comparisons
for (c in comparisons){
  cat("== Running comparison", c, "== \n")
  batch <- paste0("R2", substr(c, start=10, stop=10))
  batch <- c(batch, "R1", "R3")
  name <- gsub(".D", "", c)
  select <- c(strsplit2(name, split="vs"))
  sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch)
  data <- RSCE.mRNA.unstranded[,sample.select]
  group <- data$samples$group
  design <- model.matrix(~group)
  v <- voom(data, design)
  fit <- lmFit(v, design)
  fit <- eBayes(fit)
  tt <- topTable(fit, coef=ncol(design), n=Inf)
  colnames(tt)[colnames(tt)=="adj.P.Val"] <- "FDR"
  table[[c]] <- tt
  save(table, file="./Output_using_conservative_filter/voom.RData")
}
```  

### voom with quality weights (voom-qw)
```{r voomqw}
table <- as.list(rep(NA, length(comparisons)))
names(table) <- comparisons
for (c in comparisons){
  cat("== Running comparison", c, "== \n")
  batch <- paste0("R2", substr(c, start=10, stop=10))
  batch <- c(batch, "R1", "R3")
  name <- gsub(".D", "", c)
  select <- c(strsplit2(name, split="vs"))
  sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch)
  data <- RSCE.mRNA.unstranded[,sample.select]
  group <- data$samples$group
  design <- model.matrix(~group)
  v <- voomWithQualityWeights(data, design)
  fit <- lmFit(v, design)
  fit <- eBayes(fit)
  tt <- topTable(fit, coef=ncol(design), n=Inf)
  colnames(tt)[colnames(tt)=="adj.P.Val"] <- "FDR"
  table[[c]] <- tt
  save(table, file="./Output_using_conservative_filter/voom_qw.RData")
}
```

### edgeR with generalised linear models (edgeR-glm)
```{r edgerglm}
table <- as.list(rep(NA, length(comparisons)))
names(table) <- comparisons
for (c in comparisons){
  cat("== Running comparison", c, "== \n")
  batch <- paste0("R2", substr(c, start=10, stop=10))
  batch <- c(batch, "R1", "R3")
  name <- gsub(".D", "", c)
  select <- c(strsplit2(name, split="vs"))
  sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch)
  data <- RSCE.mRNA.unstranded[,sample.select]
  group <- data$samples$group
  design <- model.matrix(~group)
  data <- estimateGLMCommonDisp(data, design)
  data <- estimateGLMTrendedDisp(data, design)
  data <- estimateGLMTagwiseDisp(data, design)
  fit <- glmFit(data, design)
  lrt <- glmLRT(fit)
  tt <- topTags(lrt, n=Inf)
  tt <- as.data.frame(tt)
  table[[c]] <- tt
  save(table, file="./Output_using_conservative_filter/edger_glm.RData")
}
```

### edgeR using exact tests (edgeR-classic)
```{r edgerclassic}
table <- as.list(rep(NA, length(comparisons)))
names(table) <- comparisons
for (c in comparisons){
  cat("== Running comparison", c, "== \n")
  batch <- paste0("R2", substr(c, start=10, stop=10))
  batch <- c(batch, "R1", "R3")
  name <- gsub(".D", "", c)
  select <- c(strsplit2(name, split="vs"))
  sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch)
  data <- RSCE.mRNA.unstranded[,sample.select]
  group <- data$samples$group
  design <- model.matrix(~group)
  data <- estimateCommonDisp(data)
  data <- estimateTagwiseDisp(data)
  et <- exactTest(data)
  tt <- topTags(et, n=Inf)
  tt <- as.data.frame(tt)
  table[[c]] <- tt
  save(table, file="./Output_using_conservative_filter/edger_classic.RData")
}
```

### DESeq2
```{r deseq2}
table <- as.list(rep(NA, length(comparisons)))
names(table) <- comparisons
for (c in comparisons){
  cat("== Running comparison", c, "== \n")
  batch <- paste0("R2", substr(c, start=10, stop=10))
  batch <- c(batch, "R1", "R3")
  name <- gsub(".D", "", c)
  select <- c(strsplit2(name, split="vs"))
  sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch)
  data <- RSCE.mRNA.unstranded[,sample.select]
  levels(data$samples$group) <- c("low", "high")
  dds <- DESeqDataSetFromMatrix(data$counts, colData=data$samples, design=~group)
  mcols(dds) <- DataFrame(mcols(dds), data$genes)
  dds <- DESeq(dds)
  res <- results(dds, contrast=c("group", "high", "low"))
  tt <- as.data.frame(res)
  tt$GeneID <- rownames(tt)
  colnames(tt)[colnames(tt)=="padj"] <- "FDR"
  table[[c]] <- tt
  save(table, file="./Output_using_conservative_filter/deseq2.RData")
}
```

### baySeq on a normal distribution (bayseq-norm)
```{r bayseqnorm}
table <- as.list(rep(NA, length(comparisons)))
names(table) <- comparisons
for (c in comparisons){
  cat("== Running comparison", c, "== \n")
  batch <- paste0("R2", substr(c, start=10, stop=10))
  batch <- c(batch, "R1", "R3")
  name <- gsub(".D", "", c)
  select <- c(strsplit2(name, split="vs"))
  sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch)
  data <- RSCE.mRNA.unstranded[,sample.select]
  group <- data$samples$group
  CD <- new("countData", data=data$counts, 
            replicates=group, groups=list(NDE=rep(1, 6), DE=as.numeric(group)))
  CD@annotation <- data$genes
  libsizes(CD) <- getLibsizes(CD)
  densityFunction(CD) <- normDensity
  cl <- makeCluster(8)
  normCD <- getPriors(CD, cl=cl)
  normCD <- getLikelihoods(normCD, cl=cl)
  tt <- topCounts(normCD, group="DE", number=nrow(data))
  colnames(tt)[colnames(tt)=="FDR.DE"] <- "FDR"
  table[[c]] <- tt
  save(table, file="./Output_using_conservative_filter/bayseq_norm.RData")
}
```

### baySeq on a negative binomial distribution (bayseq-nb)
```{r bayseqnb}
table <- as.list(rep(NA, length(comparisons)))
names(table) <- comparisons
for (c in comparisons){
  cat("== Running comparison", c, "== \n")
  batch <- paste0("R2", substr(c, start=10, stop=10))
  batch <- c(batch, "R1", "R3")
  name <- gsub(".D", "", c)
  select <- c(strsplit2(name, split="vs"))
  sample.select <- which(sample$Mixture %in% select & sample$Batch %in% batch)
  data <- RSCE.mRNA.unstranded[,sample.select]
  group <- data$samples$group
  CD <- new("countData", data=data$counts, 
            replicates=group, groups=list(NDE=rep(1, 6), DE=as.numeric(group)))
  CD@annotation <- data$genes
  libsizes(CD) <- getLibsizes(CD)
  cl <- makeCluster(8)
  CD <- getPriors.NB(CD, cl=cl)
  CD <- getLikelihoods(CD, cl=cl)
  tt <- topCounts(CD, group="DE", number=nrow(data))
  colnames(tt)[colnames(tt)=="FDR.DE"] <- "FDR"
  table[[c]] <- tt
  save(table, file="./Output_using_conservative_filter/bayseq_nb.RData")
}
```


# Session time and information  
```{r runtime, echo=FALSE}
totaltime <- proc.time()-starttime
cat("Total run time for this report:", round(totaltime[3]/60, 2), "minutes \n")
```

```{r sessioninfo}
sessionInfo()
```