load("LK_data.RData")
source("functions.R")
D <- MA.subsetA$M

require(limma)
require(MASS)


# ---------------------------------------
# simulation to explore various parameter settings
# ---------------------------------------


#dataList <- list()

library(edgeR)
nSims <- 1000
output <- matrix(,nr=nSims,nc=8,dimnames=list(NULL,c("true","pUp", "pDiff", "TMM","TMM-weighted","RLM","uniq1","uniq2")))

for(i in 1:nSims) {

  pUp <- runif(1,.1,.9)
  pDiff <- runif(1,.05,.25)
  uniq1 <- runif(1,0,2000)
  uniq2 <- runif(1,0,2000)
  xx <- generateDataset(commonTags=2e4, uniqueTags=c(floor(uniq1),floor(uniq2)),
                        foldDifference=4,pUp=pUp, pDifferential=pDiff, empiricalDist=D[,1], libLimits=c(8e5,1.2e6));

  ff2 <- calcFactor(xx$DATA[,1]/xx$libSizes[1], xx$DATA[,2]/xx$libSizes[2]);
  #ff2a <- calcFactorWeighted(xx$DATA[,1], xx$DATA[,2]);
  ff2a <- calcNormFactors(xx$DATA, refColumn=2)[1]
  ff2r <- calcFactorRLM(xx$DATA[,1], xx$DATA[,2]);
  
  cs <- colMeans(xx$LAMBDA)
  output[i,] <- c(cs[2]/cs[1], pUp, pDiff, ff2, ff2a, ff2r, uniq1, uniq2);
  
  #dataList[[i]] <- xx  
  
  cat(".")
  if(i %% 50==0)
    cat(i,"\n")
}

trueM <- outer(output[,1], c(1,1,1))

#png("figS4.est-vs-true.png", h=1000,w=1000)
#par(mfrow=c(2,2))

matplot(output[,1], output[,4:6], pch=19, col=c("black","blue","red"), xlab="true factor",ylab="estimated factor", cex=.2)
abline(0,1)
legend("topleft",c("TMM","TMM-weighted","RLM"),col=c("black","blue","red"),pch=19)
matplot(output[,1], output[,4:6]-trueM, pch=19, col=c("black","blue","red"), xlab="true factor",ylab="bias", cex=.2)
matplot(output[,2], output[,4:6]-trueM, pch=19, col=c("black","blue","red"), xlab="percent up",ylab="bias", cex=.2)
matplot(output[,3], output[,4:6]-trueM, pch=19, col=c("black","blue","red"), xlab="percent differential",ylab="bias", cex=.2)
dev.off()


# ---------------------------------------
# reverse cum dist plots for marioni data
# ---------------------------------------

load("LK_data.RData")
source("functions.R")

D <- MA.subsetA$M

liverSamples <- (1:ncol(D)) %in% grep("Liver",colnames(D))

png("figS5.reverseCumSuppFig.png", 500,500)
for(i in 1:ncol(D))
  plotReverseCumDist( D[,i], add=!(i==1), type="l", col=c("blue","black")[(i %in% grep("Liver",colnames(D))) +1], lwd=3 )
legend("bottomleft",c("Liver","Kidney"),col=c("blue","black"),lty=1,lwd=3)
dev.off()



# ---------------------------------------
# MA-like-plot for Sultan et al. data
# ---------------------------------------

# download files from:
# http://www.sciencemag.org/content/vol0/issue2008/images/data/1160342/DC1/1160342s_tablesS2-S9.zip
# resave XLS file as TXT file with out normalized expression columns

x <- read.table("1160342s_tableS2.txt",sep="\t", header=TRUE, stringsAsFactors=FALSE, fill=TRUE)
p1 <- x[,2]
p2 <- x[,3]

png("figS7.sultanFig.png",h=500,w=700)
plot( log2(p1)+log2(p2), log2(p1/p2), pch=19, cex=.4, xlab="Absolute expression", ylab="Log Fold Change (HEK/B cells)"  )
grid(col="black")
abline(h=mean(log2(p1/p2), trim=.3, na.rm=TRUE),col="blue",lwd=3)
dev.off()


# ---------------------------------------
# MA-like-plot for Mortazavi et al. data
# ---------------------------------------

# download files from:
# http://www.nature.com/nmeth/journal/v5/n7/extref/nmeth.1226-S3.txt
# http://www.nature.com/nmeth/journal/v5/n7/extref/nmeth.1226-S4.txt
# http://www.nature.com/nmeth/journal/v5/n7/extref/nmeth.1226-S5.txt

brain <- read.table("nmeth.1226-S3.txt", sep="\t", header=TRUE)
liver <- read.table("nmeth.1226-S4.txt", sep="\t", header=TRUE)
muscle <- read.table("nmeth.1226-S5.txt", sep="\t", header=TRUE)

sum(brain$gid==liver$gid)
sum(brain$gid==muscle$gid)

source("functions.R")

b <- brain[,"finalRPKM"]
v <- liver[,"finalRPKM"]
m <- muscle[,"finalRPKM"]

w <- which( brain$fractionMulti < .2 & liver$fractionMulti < .2 & muscle$fractionMulti < .2 )

png("figS6.mortazaviSuppFig.png",h=800,w=800)
par(mfrow=c(2,2))
plot( log2(v[w])+log2(b[w]), log2(b[w])-log2(v[w]), pch=19, cex=.3, xlab="Absolute expression", ylab="Log Fold Change (Brain/Liver)", ylim=c(-15,15) )
abline(h=mean(log2(b[w])-log2(v[w]), trim=.3, na.rm=TRUE),col="blue",lwd=3)
grid(col="black")
plot( log2(m[w])+log2(b[w]), log2(b[w])-log2(m[w]), pch=19, cex=.3, xlab="Absolute expression", ylab="Log Fold Change (Brain/Muscle)", ylim=c(-15,15) )
abline(h=mean(log2(b[w])-log2(m[w]), trim=.3, na.rm=TRUE),col="blue",lwd=3)
grid(col="black")
plot( log2(v[w])+log2(m[w]), log2(v[w])-log2(m[w]), pch=19, cex=.3, xlab="Absolute expression", ylab="Log Fold Change (Liver/Muscle)", ylim=c(-15,15) )
abline(h=mean(log2(v[w])-log2(m[w]), trim=.3, na.rm=TRUE),col="blue",lwd=3)
grid(col="black")
dev.off()


# ---------------------------------------
# Cloonan et al.  ES versus EB comparison
# ---------------------------------------

# download file from:
# http://genome.cshlp.org/content/suppl/2008/10/15/gr.077578.108.DC1/Supplementary_Table_3_exp_lev_fold_change.xls
# ... remove some rows/columns and save as TXT file

x <- read.table("kuchenbauer_mostcommonseq.txt", sep="\t", header=TRUE, stringsAsFactors=FALSE)
png("figS3.miRNA-MA.png",w=800,h=600)
maPlot(x[,2],x[,3], normalize=TRUE, pch=19)
f <- calcNormFactors(as.matrix(x[,2:3]))[2]
abline(h=log2(f), col="blue", lwd=4)
grid(col="black")
dev.off()




# ---------------------------------------
# Cloonan et al.  ES versus EB comparison
# ---------------------------------------

library(edgeR)

mhk <- read.table("mouse_housekeeping.txt", sep="\t", header=TRUE, stringsAsFactors=FALSE)
x <- read.table("Grimmond_lengths.txt", sep="\t", header=TRUE)
d <- x[,5:6]
f <- calcNormFactors(as.matrix(d), logratioTrim=.35, Acutoff=-16)

w <- d[,1] == min(d[,1]) | d[,2] == min(d[,2])
h <- toupper(x$Gene_symbol) %in% mhk$Identifiers

cols <- rep("black", nrow(d))
cols[w] <- "orange"
cols[h] <- "blue"

cexs <- rep(.2, nrow(d))
cexs[h] <- .5


png("figS1.maPlot_cloonan.png",w=1000,h=700)
ma <- maPlot(d[,1],d[,2], normalize=TRUE, pch=19, cex=cexs, ylab="log-fold-change: EB - ES", ylim=c(-6,6), col=cols)
grid(col="black")
abline(h=log2(f[2]),col="orange",lwd=3)
points(ma$A[h], ma$M[h], col="blue", cex=.5, pch=19)  # add points on top
abline(h=median(ma$M[h]),col="blue",lwd=3)
arrows( -18, -2.5, -19, -1.75, length=.1, lwd=6, col="red" )
dev.off()

cs <- colSums(d)

#library(statmod)
#s <- sage.test(d[,1], d[,2])
#sa <- sage.test(d[,1], d[,2], sum(d[,1])/sqrt(f[2]), sum(d[,2])*sqrt(f[2]))

#fc_raw <- d[,1]/cs[1] / ( d[,2]/cs[2] )
#fc_adj <- d[,1]/(cs[1]*f[1]) / ( d[,2]/(cs[2]*f[2]) )

# before normalization
#table(sig=s < .01/nrow(d), up=fc_raw>1)

# after normalization
#table(sig=sa < .01/nrow(d), up=fc_adj>1)


# ---------------------------------------
# Li et al. LNCAP (androgen versus mock)
# ---------------------------------------

library(edgeR)
hk <- read.table("human_housekeeping.txt", header=FALSE, sep=" ", as.is=TRUE, stringsAsFactors=FALSE)$V1
hk1 <- gsub("*","",hk, fixed=TRUE)

fn <- dir(,"map.sim")

# needed to fix a little problem with readDGE()
de <- readDGE(fn)
de$samples$group <- factor(rep(1:2,4:3))


library(biomaRt)

mart <- useMart("ENSEMBL_MART_ENSEMBL",dataset="hsapiens_gene_ensembl",host="www.ensembl.org")
ens2refseq <- getBM(attributes=c("ensembl_gene_id","refseq_dna"),
                    filters="ensembl_gene_id",values=rownames(de$counts), mart=mart)

genes <- unique(ens2refseq$ensembl_gene_id[ens2refseq$refseq_dna %in% hk1])

#dea <- DGEList(counts=de$counts, lib.size=de$samples$lib.size*f, group=de$samples$group)
#de <- estimateCommonDisp(de)
#dea <- estimateCommonDisp(dea)

f <- calcNormFactors(de$counts)

D <- de$counts

w <- D[,1] == min(D[,1]) | D[,2] == min(D[,2])
h <- rownames(D) %in% genes

cols <- rep("black", nrow(D))
cols[w] <- "orange"
cols[h] <- "blue"

cexs <- rep(.2, nrow(D))
cexs[h] <- .5

png("figS2.maPlot_li.png",w=1000,h=700)
ma <- maPlot(D[,1], D[,6], normalize=TRUE, pch=19, cex=cexs, ylab="log-fold-change: DHT - control", ylim=c(-6,6), col=cols)
grid(col="black")
abline(h=log2(f[6]),col="orange",lwd=3)
points(ma$A[h], ma$M[h], col="blue", cex=.5, pch=19)  # add points on top
abline(h=median(ma$M[h]),col="blue",lwd=3)
dev.off()