load("LK_data.RData") source("functions.R") D <- MA.subsetA$M require(limma) require(MASS) # --------------------------------------- # simulation to explore various parameter settings # --------------------------------------- #dataList <- list() library(edgeR) nSims <- 1000 output <- matrix(,nr=nSims,nc=8,dimnames=list(NULL,c("true","pUp", "pDiff", "TMM","TMM-weighted","RLM","uniq1","uniq2"))) for(i in 1:nSims) { pUp <- runif(1,.1,.9) pDiff <- runif(1,.05,.25) uniq1 <- runif(1,0,2000) uniq2 <- runif(1,0,2000) xx <- generateDataset(commonTags=2e4, uniqueTags=c(floor(uniq1),floor(uniq2)), foldDifference=4,pUp=pUp, pDifferential=pDiff, empiricalDist=D[,1], libLimits=c(8e5,1.2e6)); ff2 <- calcFactor(xx$DATA[,1]/xx$libSizes[1], xx$DATA[,2]/xx$libSizes[2]); #ff2a <- calcFactorWeighted(xx$DATA[,1], xx$DATA[,2]); ff2a <- calcNormFactors(xx$DATA, refColumn=2)[1] ff2r <- calcFactorRLM(xx$DATA[,1], xx$DATA[,2]); cs <- colMeans(xx$LAMBDA) output[i,] <- c(cs[2]/cs[1], pUp, pDiff, ff2, ff2a, ff2r, uniq1, uniq2); #dataList[[i]] <- xx cat(".") if(i %% 50==0) cat(i,"\n") } trueM <- outer(output[,1], c(1,1,1)) #png("figS4.est-vs-true.png", h=1000,w=1000) #par(mfrow=c(2,2)) matplot(output[,1], output[,4:6], pch=19, col=c("black","blue","red"), xlab="true factor",ylab="estimated factor", cex=.2) abline(0,1) legend("topleft",c("TMM","TMM-weighted","RLM"),col=c("black","blue","red"),pch=19) matplot(output[,1], output[,4:6]-trueM, pch=19, col=c("black","blue","red"), xlab="true factor",ylab="bias", cex=.2) matplot(output[,2], output[,4:6]-trueM, pch=19, col=c("black","blue","red"), xlab="percent up",ylab="bias", cex=.2) matplot(output[,3], output[,4:6]-trueM, pch=19, col=c("black","blue","red"), xlab="percent differential",ylab="bias", cex=.2) dev.off() # --------------------------------------- # reverse cum dist plots for marioni data # --------------------------------------- load("LK_data.RData") source("functions.R") D <- MA.subsetA$M liverSamples <- (1:ncol(D)) %in% grep("Liver",colnames(D)) png("figS5.reverseCumSuppFig.png", 500,500) for(i in 1:ncol(D)) plotReverseCumDist( D[,i], add=!(i==1), type="l", col=c("blue","black")[(i %in% grep("Liver",colnames(D))) +1], lwd=3 ) legend("bottomleft",c("Liver","Kidney"),col=c("blue","black"),lty=1,lwd=3) dev.off() # --------------------------------------- # MA-like-plot for Sultan et al. data # --------------------------------------- # download files from: # http://www.sciencemag.org/content/vol0/issue2008/images/data/1160342/DC1/1160342s_tablesS2-S9.zip # resave XLS file as TXT file with out normalized expression columns x <- read.table("1160342s_tableS2.txt",sep="\t", header=TRUE, stringsAsFactors=FALSE, fill=TRUE) p1 <- x[,2] p2 <- x[,3] png("figS7.sultanFig.png",h=500,w=700) plot( log2(p1)+log2(p2), log2(p1/p2), pch=19, cex=.4, xlab="Absolute expression", ylab="Log Fold Change (HEK/B cells)" ) grid(col="black") abline(h=mean(log2(p1/p2), trim=.3, na.rm=TRUE),col="blue",lwd=3) dev.off() # --------------------------------------- # MA-like-plot for Mortazavi et al. data # --------------------------------------- # download files from: # http://www.nature.com/nmeth/journal/v5/n7/extref/nmeth.1226-S3.txt # http://www.nature.com/nmeth/journal/v5/n7/extref/nmeth.1226-S4.txt # http://www.nature.com/nmeth/journal/v5/n7/extref/nmeth.1226-S5.txt brain <- read.table("nmeth.1226-S3.txt", sep="\t", header=TRUE) liver <- read.table("nmeth.1226-S4.txt", sep="\t", header=TRUE) muscle <- read.table("nmeth.1226-S5.txt", sep="\t", header=TRUE) sum(brain$gid==liver$gid) sum(brain$gid==muscle$gid) source("functions.R") b <- brain[,"finalRPKM"] v <- liver[,"finalRPKM"] m <- muscle[,"finalRPKM"] w <- which( brain$fractionMulti < .2 & liver$fractionMulti < .2 & muscle$fractionMulti < .2 ) png("figS6.mortazaviSuppFig.png",h=800,w=800) par(mfrow=c(2,2)) plot( log2(v[w])+log2(b[w]), log2(b[w])-log2(v[w]), pch=19, cex=.3, xlab="Absolute expression", ylab="Log Fold Change (Brain/Liver)", ylim=c(-15,15) ) abline(h=mean(log2(b[w])-log2(v[w]), trim=.3, na.rm=TRUE),col="blue",lwd=3) grid(col="black") plot( log2(m[w])+log2(b[w]), log2(b[w])-log2(m[w]), pch=19, cex=.3, xlab="Absolute expression", ylab="Log Fold Change (Brain/Muscle)", ylim=c(-15,15) ) abline(h=mean(log2(b[w])-log2(m[w]), trim=.3, na.rm=TRUE),col="blue",lwd=3) grid(col="black") plot( log2(v[w])+log2(m[w]), log2(v[w])-log2(m[w]), pch=19, cex=.3, xlab="Absolute expression", ylab="Log Fold Change (Liver/Muscle)", ylim=c(-15,15) ) abline(h=mean(log2(v[w])-log2(m[w]), trim=.3, na.rm=TRUE),col="blue",lwd=3) grid(col="black") dev.off() # --------------------------------------- # Cloonan et al. ES versus EB comparison # --------------------------------------- # download file from: # http://genome.cshlp.org/content/suppl/2008/10/15/gr.077578.108.DC1/Supplementary_Table_3_exp_lev_fold_change.xls # ... remove some rows/columns and save as TXT file x <- read.table("kuchenbauer_mostcommonseq.txt", sep="\t", header=TRUE, stringsAsFactors=FALSE) png("figS3.miRNA-MA.png",w=800,h=600) maPlot(x[,2],x[,3], normalize=TRUE, pch=19) f <- calcNormFactors(as.matrix(x[,2:3]))[2] abline(h=log2(f), col="blue", lwd=4) grid(col="black") dev.off() # --------------------------------------- # Cloonan et al. ES versus EB comparison # --------------------------------------- library(edgeR) mhk <- read.table("mouse_housekeeping.txt", sep="\t", header=TRUE, stringsAsFactors=FALSE) x <- read.table("Grimmond_lengths.txt", sep="\t", header=TRUE) d <- x[,5:6] f <- calcNormFactors(as.matrix(d), logratioTrim=.35, Acutoff=-16) w <- d[,1] == min(d[,1]) | d[,2] == min(d[,2]) h <- toupper(x$Gene_symbol) %in% mhk$Identifiers cols <- rep("black", nrow(d)) cols[w] <- "orange" cols[h] <- "blue" cexs <- rep(.2, nrow(d)) cexs[h] <- .5 png("figS1.maPlot_cloonan.png",w=1000,h=700) ma <- maPlot(d[,1],d[,2], normalize=TRUE, pch=19, cex=cexs, ylab="log-fold-change: EB - ES", ylim=c(-6,6), col=cols) grid(col="black") abline(h=log2(f[2]),col="orange",lwd=3) points(ma$A[h], ma$M[h], col="blue", cex=.5, pch=19) # add points on top abline(h=median(ma$M[h]),col="blue",lwd=3) arrows( -18, -2.5, -19, -1.75, length=.1, lwd=6, col="red" ) dev.off() cs <- colSums(d) #library(statmod) #s <- sage.test(d[,1], d[,2]) #sa <- sage.test(d[,1], d[,2], sum(d[,1])/sqrt(f[2]), sum(d[,2])*sqrt(f[2])) #fc_raw <- d[,1]/cs[1] / ( d[,2]/cs[2] ) #fc_adj <- d[,1]/(cs[1]*f[1]) / ( d[,2]/(cs[2]*f[2]) ) # before normalization #table(sig=s < .01/nrow(d), up=fc_raw>1) # after normalization #table(sig=sa < .01/nrow(d), up=fc_adj>1) # --------------------------------------- # Li et al. LNCAP (androgen versus mock) # --------------------------------------- library(edgeR) hk <- read.table("human_housekeeping.txt", header=FALSE, sep=" ", as.is=TRUE, stringsAsFactors=FALSE)$V1 hk1 <- gsub("*","",hk, fixed=TRUE) fn <- dir(,"map.sim") # needed to fix a little problem with readDGE() de <- readDGE(fn) de$samples$group <- factor(rep(1:2,4:3)) library(biomaRt) mart <- useMart("ENSEMBL_MART_ENSEMBL",dataset="hsapiens_gene_ensembl",host="www.ensembl.org") ens2refseq <- getBM(attributes=c("ensembl_gene_id","refseq_dna"), filters="ensembl_gene_id",values=rownames(de$counts), mart=mart) genes <- unique(ens2refseq$ensembl_gene_id[ens2refseq$refseq_dna %in% hk1]) #dea <- DGEList(counts=de$counts, lib.size=de$samples$lib.size*f, group=de$samples$group) #de <- estimateCommonDisp(de) #dea <- estimateCommonDisp(dea) f <- calcNormFactors(de$counts) D <- de$counts w <- D[,1] == min(D[,1]) | D[,2] == min(D[,2]) h <- rownames(D) %in% genes cols <- rep("black", nrow(D)) cols[w] <- "orange" cols[h] <- "blue" cexs <- rep(.2, nrow(D)) cexs[h] <- .5 png("figS2.maPlot_li.png",w=1000,h=700) ma <- maPlot(D[,1], D[,6], normalize=TRUE, pch=19, cex=cexs, ylab="log-fold-change: DHT - control", ylim=c(-6,6), col=cols) grid(col="black") abline(h=log2(f[6]),col="orange",lwd=3) points(ma$A[h], ma$M[h], col="blue", cex=.5, pch=19) # add points on top abline(h=median(ma$M[h]),col="blue",lwd=3) dev.off()