plotReverseCumDist <- function(x, xlab="Tag count X", ylab="# tags >= X", add=FALSE, ...) {
  v <- ecdf(x)
  matplot( knots(v), (1-v(knots(v)))*sum(D[,1]), log="xy", xlab=xlab, ylab=ylab, add=add, ... )
}

generateDataset2 <- function(commonTags=15000, uniqueTags=c(1000,3000), group=c(1,2), libLimits=c(.9,1.1)*1e6, 
                            empiricalDist=NULL, lengthDist=NULL, pDifferential=.05, pUp=.5, foldDifference=2, nreps=c(2,2)) {
                            
  # some checks
  stopifnot( length(group) == length(uniqueTags) )
  stopifnot( length(group) == length(nreps) )
  stopifnot( length(empiricalDist) == length(lengthDist) )
  group <- as.factor(rep(group,nreps))
  stopifnot( nlevels(group) == 2 ) 
  
  print(group)

  #exampleCounts <- empiricalDist/lengthDist
  exampleCounts <- empiricalDist
  exampleLambda <- exampleCounts/sum(exampleCounts)
  exampleIds <- seq_len( length(empiricalDist) )
 
  # set up libraries
  nLibraries <- sum( nreps )
  libSizes <- runif(nLibraries, min=libLimits[1], max=libLimits[2] )

  # vector of starts/stops for the unique Tags
  en <- commonTags + cumsum(uniqueTags)
  st <- c(commonTags+1,en[-nLibraries]+1)

  # create matrix of LAMBDA(=relative expression levels)
  LAMBDA <- matrix(0, nrow=max(en), ncol=nLibraries)
  
  ID <- rep(0, max(en))
  ID[1:commonTags] <- sample(exampleIds, commonTags, replace=TRUE)
  LAMBDA[1:commonTags,] <- exampleLambda[ ID[1:commonTags] ]

  # set unique tag totals
  for(i in 1:length(nreps))
    if(uniqueTags[i] > 0) {
      ID[st[i]:en[i]] <- sample(exampleIds, uniqueTags[i], replace=TRUE)
      LAMBDA[st[i]:en[i],group==levels(group)[i]] <- exampleLambda[ ID[st[i]:en[i]] ]
    }
    
  g <- group == levels(group)[1]
  ind <- seq_len(floor(pDifferential*commonTags))
  if(length(ind)>0) {
    fcDir <- sample(c(-1,1), length(ind), prob=c(1-pUp,pUp), replace=TRUE)
    LAMBDA[ind,g] <- LAMBDA[ind,g]*exp(log(foldDifference)/2*fcDir)
    LAMBDA[ind,!g] <- LAMBDA[ind,!g]*exp(log(foldDifference)/2*(-fcDir))
  }

  sampFactors <- colSums(LAMBDA)

  sampFactorsM <- outer( rep(1,max(en)), sampFactors )
  libSizesM <- outer(  rep(1,max(en)), libSizes )

  # create observed means
  MEAN <- LAMBDA / sampFactorsM * libSizesM  # to get the totals to sum to 1

  # sample observed data (column sums will be *close* to set library sizes)
  DATA <- matrix(0, nr=nrow(LAMBDA), ncol=nLibraries)
  DATA <- matrix(rpois(length(MEAN), lambda=MEAN),ncol=nLibraries)

  trueFactors <- colSums(MEAN[1:commonTags,])
  trueFactors <- trueFactors/trueFactors[1]
  
  colnames(DATA) <- paste(paste("group",group,sep=""),1:ncol(DATA),sep=".")
  
  list(DATA=DATA, LAMBDA=LAMBDA, MEAN=MEAN, trueFactors=trueFactors, group=group, libSizes=libSizes,  
       differentialInd=c(ind,(commonTags+1):nrow(DATA)), commonInd=1:commonTags, ID=ID, length=lengthDist[ID])
}

takeSubset <- function(obj, subsetInd) {
  allInd <- 1:nrow(obj$DATA)
  commonInd <- allInd %in% obj$commonInd
  differentialInd <- allInd %in% obj$differentialInd
  list(DATA=obj$DATA[subsetInd,], LAMBDA=obj$LAMBDA[subsetInd,], trueFactors=obj$trueFactors, group=obj$group, 
       libSizes=obj$libSizes, differentialInd=which(differentialInd[subsetInd]), commonInd=which(commonInd[subsetInd]),
	   ID=obj$ID[subsetInd], length=obj$length[subsetInd])
}

generateDataset <- function(commonTags=15000, uniqueTags=c(1000,3000), group=c(1,2), libLimits=c(.9,1.1)*1e6, 
                            empiricalDist=NULL, randomRate=1/100, pDifferential=.05, pUp=.5, foldDifference=2) {
                            
  # some checks
  group <- as.factor(group)
  stopifnot( length(group) == length(uniqueTags) )
  #stopifnot( length(group) == 2 ) # code below only works for 2 samples
  stopifnot( nlevels(group) == 2 ) 

  # define where to take random sample from (empirical distribution OR random exponential)
  if(is.null(empiricalDist))
    exampleCounts <- ceiling(rexp(commonTags,rate=randomRate))
  else
    exampleCounts <- empiricalDist
  
  exampleLambda <- exampleCounts/sum(exampleCounts)
 
  # set up libraries
  nLibraries <- length(uniqueTags)
  libSizes <- runif(nLibraries, min=libLimits[1], max=libLimits[2] )

  # vector of starts/stops for the unique Tags
  en <- commonTags + cumsum(uniqueTags)
  st <- c(commonTags+1,en[-nLibraries]+1)

  # create matrix of LAMBDA(=relative expression levels)
  LAMBDA <- matrix(0, nrow=max(en), ncol=nLibraries)
  LAMBDA[1:commonTags,] <- sample(exampleLambda, commonTags, replace=TRUE)

  # set unique tag totals
  for(i in 1:nLibraries)
    if(uniqueTags[i] > 0)
      LAMBDA[st[i]:en[i],i] <- sample(exampleLambda, uniqueTags[i])    
    
  ind <- seq_len(floor(pDifferential*commonTags))
  g <- group == levels(group)[1]
  if(length(ind)>0) {
    fcDir <- sample(c(-1,1), length(ind), prob=c(1-pUp,pUp), replace=TRUE)
    LAMBDA[ind,g] <- LAMBDA[ind,!g]*exp(log(foldDifference)/2*fcDir)
    LAMBDA[ind,!g] <- LAMBDA[ind,!g]*exp(log(foldDifference)/2*(-fcDir))
  }
  
  sampFactors <- colSums(LAMBDA)

  sampFactorsM <- outer( rep(1,max(en)), sampFactors )
  libSizesM <- outer(  rep(1,max(en)), libSizes )

  # create observed means
  MEAN <- LAMBDA / sampFactorsM * libSizesM

  # sample observed data (column sums will be *close* to set library sizes)
  DATA <- matrix(rpois(length(MEAN), lambda=MEAN),ncol=nLibraries)

  trueFactors <- colSums(MEAN[1:commonTags,])
  trueFactors <- trueFactors/trueFactors[1]
  list(DATA=DATA, LAMBDA=LAMBDA, MEAN=MEAN, trueFactors=trueFactors, group=group, libSizes=libSizes,  differentialInd=c(ind,(commonTags+1):nrow(DATA)), 
  commonInd=1:commonTags)
}

calcFactor <- function(obs, ref, trim=.45) {
  logR <- log2(obs/ref)
  fin <- is.finite(logR)
  2^mean(logR[fin],trim=trim)
}

Poisson.model <- function(MA,group1,group2){

  require(limma)
  Poisson.glm.pval <- vector()
  Fold.changes <- vector()
  
  CS <- colSums(MA$M[,c(group1,group2)])

  for (i in 1:(nrow(MA))){
    S1 <- MA$M[i,group1] 
    S2 <- MA$M[i,group2] 
    In <- c(S1,S2)
    sample.f <- factor(c(rep(1,length(group1)),rep(2,length(group2))))
    In <- as.vector(unlist(In))
    GLM.Poisson <- glm(In ~ 1 + sample.f + offset(log(CS)),family=poisson)
    Poisson.glm.pval[i] <- anova(GLM.Poisson,test="Chisq")[5][2,1]
    Fold.changes[i] <- exp(GLM.Poisson$coefficients[1])/(exp(GLM.Poisson$coefficients[1]+GLM.Poisson$coefficients[2]))
  }
  
  output <- matrix(ncol=2,nrow=nrow(MA$M))
  output[,1] <- Poisson.glm.pval
  output[,2] <- Fold.changes
  output <- as.data.frame(output)
  names(output) <- c("pval","FC")
  output
}

Poisson.model.new <- function(countMatrix,group1,group2, ref=1, calcFactor=TRUE){

  Poisson.glm.pval <- vector()
  Fold.changes <- vector()
  
  props <- countMatrix / outer( rep(1,nrow(countMatrix)), colSums(countMatrix) )
  
  refS <- colSums(countMatrix[,c(group1,group2)])
  
  if( calcFactor ) {
    require(edgeR)
	  CS <- calcNormFactors(countMatrix[,c(group1,group2)])
  } else {
    CS <- rep(1,length(group1)+length(group2))
  }
  
  offsets <- log(CS)+log(refS)

  sample.f <- factor(c(rep(1,length(group1)),rep(2,length(group2))))
  
  for (i in 1:(nrow(countMatrix))){
    S1 <- countMatrix[i,group1] 
    S2 <- countMatrix[i,group2] 
    In <- c(S1,S2)
    In <- as.vector(unlist(In))
    GLM.Poisson <- glm(In ~ 1 + sample.f + offset(offsets),family=poisson)
    Poisson.glm.pval[i] <- anova(GLM.Poisson,test="Chisq")[5][2,1]
    Fold.changes[i] <- exp(GLM.Poisson$coefficients[1])/(exp(GLM.Poisson$coefficients[1]+GLM.Poisson$coefficients[2]))
    if(i %% 100==0) cat(".")
  }
  cat("\n")
  
  #output <- matrix(ncol=2,nrow=nrow(countMatrix))
  #output[,1] <- Poisson.glm.pval
  #output[,2] <- Fold.changes
  #output <- as.data.frame(output)
  #names(output) <- c("pval","FC")
  
  list(stats=data.frame(pval=Poisson.glm.pval, FC=Fold.changes),offsets=offsets,factors=CS)
}


exactTestPoisson <- function(dataMatrix, meanMatrix, group1Ind, group2Ind, verbose=TRUE) {
  
  y1 <- rowSums(dataMatrix[,group1Ind])
  y2 <- rowSums(dataMatrix[,group2Ind])
  m1 <- rowSums(meanMatrix[,group1Ind])
  m2 <- rowSums(meanMatrix[,group2Ind])
  
  N <- rowSums( dataMatrix[,c(group1Ind,group2Ind)] )
  
  pvals <- rep(NA, nrow(dataMatrix))
  
  for (i in 1:length(pvals)) {
    v <- 0:N[i]
    p.top <- dpois(v, lambda=m1[i]) * dpois(N[i]-v, lambda=m2[i])
    p.obs <- dpois(y1[i], lambda=m1[i]) * dpois(y2[i], lambda=m2[i])
    p.bot <- dpois(N[i], lambda=m1[i]+m2[i])
    keep <- p.top <= p.obs
    pvals[i] <- sum(p.top[keep]/p.bot)
    if (verbose)
        if (i%%1000 == 0)
          cat(".")
  }
  if (verbose)
    cat("\n")
    
  pvals

}

calcFactorRLM <- function(obs, ref, logratioTrim=.20, sumTrim=0.01) {

  if( all(obs==ref) )
    return(1)

  nO <- sum(obs)
  nR <- sum(ref)
  logR <- log2((obs/nO)/(ref/nR))         # log ratio of expression, accounting for library size
  p0 <- obs/nO
  pR <- ref/nR
  
  x <- log2(p0)-log2(pR)
  x <- x[ !is.na(x) & is.finite(x) ]
  
  r <- rlm(x~1, method="MM")
  2^r$coef
  
}

calcFactorWeighted <- function(obs, ref, logratioTrim=.3, sumTrim=0.05) {

  if( all(obs==ref) )
    return(1)

  nO <- sum(obs)
  nR <- sum(ref)
  logR <- log2((obs/nO)/(ref/nR))         # log ratio of expression, accounting for library size
  absE <- log2(obs/nO) + log2(ref/nR)     # absolute expression
  v <- (nO-obs)/nO/obs + (nR-ref)/nR/ref  # estimated asymptotic variance
  
  fin <- is.finite(logR) & is.finite(absE)
  
  logR <- logR[fin]
  absE <- absE[fin]
  v <- v[fin]

  # taken from the original mean() function
  n <- sum(fin)
  loL <- floor(n * logratioTrim) + 1
  hiL <- n + 1 - loL
  loS <- floor(n * sumTrim) + 1
  hiS <- n + 1 - loS
  
  keep <- (rank(logR) %in% loL:hiL) & (rank(absE) %in% loS:hiS)
  2^( sum(logR[keep]/v[keep], na.rm=TRUE) / sum(1/v[keep], na.rm=TRUE) )
  
}

calcFactor2 <- function(obs, ref) {
  logR <- log2(obs/ref)
  fin <- is.finite(logR)
  d<-density(logR,na.rm=TRUE)
  2^d$x[which.max(d$y)]
}


fdPlot <- function( score, indDiff, add=FALSE, xlab="Number of Genes Selected", 
                    ylab="Number of False Discoveries", lwd=4, type="l", ... ) {
  o <- order(score)
  w <- o %in% indDiff
  x <- 1:length(indDiff)
  y <- cumsum(!w[indDiff])
  matplot(x, y, xlab=xlab, ylab=ylab, lwd=lwd, type=type, add=add, ... )
}