CHIPNAME = "MoGene-1_0-st-v1"
ANNOTFILE = "MoGene-1_0-st-v1.na28.mm9.transcript.csv"
#PROBETAB = "MoGene-1_0-st-v1.probe.tab"
PROBETAB = "MoGene_mm9_probetab.txt"
#DATASET = "mmurinus_gene_ensembl"
DATASET   = "mmusculus_gene_ensembl"

##################################################################################
##################################################################################

library(aroma.affymetrix)
# If aroma.affymetrix isn't installed, do so with the following commands:
# source("http://www.braju.com/R/hbLite.R")
# hbInstall("aroma.affymetrix")

library(GenomeGraphs)
# If GenomeGraphs isn't installed, do so with the following commands:
# source("http://bioconductor.org/biocLite.R")
# biocLite("GenomeGraphs")

library(FIRMAGene)
# If GenomeGraphs isn't installed, do so with the following commands:
# install.packages("FIRMAGene", repos="http://R-Forge.R-project.org")

##################################################################################
##################################################################################

verbose = Arguments$getVerbose(-30); timestampOn(verbose)
#cdf = AffymetrixCdfFile$fromChipType(CHIPNAME, verbose=verbose) # this assumes the CDF file is in ./annotationData/chipTypes/CHIPNAME/
cdf = AffymetrixCdfFile$fromChipType(CHIPNAME, tags="mm9",verbose=verbose) # this assumes the CDF file is in ./annotationData/chipTypes/CHIPNAME/
cs = AffymetrixCelSet$fromName("tissues", cdf=cdf, verbose=verbose) # this assumes the CEL files are at ./rawData/tissues/CHIPNAME/
bc = RmaBackgroundCorrection(cs)
csBC = process(bc, verbose=verbose)
qn = QuantileNormalization(csBC, typesToUpdate="pm")
csN = process(qn, verbose=verbose) # time required first time through
cdfu = getUniqueCdf(cdf, verbose=-80) # convert to unique cell
#csNU = convertToUnique(csN, verbose=-20)
csNU = convertToUnique(csN, tags="mm9,UNQ", verbose=-20)
plm = RmaPlm(csNU)

fit(plm, verbose=verbose) #time required first time through

cls <- gsub("MouseTP_", "", gsub("_0[1-3]_mGENE", "", getNames(cs)))
netaffx=read.csv(ANNOTFILE, sep=",", skip=19, header=TRUE, comment.char="", stringsAsFactors=FALSE)
probetab=read.table(PROBETAB, sep="\t", header=TRUE, comment.char="", stringsAsFactors=FALSE)

nc <- nbrOfCellsPerUnit(cdf)
u <- which(getUnitNames(cdf) %in% netaffx$probeset_id[netaffx$category == "main"] & nc > 7 & nc < 200)
fg <- FIRMAGene(plm, idsToUse=u, cls=cls)
sampNames <- getNames(csNU)

mart=useMart(biomart="ensembl", dataset=DATASET)

getNames=function(...) UseMethod("getNames")# clash of the 'getNames' function
top20=topSplices(fg, 20)

pdf("FIRMAGene_MoGene_tissue.pdf", height=11, width=8.5)
for(i in 1:nrow(top20)) {
col= rep("black",33)
ww=grep(top20$Sample[i], sampNames)
col[ww]="blue"
lwd=c(1,3)[(col!="black")+1]
y=FIRMAGene::getDetails(plm, id=top20$ID[i], mart=mart, probesets=probetab, col=col, lwd=lwd, verbose=TRUE, transcriptClusterId="transcript_cluster_id")
y[[1]]@title=paste(y[[1]]@title, paste("blue", top20$Sample[i], sep="="), sep=" -- ")
gdPlot(y)
}
dev.off()

top1k = topSplices(fg,1000, plm=plm, cls=cls) # for additional file 2
#top1k = topSplices(fg,1000) # for additional file 2
m = match( top1k$ID, netaffx$transcript_cluster_id )
symb = as.character(sapply(as.character(netaffx$gene_assignment[m]), FUN=function(u) strsplit(u," // ")[[1]][2]))
symb[is.na(symb)] = "---"
top1k = cbind(top1k, Symbol=symb)
write.table(top1k,"FIRMAGene_MoGene_tissue.csv",sep=",",row.names=FALSE)