# For general comments/questions, contact Mark Robinson

# See http://groups.google.com/group/aroma-affymetrix/ for instructions on how
# to install/setup the aroma.affymetrix package
library(aroma.affymetrix)


verbose <- Arguments$getVerbose(-30); timestampOn(verbose)

# this assumes the CDF file is in ./annotationData/chipTypes/HuGene-1_0-st-v1
cdf<-AffymetrixCdfFile$fromChipType("HuGene-1_0-st-v1",verbose=verbose)

# this assumes the CEL files are at ./rawData/tissues/HuGene-1_0-st-v1/
cs<-AffymetrixCelSet$fromName("tissues",cdf=cdf,verbose=verbose)

bc <- RmaBackgroundCorrection(cs)
csBC <- process(bc,verbose=verbose)
qn <- QuantileNormalization(csBC, typesToUpdate="pm")
csN <- process(qn, verbose=verbose) #time required first time through

# convert to unique cell
cdfu <- getUniqueCdf(cdf,verbose=-80)
csNU <- convertToUnique(csN,verbose=-20)

plm <- RmaPlm(csNU)
fit(plm, verbose=verbose) #time required first time through


cls <- gsub("TisMap_","",gsub("_0[1-3]_v1_WTGene1","",getNames(cs)))

library(FIRMAGene)

hgnetaffx <- read.csv("HuGene-1_0-st-v1.na25.hg18.transcript.csv",sep=",",skip=19,header=TRUE,comment.char="",stringsAsFactors=FALSE)
probetab <- read.table("HuGene-1_0-st-v1.probe.tab",sep="\t",header=TRUE,comment.char="",stringsAsFactors=FALSE)

u <- which(getUnitNames(cdf) %in% hgnetaffx$probeset_id[hgnetaffx$category == "main" & hgnetaffx$total_probes > 7 & hgnetaffx$total_probes < 200])

fg <- FIRMAGene(plm, idsToUse=u, cls=cls)


sampNames <- getNames(csNU)

library(GenomeGraphs)
mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")

library(grid)
getNames <- function(...) UseMethod("getNames") # clash of the 'getNames' function

top20 <- topSplices(fg,20)

pdf("FIRMAGene_additional_file_1-plots.pdf",height=11,width=8.5)
for(i in 1:nrow(top20)) {
  col <- rep("black",33)
  ww <- grep(top20$Sample[i],sampNames)
  col[ ww ] <- "blue"
  lwd <- c(1,3)[(col!="black")+1]
  y <- FIRMAGene::getDetails(plm,id=top20$ID[i],mart=mart,probesets=probetab,col=col,lwd=lwd)
  y[[1]]@title <- paste( y[[1]]@title, paste("blue",top20$Sample[i],sep="="), sep=" -- ")
  if(length(y)>3) {
    gdPlot(y)
  }
}
dev.off()


top1k <- topSplices(fg,1000)  # for additional file 2
m <- match( top1k$ID, hgnetaffx$transcript_cluster_id )
symb <- as.character(sapply(as.character(hgnetaffx$gene_assignment[m]), FUN=function(u) strsplit(u," // ")[[1]][2]))
symb[is.na(symb)] <- "---"
top1k <- cbind(top1k,Symbol=symb)
write.table(top1k,"FIRMAGene-additional_file_2.csv",sep=",",row.names=FALSE)