CHIPNAME = "MoGene-1_0-st-v1" ANNOTFILE = "MoGene-1_0-st-v1.na28.mm9.transcript.csv" #PROBETAB = "MoGene-1_0-st-v1.probe.tab" PROBETAB = "MoGene_mm9_probetab.txt" #DATASET = "mmurinus_gene_ensembl" DATASET = "mmusculus_gene_ensembl" ################################################################################## ################################################################################## library(aroma.affymetrix) # If aroma.affymetrix isn't installed, do so with the following commands: # source("http://www.braju.com/R/hbLite.R") # hbInstall("aroma.affymetrix") library(GenomeGraphs) # If GenomeGraphs isn't installed, do so with the following commands: # source("http://bioconductor.org/biocLite.R") # biocLite("GenomeGraphs") library(FIRMAGene) # If GenomeGraphs isn't installed, do so with the following commands: # install.packages("FIRMAGene", repos="http://R-Forge.R-project.org") ################################################################################## ################################################################################## verbose = Arguments$getVerbose(-30); timestampOn(verbose) #cdf = AffymetrixCdfFile$fromChipType(CHIPNAME, verbose=verbose) # this assumes the CDF file is in ./annotationData/chipTypes/CHIPNAME/ cdf = AffymetrixCdfFile$fromChipType(CHIPNAME, tags="mm9",verbose=verbose) # this assumes the CDF file is in ./annotationData/chipTypes/CHIPNAME/ cs = AffymetrixCelSet$fromName("tissues", cdf=cdf, verbose=verbose) # this assumes the CEL files are at ./rawData/tissues/CHIPNAME/ bc = RmaBackgroundCorrection(cs) csBC = process(bc, verbose=verbose) qn = QuantileNormalization(csBC, typesToUpdate="pm") csN = process(qn, verbose=verbose) # time required first time through cdfu = getUniqueCdf(cdf, verbose=-80) # convert to unique cell #csNU = convertToUnique(csN, verbose=-20) csNU = convertToUnique(csN, tags="mm9,UNQ", verbose=-20) plm = RmaPlm(csNU) fit(plm, verbose=verbose) #time required first time through cls <- gsub("MouseTP_", "", gsub("_0[1-3]_mGENE", "", getNames(cs))) netaffx=read.csv(ANNOTFILE, sep=",", skip=19, header=TRUE, comment.char="", stringsAsFactors=FALSE) probetab=read.table(PROBETAB, sep="\t", header=TRUE, comment.char="", stringsAsFactors=FALSE) nc <- nbrOfCellsPerUnit(cdf) u <- which(getUnitNames(cdf) %in% netaffx$probeset_id[netaffx$category == "main"] & nc > 7 & nc < 200) fg <- FIRMAGene(plm, idsToUse=u, cls=cls) sampNames <- getNames(csNU) mart=useMart(biomart="ensembl", dataset=DATASET) getNames=function(...) UseMethod("getNames")# clash of the 'getNames' function top20=topSplices(fg, 20) pdf("FIRMAGene_MoGene_tissue.pdf", height=11, width=8.5) for(i in 1:nrow(top20)) { col= rep("black",33) ww=grep(top20$Sample[i], sampNames) col[ww]="blue" lwd=c(1,3)[(col!="black")+1] y=FIRMAGene::getDetails(plm, id=top20$ID[i], mart=mart, probesets=probetab, col=col, lwd=lwd, verbose=TRUE, transcriptClusterId="transcript_cluster_id") y[[1]]@title=paste(y[[1]]@title, paste("blue", top20$Sample[i], sep="="), sep=" -- ") gdPlot(y) } dev.off() top1k = topSplices(fg,1000, plm=plm, cls=cls) # for additional file 2 #top1k = topSplices(fg,1000) # for additional file 2 m = match( top1k$ID, netaffx$transcript_cluster_id ) symb = as.character(sapply(as.character(netaffx$gene_assignment[m]), FUN=function(u) strsplit(u," // ")[[1]][2])) symb[is.na(symb)] = "---" top1k = cbind(top1k, Symbol=symb) write.table(top1k,"FIRMAGene_MoGene_tissue.csv",sep=",",row.names=FALSE)