setwd('/Users/anhjaeil/Work/CNV/bayes_rnaseq/All_kidney/')
library(edgeR)
## Loading required package: limma
load("RSCE.mRNA.unstranded.rda")
d <- getCounts(RSCE.mRNA.unstranded)
sumd <- apply(d ,2, sum)
sample <- colnames(d)
pur <- rep(0, 20)
for(i in 1:20)
pur[i] = strsplit(strsplit(sample, split = '\\.')[[i]], split = '_')[[2]][1]
pur <- as.numeric(pur)
pur <- pur/100
normalid <- which(pur == 0.00)
tumorid <- which(pur == 1.00)
mixid <- which(pur != 0.00 & pur != 1.00)
normal <- d[ , normalid]
tumor <- d[ , tumorid]
mix <- d[ , mixid]
input <- cbind(normal, mix)
tumor<-tumor[apply(input == 0, 1, sum) == 0,]
input <- input[apply(input == 0, 1, sum) == 0, ]
We use DSS total normalization
library(DSS)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colnames,
## do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, lengths, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff,
## sort, table, tapply, union, unique, unsplit
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: bsseq
## Loading required package: GenomicRanges
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## colMeans, colSums, expand.grid, rowMeans, rowSums
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: splines
designs=c(rep("1",4), rep("0", 12))
newt=as.matrix(input)
colnames(newt)=NULL
seqData=newSeqCountSet(newt, designs)
seqData=estNormFactors(seqData, "total")
k3=seqData@normalizationFactor
mk3=median(k3)
k3=k3/mk3
for(i in 1:ncol(newt))
newt[,i] = newt[,i]/k3[i]
We select informative genes based on fold-changes cutoffs of >2 or <0.5.
mu_normal <- apply(newt[,1:4], 1, mean)
mu_tumor <- apply(newt[,5:16], 1, mean)
mu_prop <- mu_tumor/mu_normal
id <- (mu_prop > 2)| (mu_prop < 0.5)
inputt <- newt[id, ]
dim(inputt)
## [1] 2055 16
setwd('/Users/anhjaeil/work/CNV/bayes_rnaseq/R_public/');
source("TumorHv1_para.R");
# RNA-seq use integer
rnadata=round(inputt)
# 0 for Normal and 1 for Mixed
cnvgroup<-c(rep(0, 4), rep(1,12));
cbit=64;
matched=0;
nstart=2
# Demix Call
testr2<-Rdemix(as.matrix(rnadata), cnvgroup, cbit, 0, rep(0,10), 1, 28,3)
res=rbind(testr2$Poipi, testr2$pi)
# Proportion estimates
res[4,]
## [1] 0.3058938 0.5609315 0.7633198 0.2845112 0.5100475 0.7300598 0.2589308
## [8] 0.5275297 0.7476616 0.2761266 0.5204228 0.7458482
# Deconvolved expression estimates
testr2$decov[1:10,1:10]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 11 11 14 10 9 11 7 4 11 9
## [2,] 266 227 218 117 184 223 112 131 121 133
## [3,] 76 83 85 49 52 56 50 61 79 50
## [4,] 491 557 663 427 383 452 490 396 423 330
## [5,] 3 2 1 3 1 2 2 2 0 2
## [6,] 7 2 4 6 3 4 6 1 3 6
## [7,] 137 164 162 185 195 186 268 206 205 215
## [8,] 49 54 71 27 43 43 39 48 57 43
## [9,] 36 30 54 36 49 36 55 38 28 36
## [10,] 5 9 13 4 10 9 6 2 11 9