# R Functions to run EM algorithm to estimate parameters for # T-cell Assay Data from ELISPOT by Tim Beissbarth. # Ref: # A systematic approach for comprehensive T-cell epitope discovery using peptide libraries. # Beissbarth T, Tye-Din JA, Smyth GK, Speed TP, Anderson RP # Bioinformatics; 7.2005; 21(ISMB Supplement 1). # Freely distributed. Licence: GPL2 (see http://www.gnu.org/licenses/gpl.html) # Two alternative functions. Do the same, but use different data types as input. # Maybe different speeds. First is newer and recommended. # First: operates on DataFrame, uses tapply # Get Data from database or file # Format "data" (column labels): # assay: assay_id (character or factor) # peptides: antigen_id (character or factor) # patients: patient_sample_id (character or factor) # spots: number of spots from ELISPOT (numeric or integer) # # steps: number of EM cycles elispot.em.dataframe <- function(data, steps=100) { logL <- numeric(steps) peptides <- as.factor(data$peptides) patients <- as.factor(data$patients) y <- as.integer(data$spots) # Set Initial Values max <- tapply(y, patients, max, na.rm=T) z <- pmax(0.2, 0.8 * y / max[patients]) # Set Initial Parameters p <- tapply(z, peptides, mean, na.rm=T) lambda <- tapply(y, peptides, mean, na.rm=T) lambda.0 <- mean(y, na.rm=T) alpha <- tapply(y, patients, mean, na.rm=T) # EM iterations for(iteration in 1:steps) { # Restrict lambda to be 2 * greater background lambda[lambda < lambda.0 | is.na(lambda)] <- max(0, lambda.0, na.rm=T) # Restrict mean alpha to be 1 // Alpha_6 (Wingrove) to be 1 alpha <- pmax(0, alpha / mean(alpha, na.rm=T), na.rm=T) pp <- as.numeric(p[peptides]) l <- as.numeric(lambda[peptides]) a <- as.numeric(alpha[patients]) # E-Step z <- as.numeric(pp * dpois(y, a*l) / (pp * dpois(y, a*l) + ((1-pp) * dpois(y, a*lambda.0)))) logL[iteration] <- sum(z*log(pp), na.rm=T) + sum((1-z)*log(1-pp), na.rm=T) + sum(y*z*log(a*l), na.rm=T) + sum(y*(1-z)*log(a*lambda.0), na.rm=T) - sum(z*l*a, na.rm=T) - sum((1-z)*lambda.0*a, na.rm=T) # M-Step p <- pmax(0, tapply(z, peptides, mean, na.rm=T), na.rm=T) alpha <- pmax(0, tapply(y, patients, sum, na.rm=T) / tapply(l*z + (lambda.0*(1-z)), patients, sum, na.rm=T), na.rm=T) lambda.0u <- sum(y*(1-z), na.rm=T) lambda.0b <- sum((1-z)*a, na.rm=T) lambda.0 <- pmax(0, lambda.0u / lambda.0b, na.rm=T) lambda.u <- tapply(y*z, peptides, sum, na.rm=T) lambda.b <- tapply(z*a, peptides, sum, na.rm=T) lambda <- pmax(0, lambda.u / lambda.b, na.rm=T) for(i in 1:3) { lambda.bak.index <- lambda < lambda.0 | is.na(lambda) lambda.0 <- max(0, (lambda.0u + sum(lambda.u[lambda.bak.index], na.rm=T)) / (lambda.0b + sum(lambda.b[lambda.bak.index], na.rm=T)), na.rm=T) } lambda[lambda