## @knitr install source("http://bioconductor.org/biocLite.R") biocLite("RamiGO") ## @knitr exampleCode ######## ### compute cindices in VDX ### taking the subtypes into account ######## rm(list=ls()) ## create result folder saveres <- "results" if(!file.exists(saveres)) { system(sprintf("mkdir %s", saveres)) } ## Required files in current folder ## see http://www.broadinstitute.org/gsea for files exe.path.string <- "gsea2-2.07.jar" gmt.path.string <- "c5.all.v3.0.entrez.gmt" ######## ## load libraries ######## library(survcomp) library(Biobase) library(genefu) library(RamiGO) library(breastCancerVDX) ######## ### import data ######## data(vdx) data(scmgene.robust) data.all <- c("vdx"=vdx) platform.all <- c("affy") ######## ### get subtypes ######## subtypeData <- as.list(NULL) pdf("subtype_plot.pdf") for(i in 1:length(data.all)){ domap <- FALSE subtypeData[[i]] <- subtype.cluster.predict(sbt.model=scmgene.robust, data=t(exprs(data.all[[i]])), annot=fData(data.all[[i]]), do.mapping=domap, do.prediction.strength=FALSE, do.BIC=FALSE, plot=TRUE, verbose=TRUE) title(names(data.all)[[i]]) } dev.off() names(subtypeData) <- names(data.all) subtypesOrder <- c("ER-/HER2-","HER2+", "ER+/HER2- High Prolif","ER+/HER2- Low Prolif", "All") ######## ### compute concordance indices ######## cindices <- array(NA, dim=c(nrow(exprs(vdx)), 2, length(data.all), length(subtypesOrder)), dimnames=list(row.names(exprs(vdx)), c("cindex", "cindex.se"), names(data.all), subtypesOrder)) ## dimension 1 = gene ids ## dimension 2 = concordance indices and standard errors ## dimension 3 = datasets ## dimension 4 = subtypes for(i in 1:length(data.all)){ ## simplify dataset ## consider only the unique untreated node-negative patients myx <- complete.cases(pData(data.all[[i]])[ , c("node", "treatment")]) & pData(data.all[[i]])[ , "node"] == 0 & pData(data.all[[i]])[ , "treatment"] == 0 exprsDataTmp <- exprs(data.all[[i]])[ ,myx, drop=FALSE] ## extract survival data sData <- c("t.dmfs","e.dmfs") censData <- censor.time(pData(data.all[[i]])[ ,sData[1]], pData(data.all[[i]])[ ,sData[2]],3600) timeDataTmp <- censData$surv.time.cens eventDataTmp <- censData$surv.event.cens ## extract subtype probabilities sbtprob <- subtypeData[[i]]$subtype.proba2[myx, , drop=FALSE] for(j in 1:length(subtypesOrder)){ if(subtypesOrder[j] != "All"){ subtypeWeights <- as.numeric(sbtprob[,j]) } else { subtypeWeights <- rep(1, nrow(pData(vdx))) } cindexTmp <- t(apply(X=exprsDataTmp, MARGIN=1, FUN=function(x, y, z, w) { tt <- concordance.index( x=x, surv.time=y, surv.event=z, method="noether", na.rm=TRUE, weights=w); return(c("cindex"=tt$c.index, "cindex.se"=tt$se)); }, y=timeDataTmp, z=eventDataTmp, w=subtypeWeights)) cindices[, , i, j] <- cindexTmp } } save(cindices, file="cindices_4-subtypes-ramigo-vdx.RData") ######## ### combining the cindices for each gene from all datasets ######## combcind <- function(x) { rr <- unlist(combine.est(x=x["cindex", ], x.se=x["cindex.se", ], na.rm=TRUE)) ## compute two-sided p-values statt <- (rr[1] - 0.5)/rr[2] rr <- c(rr, pnorm(statt, lower.tail = rr[1] < 0.5) * 2) ## compute a score rr <- c(rr, statt) names(rr) <- c("cindex", "cindex.se", "cindex.p", "cindex.score") return(rr) } cindices.meta <- apply(X=cindices, MARGIN=c(1, 4), FUN=combcind) save(cindices.meta, file="cindices_meta_4-subtypes-ramigo-vdx.RData") ######## ### preRanked GSEA ######## gsea.res <- NULL ## save ranking for each subtype for(i in 1:dim(cindices.meta)[3]) { gg <- as.character(fData(vdx)[,"EntrezGene.ID"]) ss <- cindices.meta["cindex.score", ,i] write.table(cbind(gg, ss), file=sprintf( "prognosis_s%s_entrez.rnk", i), col.names=FALSE, row.names=FALSE, sep="\t", quote=FALSE) } rnk.all <- sprintf("prognosis_s%i_entrez.rnk", 1:length(subtypesOrder)) ## set GSEA parameters exe.path <- exe.path.string gmt.path <- gmt.path.string gsea.collapse <- "false" nperm <- 1000 gsea.seed <- 54321 gsea.out <- saveres for(i in 1:length(rnk.all)) { resn <- sprintf("prognosis_s%i_entrez", i) ## build GSEA command rnk.path <- rnk.all[i] gsea.report <- resn rest <- dir(getwd()) rest <- rest[grep(pattern=resn, x=rest)[1]] ## -Xmx sets amount of memory to allocate. lower ## 8g (8GB) to 2g or 4g if not enough memory is available gsea.cmd <- sprintf("java -Xmx8g -cp %s xtools.gsea.GseaPreranked -gmx %s -collapse %s -nperm %i -rnk %s -scoring_scheme weighted -rpt_label %s -include_only_symbols true -make_sets true -plot_top_x 75 -rnd_seed %i -set_max 500 -set_min 10 -zip_report true -out %s -gui false", exe.path, gmt.path, gsea.collapse, nperm, rnk.path, gsea.report, gsea.seed, gsea.out) system(gsea.cmd) cat("\n-------------------------------------\n\n") ## read results rest <- dir(saveres) rest <- rest[grep(pattern=resn, x=rest)[1]] restn <- sapply(strsplit(rest, "[.]"), function(x) { return(x[length(x)]) }) tt <- rbind(read.table(sprintf( "%s/%s/gsea_report_for_na_pos_%s.xls", saveres, rest, restn), stringsAsFactors=FALSE, sep="\t", header=TRUE), read.table( sprintf("%s/%s/gsea_report_for_na_neg_%s.xls", saveres, rest, restn), stringsAsFactors=FALSE, sep="\t", header=TRUE)) gsea.res <- c(gsea.res, list(tt)) } names(gsea.res) <- rnk.all save(gsea.res, file=sprintf("%s/gsea_res_RamiGO.RData", saveres)) ######## ### Create RamiGO output ######## sigTerms <- NULL maxp <- 0.25 for(i in 1:length(gsea.res)){ sigTerms <- c(sigTerms, list(gsea.res[[i]][ gsea.res[[i]]$FDR.q.val < maxp, ])) } names(sigTerms) <- subtypesOrder ## choose ER+/HER2- High Proliferation subtype for the example i <- 3 goNames <- sigTerms[[i]]$NAME[order(sigTerms[[i]]$NES)] pvalues <- sigTerms[[i]][order(sigTerms[[i]]$NES), "FDR.q.val"] goNES <- sigTerms[[i]]$NES[order(sigTerms[[i]]$NES)] colors <- ifelse(goNES > 0, "tomato", "lightblue") ## Code for updated RamiGO version data(c5.go.mapping) goIDs <- sapply(goNames, function(x)c5.go.mapping[ c5.go.mapping$description == x, "goid"]) goIDs <- as.character(goIDs) treeAll <- getAmigoTree(goIDs, color=colors, filename=sprintf("%s_-tree_up-down-CC-only", i)) ## Code for export and displayin GO tree in Cytoscape #treeAll <- getAmigoTree(goIDs, color=colors, #filename=sprintf("%s_-tree_up-down-CC-only", i), picType="dot") #treeAllDot <- readAmigoDot(treeAll) #AmigoDot.to.Cyto(treeAllDot)