############################################## #### Code based on Guha (2008) JCGS paper ############################################## library(MASS) library(mvtnorm) fn.gen.data <- function(n, K) {true <- NULL true$beta.v <- c(3,.5,1) true$X <- array(,c(n,3)) true$X[,1] <- 1 true$X[,2] <- rnorm(n=n) true$X[,3] <- rnorm(n=n) true$M <- 20 true$p.v <- rgamma(n=K, shape=true$M/K) true$p.v <- true$p.v/sum(true$p.v) true$s.v <- sample(1:K, size=n, replace=TRUE, prob=true$p.v) true$tau <- 1 true$phi.v <- rnorm(n=K, sd=true$tau) true$theta.v <- true$phi.v[true$s.v] true$eta.v <- as.vector(true$X %*% true$beta.v) + true$theta.v true$mu.v <- exp(true$eta.v) true$Y <- rpois(n=n, lam=true$mu.v) true } fn.init <- function(data) { parm <- NULL parm$M <- 20 parm$p.v <- rgamma(n=K, shape=parm$M/K) parm$p.v <- parm$p.v/sum(parm$p.v) parm$s.v <- sample(1:K, size=n, replace=TRUE, prob=parm$p.v) parm$K <- length(unique(parm$s.v)) parm$n.vec <- array(,K) for (j in 1:K) {parm$n.vec[j] <- sum(parm$s.v==j) } tmp <- glm(data$Y ~ data$X[,-1] + as.factor(parm$s.v), family=poisson) parm$beta.v <- as.vector(tmp$coeff[1:3]) parm$phi.v <- c(0,as.vector(tmp$coeff[-(1:3)])) tick <- 0 for (j in 1:K) {if (parm$n.vec[j]>0) {tick <- tick + 1 parm$s.v[parm$s.v==j] <- tick } } parm$n.vec <- parm$n.vec[parm$n.vec>0] parm$tau <- sd(parm$phi.v) parm$theta.v <- parm$phi.v[parm$s.v] parm$eta.v <- as.vector(data$X %*% parm$beta.v) + parm$theta.v parm$mu.v <- exp(parm$eta.v) parm <- fn.laplace(parm) parm } fn.laplace <- function(parm) {parm$eta.v <- as.vector(true$X %*% parm$beta.v) + parm$theta.v parm$mu.v <- exp(parm$eta.v) parm$w.v <- parm$mu.v parm$Z <- parm$eta.v + (data$Y-parm$mu.v)/parm$mu.v parm } fn.beta <- function(parm, data) { scale <- 2*2.38/sqrt(2) r.v <- parm$Z - parm$theta.v ### forward sqrt.mu.X <- data$X[,-1] * as.vector(sqrt(parm$mu.v)) post.Var <- scale^2 * solve(.01 * diag(2) + t(sqrt.mu.X) %*% sqrt.mu.X) new.beta.v <- as.vector(rmvnorm(n=1, mean=parm$beta.v[-1], sigma=post.Var)) new.eta.v <- as.vector(data$X[,-1] %*% new.beta.v) + parm$theta.v new.mu.v <- exp(new.eta.v) log.ratio <- sum(dpois(data$Y, lam=new.mu.v, log=TRUE)) + sum(dnorm(new.beta.v, sd=10, log=TRUE)) - dmvnorm(new.beta.v, mean=parm$beta.v[-1], sigma=post.Var) ### backward new.sqrt.mu.X <- data$X[,-1] * as.vector(sqrt(new.mu.v)) new.post.Var <- scale^2 * solve(.01 * diag(2) + t(new.sqrt.mu.X) %*% new.sqrt.mu.X) log.ratio <- log.ratio - sum(dpois(data$Y, lam=parm$mu.v, log=TRUE)) - sum(dnorm(parm$beta.v, sd=10, log=TRUE)) + dmvnorm(parm$beta.v[-1], mean=new.beta.v, sigma=new.post.Var) ### toss prob <- min(0, log.ratio) prob <- exp(prob) flip <- as.logical(rbinom(n=1, size=1, prob=prob)) parm$beta.flip <- flip if (flip) {parm$beta.v[-1] <- new.beta.v parm <- fn.laplace(parm) } parm } fn.reparametrize <- function(parm) {parm$mu0 <- parm$beta.v[1] parm$theta.v <- parm$theta.v + parm$mu0 parm$phi.v <- parm$phi.v + parm$mu0 parm$beta.v[1] <- 0 parm } fn.unparametrize <- function(parm) {parm$beta.v[1] <- parm$mu0 parm$theta.v <- parm$theta.v - parm$mu0 parm$phi.v <- parm$phi.v - parm$mu0 parm$mu0 <- 0 parm } fn.MDP <- function(parm, data) { parm$mdp.flip <- array(,n) parm <- fn.unparametrize(parm) # looping thru i for(i in 1:n) { ##### bookkeeping: leaving out ith subject old.parm <- parm ############ old.s <- parm$s.v[i] parm$n.vec[old.s] <- parm$n.vec[old.s] - 1 if(parm$n.vec[old.s] > 0) {flag <- 0} if((parm$n.vec[old.s] == 0) && (parm$K > 1)) {flag <- 1} if((parm$n.vec[old.s] == 0) && (parm$K == 1)) {flag <- 2} if(flag == 1) { parm$phi.v[old.s] <- parm$phi.v[parm$K] parm$phi.v <- parm$phi.v[-parm$K] parm$s.v[parm$s.v == parm$K] <- old.s parm$n.vec[old.s] <- parm$n.vec[parm$K] parm$n.vec <- parm$n.vec[-parm$K] parm$K <- parm$K - 1 } if(flag == 2) {parm$n.vec <- NULL parm$phi.v <- NULL parm$K <- 0 } #### computing the likelihood contributions tmp.i <- parm$eta.v[i] - parm$theta.v[i] r.i <- parm$Z[i] - sum(data$X[i,]*parm$beta.v) if (parm$K > 0) {tmp2 <- log(parm$n.vec) + dpois(data$Y[i], lam=exp(tmp.i)*exp(parm$phi.v), log=TRUE) } if (parm$K == 0) {tmp2 <- NULL } ############ Ei.0 <- dnorm(r.i, mean=0, sd=sqrt(parm$tau^2 + 1/parm$w.v[i]),log=TRUE) - log(parm$mu.v[i]) tmp <- c(tmp2, (log(parm$M) + Ei.0)) tmp[is.na(tmp)] <- -Inf #### rescale maxx <- max(tmp) tmp <- tmp - maxx # convert to non-normalized probabilities tmp <- exp(tmp) tmp <- tmp/sum(tmp) prob.v <- tmp # sample new s_i and store new.s <- sample(1:length(tmp), size = 1, prob = prob.v) parm$s.v[i] <- new.s if ((new.s == parm$K + 1) | (flag > 0)) {mui.0 <- parm$mu.v[i]/(parm$mu.v[i] + 1/parm$tau^2) * r.i omi.0 <- 1/(parm$mu.v[i] + 1/parm$tau^2) } ############ if (new.s <= parm$K) {flag2 <- 0 parm$n.vec[new.s] <- parm$n.vec[new.s] + 1 parm$theta.v[i] <- parm$phi.v[new.s] } if (new.s == parm$K + 1) {flag2 <- 10 new.phi <- rnorm(n=1, mean=mui.0, sd=sqrt(omi.0)) parm$phi.v <- c(parm$phi.v, new.phi) parm$theta.v[i] <- parm$phi.v[new.s] parm$K <- parm$K + 1 parm$n.vec <- c(parm$n.vec,1) } parm$eta.v[i] <- parm$eta.v[i] - old.parm$theta.v[i] + parm$theta.v[i] parm$mu.v[i] <- exp(parm$eta.v[i]) parm$w.v[i] <- parm$mu.v[i] if (flag2 == 10) {Ei.1 <- dnorm(r.i, mean=0, sd=sqrt(parm$tau^2 + 1/parm$w.v[i]),log=TRUE) - log(parm$mu.v[i]) rho1 <- dpois(data$Y[i], lam=parm$mu.v[i], log=TRUE) + dnorm(parm$theta.v[i], sd=parm$tau, log=TRUE) - Ei.1 - dnorm(parm$theta.v[i], mean=mui.0, sd=sqrt(omi.0), log=TRUE) } if (flag>0) {mui.1 <- parm$mu.v[i]/(parm$mu.v[i] + 1/parm$tau^2) * r.i omi.1 <- 1/(parm$mu.v[i] + 1/parm$tau^2) rho0 <- dpois(data$Y[i], lam=old.parm$mu.v[i], log=TRUE) + dnorm(old.parm$theta.v[i], sd=parm$tau, log=TRUE) - Ei.0 - dnorm(old.parm$theta.v[i], mean=mui.1, sd=sqrt(omi.1), log=TRUE) } if ((flag == 0) & (flag2 == 0)) {log.ratio <- 0 } if ((flag == 0) & (flag2 > 0)) {log.ratio <- rho1 } if ((flag > 0) & (flag2 == 0)) {log.ratio <- -rho0 } if ((flag > 0) & (flag2 > 0)) {log.ratio <- rho1-rho0 } #toss prob <- min(0, log.ratio) prob <- exp(prob) flip <- as.logical(rbinom(n=1, size=1, prob=prob)) if (!flip) {parm <- old.parm } parm$mdp.flip[i] <- flip } # end for loop parm$theta.v <- parm$phi.v[parm$s.v] parm <- fn.laplace(parm) parm$mdp.flip <- mean(parm$mdp.flip) parm <- fn.reparametrize(parm) parm } fn.invariant <- function(parm, data) { scale <- 8* 2.38/sqrt(parm$K) parm$invariant.flip <- array(,parm$K) for (j in 1:parm$K) {new.parm <- parm indx.j <- which(parm$s.v==j) sd.j <- 1/sqrt(sum(parm$mu.v[indx.j])) new.parm$phi.v[j] <- rnorm(n=1, mean=parm$phi.v[j], sd=scale*sd.j) new.parm$eta.v[indx.j] <- parm$eta.v[indx.j] - parm$phi.v[j] + new.parm$phi.v[j] new.parm$mu.v[indx.j] <- exp(new.parm$eta.v[indx.j]) log.ratio <- sum(dpois(data$Y[indx.j], lam=new.parm$mu.v[indx.j], log=TRUE)) - sum(dpois(data$Y[indx.j], lam=parm$mu.v[indx.j], log=TRUE)) log.ratio <- log.ratio + dnorm(new.parm$phi.v[j], mean=parm$mu0, sd=parm$tau, log=TRUE) - dnorm(parm$phi.v[j], mean=parm$mu0, sd=parm$tau, log=TRUE) log.ratio <- log.ratio + dnorm(parm$phi.v[j], mean=new.parm$phi.v[j], sd=scale*sd.j, log=TRUE) - dnorm(new.parm$phi.v[j], mean=parm$phi.v[j], sd=scale*sd.j, log=TRUE) #toss prob <- min(0, log.ratio) prob <- exp(prob) flip <- as.logical(rbinom(n=1, size=1, prob=prob)) if (flip) {parm <- new.parm } parm$invariant.flip[j] <- flip } parm$theta.v <- parm$phi.v[parm$s.v] parm <- fn.laplace(parm) parm$invariant.flip <- mean(parm$invariant.flip) parm } fn.MDP.hyper <- function(parm, data) { a <- 1 + parm$K/2 b <- 1 + sum((parm$phi.v-parm$mu0)^2)/2 parm$tau <- 1/sqrt(rgamma(n=1, shape=a, rate=b)) post.sd <- 1/sqrt(1e-4 + parm$K/parm$tau^2) post.mean <- post.sd^2 * parm$K/parm$tau^2 * mean(parm$phi.v) parm$mu0 <- rnorm(n=1, mean=post.mean, sd=post.sd) parm } fn.post.dens.M <- function(parm) { log.lik <- parm$K*log(parm$M) + lgamma(parm$M) - lgamma(parm$M + n) + sum(lgamma(parm$n.vec)) log.prior <- dgamma(parm$M, shape=true$M/2, rate=1/2, log=TRUE) val <- log.lik + log.prior val } ########################################################## n <- 100 K <- 40 true <- fn.gen.data(n, K) data <- NULL data$Y <- true$Y data$X <- true$X parm <- fn.init(data) # subsample quantities size <- 5000 gap <- 1 n.burn <- size*gap n.reps <- size*gap for (cc in 1:n.burn) {parm <- fn.reparametrize(parm) parm <- fn.laplace(parm) parm <- fn.beta(parm, data) parm <- fn.MDP(parm, data) parm <- fn.invariant(parm, data) parm <- fn.MDP.hyper(parm, data) parm <- fn.unparametrize(parm) if (cc %% 100 == 0) {print(paste(date(), "BURN cc=",cc)) } } All.Stuff <- NULL All.Stuff$beta.mt <- array(,c(size,3)) All.Stuff$tau.v <- array(,size) All.Stuff$K.v <- array(,size) All.Stuff$theta.mt <- array(,c(size,n)) All.Stuff$M.v <- array(,size) All.Stuff$beta.flip.v <- All.Stuff$mdp.flip.v <- All.Stuff$invariant.flip.v <- All.Stuff$M.flip.v <- array(,size) for (cc in 1:n.reps) {parm <- fn.reparametrize(parm) parm <- fn.laplace(parm) parm <- fn.beta(parm, data) parm <- fn.MDP(parm, data) parm <- fn.invariant(parm, data) parm <- fn.MDP.hyper(parm, data) parm <- fn.unparametrize(parm) if (cc %% gap == 0) {All.Stuff$beta.mt[cc/gap,] <- parm$beta.v All.Stuff$tau.v[cc/gap] <- parm$tau All.Stuff$K.v[cc/gap] <- parm$K All.Stuff$theta.mt[cc/gap,] <- parm$theta.v All.Stuff$M.v[cc/gap] <- parm$M All.Stuff$beta.flip.v[cc/gap] <- as.numeric(parm$beta.flip) All.Stuff$mdp.flip.v[cc/gap] <- parm$mdp.flip All.Stuff$invariant.flip.v[cc/gap] <- as.numeric(parm$invariant.flip) } if (cc %% 100 == 0) {print(paste(date(), "REPS cc=",cc)) } }