# Written by Dr. Andrea Cook # Weighted Cumulative Geographic Residual Test ###################################################################################### # Function to indicate if points are within or outside of area in.areaSQ<-function(bxy,loc) { bxy<-as.numeric(bxy) area<-(((bxy[2]-bxy[1])<=loc[,1])& ((bxy[2]+bxy[1])>=loc[,1])& ((bxy[3]-bxy[1])<=loc[,2])& ((bxy[3]+bxy[1])>=loc[,2])) return(as.numeric(area)) } # Must load MASS package library(MASS) #################################################################################### # Function to run Area-Level Weighted Cumulative Geographic Residual Tests # Data Requirements: # Y = Vector of outcomes # X = Matrix of area-level covariates (Make NULL for no covariates # W = Vector of Weights # loc = two column matrix of centroid of area locations # model = formula for how you want to model the relationship to Y and X # inarea = matrix of number of areas by number of potential clusters with # each column is an indicator function if the area is in cluster # bxy = matrix with number of areas by 3 with the first column being # b, half-edge length of square potential cluster, the second two columns # are the location of the center of the square of potentialcluster # b, xstart, ystart, xfin, yfin only needed if inarea and bxy are notspecified # b = a vector of 1/2 edge-length of square clusters to be looked at # xstart, ystart = values of location to start to create grid to look forclusters # xfin, yfin = values of location to finish creating grid to to look forclusters # nkeep = number of simulated Zhats to keep # nsims = number of total simulated Zhats CumRes.wCont<-function(Y,X,W,loc,model=NULL,inarea=NULL,bxy=NULL, b=NULL,xstart=NULL,ystart=NULL,xfin=NULL,yfin=NULL, nkeep=100,nsims=1000) { # number of areas n<-length(Y) Xint<-cbind(rep(1,n),X) # Run Model comparing Y and X if(is.null(model)) { model<-formula(Y~(-1)+Xint) } model1<-lm(model,weights=W) beta<-model1\$coefficients sigma2<-(summary(model1)\$sigma)^2 # Residuals resid<-as.matrix(model1\$residuals) # Score UiB<-Xint*matrix(rep(resid/(sigma2/W),ncol(Xint)),ncol=ncol(Xint)) UB<-rep(1,n)%*%UiB # Inverse Information Iinv<-summary(model1)\$cov.unscaled # Obtain standard normal and discrete vectors G<-t(mvrnorm(nsims,rep(0,n),diag(1,n))) # When inarea is not provided if(is.null(inarea)) { if(is.null(xstart)) xstart<-min(loc[,1]) if(is.null(ystart)) ystart<-min(loc[,2]) if(is.null(xfin)) xfin<-max(loc[,1]) if(is.null(yfin)) yfin<-max(loc[,2]) bxycur<-NULL for(i in 1:length(b)) { # Finishing Point for x and y xfin1<-xfin+b[i]/5 yfin1<-yfin+b[i]/5 # Values of x and y to be evaluated x<-seq(xstart,xfin1,by=b[i]/5) y<-seq(ystart,yfin1,by=b[i]/5) bxycur<-rbind(bxycur,cbind(b[i],expand.grid(x,y))) } remove(x,y) cat("Obtained Expanded Grid: dim = ",dim(bxycur),"\n") bxy<-NULL Zobs<-NULL Zhat<-NULL # Loop through bxy to calculate Wobs, What for(j in 1:nrow(bxycur)) { inarea<-as.matrix(in.areaSQ(bxycur[j,],loc=loc)) weights<-inarea*W #Remove Potential Clusters Without Data if(sum(inarea)>0) { bxy<-rbind(bxy,bxycur[j,]) # Calculate Zobs Zobs<-rbind(Zobs,1/sqrt(n)*t(weights)%*%(resid)) # Calculate Zhat nu.in<-t(Xint)%*%(weights) fixedpart<-(weights*resid-t(nu.in%*%Iinv%*%t(UiB))) Zhat<-rbind(Zhat,1/sqrt(n)*t(fixedpart)%*%G) } if(round(j*.01)==j*.01) cat("j = ",j,"\n") } # Sup of Zhat Slochat<-apply(Zhat,2,max) # Keeping nkeep of What Zhatkeep<-Zhat[,1:nkeep] } # when inarea is provided else { Zobs<-NULL Zhat<-NULL Slochat<-NULL # Loop through bxy to calculate Wobs, What for(j in 1:nrow(bxy)) { inareacur<-inarea[,j] weights<-inareacur*W # Calculate Zobs Zobs<-rbind(Zobs,1/sqrt(n)*t(weights)%*%(resid)) # Calculate Zhat nu.in<-t(Xint)%*%(weights) fixedpart<-(weights*resid-t(t(nu.in)%*%Iinv%*%t(UiB))) Zhatcur<-1/sqrt(n)*t(fixedpart)%*%G Zhat<-rbind(Zhat,Zhatcur[,1:nkeep]) Slochat<-rbind(Slochat,Zhatcur) Slochat<-apply(Slochat,2,max) if(round(j*.01)==j*.01) cat("j = ",j,"\n") } } cat("Obtained Zhat Distribution","\n") print(dim(Zhat)) # Calculate One-Sided Test Sloc<-max(Zobs) pval<-sum(Sloc<=Slochat)/nsims # 70,80,90, and 95% Critical Value for W Slochat<-sort(Slochat) crit<-c(Slochat[round(.7*nsims)],Slochat[round(.8*nsims)], Slochat[round(.9*nsims)],Slochat[round(.95*nsims)]) meanSlochat<-mean(Slochat) quantSlochat<-quantile(Slochat,prob=c(.05,.1,.5,.9,.95)) return(list(Y=Y,Xmod=Xint,loc=loc,Zobs=Zobs, Zhat=Zhat, bxy=bxy,nsims=nsims, Sloc=Sloc,pval=pval,crit=crit, meanSlochat=meanSlochat, quantSlochat=quantSlochat)) } #################################################################################### # Function to run Individual level Cumulative Geographic Residual Tests # Only for fixed effect approach # Data Requirements: # Y = Vector of individual outcomes # Uind = Indicator matrix function saying which area each individualresides in # Xind = Matrix of individual-level covariates (Make NULL for nocovariates) # Xarea = Matrix of area-level covariates (Make NULL for no covariates) # loc = two column matrix of centroid of area locations # modelI = formula for how you want to model the relationship to Y andXind # modelA = formula for how you want to model the relationship toarea-level outcome and Xarea # inarea = matrix of number of areas by number of potential clusters with # each column is an indicator function if the area is in cluster # bxy = matrix with number of areas by 3 with the first column being # b, half-edge length of square potential cluster, the second twocolumns # are the location of the center of the square of potentialcluster # b, xstart, ystart, xfin, yfin only needed if inarea and bxy are notspecified # b = a vector of 1/2 edge-length of square clusters to be looked at # xstart, ystart = values of location to start to create grid to look forclusters # xfin, yfin = values of location to finish creating grid to to look forclusters # nkeep = number of simulated Zhats to keep # nsims = number of total simulated Zhats CumRes.wContInd<-function(Y,Uind,Xind,Xarea,loc,modelI=NULL,modelA=NULL, bxy=NULL,inarea=NULL,b=NULL,xstart=NULL,ystart=NULL,xfin=NULL,yfin=NULL, nkeep=100,nsims=1000,method="Fixed") { # Number of individual-level observations nI<-length(Y) # Number of areas n<-ncol(Uind) # Create model formula for Y and Xind if(is.null(modelI)) { modelI<-formula(Y~(-1)+Uind+Xind) } modI<-summary(lm(modelI)) betaI<-modI\$coefficients[,1] VarI<-modI\$cov.unscaled*modI\$sigma^2 Xmean<-aggregate(Xind,by=list(rep(1,nI)),mean)[-1] Xmean<-matrix(rep(as.numeric(Xmean),n),byrow=T,nrow=n) temp<-cbind(diag(1,n),Xmean) V<-temp%*%betaI VarV<-diag(temp%*%VarI%*%t(temp)) W=1/sqrt(VarV) return(list(stage1=list(Y=Y,Uind=Uind,Xind=Xind,betaA=betaI,V=V,W=W), stage2=CumRes.wCont(Y=V,X=Xarea,W=W,loc=loc,bxy=bxy,b=b,inarea=inarea, model=modelA,xstart=xstart,ystart=ystart,xfin=xfin,yfin=yfin, nkeep=nkeep,nsims=nsims))) }