# Example of analysis of excess mortality linked to ZIP code social metrics
# downloaded from The Public Health Disparities Geocoding Project
# https://www.hsph.harvard.edu/thegeocodingproject/covid-19-resources/
#
# Replicates analyses used in:
# Chen JT, Waterman PD, Krieger N. COVID-19 and the unequal surge in mortality rates in Massachusetts,
# by city/town and ZIP Code measures of poverty, household crowding, race/ethnicity, and racialized economic
# segregation. Harvard Center for Population and Development Studies Working Paper Series, Volume 19, Number 2. May 9, 2020.
#
# This code updates the analyses in the working paper by extending to April 28, 2020 and
# calculating rates for one-week rather than two-week periods.
# Jarvis T. Chen, ScD
# 18 May 2020

# Assumes that we have:
# 1. Dataset of deaths in 2020: we show an example of cleaning ma.death.raw.2020 below.
# 2. Dataset of deaths in 2015-2019: assume we have ma.death2019, ma.death2018, ma.death2017, ma.death2016, ma.death2015
# 3. Dataset of population denominators by ZIP code tabulation area (ZCTA) and age: this is called ma.acs.pop2 below.
# 4. Dataset of ZCTA area-based socioeconomic measures (ABSMs): this is called d.acs2018 below.

library(tidycensus)
library(tidyverse)
library(dplyr)
library(Hmisc)   # for wtd.quantile(), used when forming population-weighted quintiles below

# an example of data cleaning of 2020 death data
ma.death2020 <- select(ma.death.raw.2020,
                       "GENDER", "DOD", "DOB", "RES_CITY", "RES_STATE", "RES_ZIP", "AGE") %>%
  rename(Sex = GENDER, citytown = "RES_CITY") %>%
  filter(RES_STATE == "MASSACHUSETTS") %>%
  mutate(GEOID = ifelse(RES_ZIP <= 9999, paste0("0", RES_ZIP), paste0(RES_ZIP)),
         dthdate = as.Date(DOD, "%Y-%m-%d", tz = "US/Eastern"),
         bthdate = as.Date(DOB, "%Y-%m-%d", tz = "US/Eastern"),
         ageAtDth = as.numeric(dthdate - bthdate)/365.25,
         year = "2020",
         # assign 7-day periods (upper bounds inclusive, so no days are dropped)
         oneweek = case_when("2020-01-01" <= dthdate & dthdate <= "2020-01-07" ~ 1,
                             "2020-01-08" <= dthdate & dthdate <= "2020-01-14" ~ 2,
                             "2020-01-15" <= dthdate & dthdate <= "2020-01-21" ~ 3,
                             "2020-01-22" <= dthdate & dthdate <= "2020-01-28" ~ 4,
                             "2020-01-29" <= dthdate & dthdate <= "2020-02-04" ~ 5,
                             "2020-02-05" <= dthdate & dthdate <= "2020-02-11" ~ 6,
                             "2020-02-12" <= dthdate & dthdate <= "2020-02-18" ~ 7,
                             "2020-02-19" <= dthdate & dthdate <= "2020-02-25" ~ 8,
                             "2020-02-26" <= dthdate & dthdate <= "2020-03-03" ~ 9,
                             "2020-03-04" <= dthdate & dthdate <= "2020-03-10" ~ 10,
                             "2020-03-11" <= dthdate & dthdate <= "2020-03-17" ~ 11,
                             "2020-03-18" <= dthdate & dthdate <= "2020-03-24" ~ 12,
                             "2020-03-25" <= dthdate & dthdate <= "2020-03-31" ~ 13,
                             "2020-04-01" <= dthdate & dthdate <= "2020-04-07" ~ 14,
                             "2020-04-08" <= dthdate & dthdate <= "2020-04-14" ~ 15,
                             "2020-04-15" <= dthdate & dthdate <= "2020-04-21" ~ 16,
                             "2020-04-22" <= dthdate & dthdate <= "2020-04-28" ~ 17),
         twoweek = floor(oneweek/2),
         agecat = case_when(ageAtDth < 5                   ~ "Age0-4",
                            5  <= ageAtDth & ageAtDth < 10 ~ "Age5-9",
                            10 <= ageAtDth & ageAtDth < 15 ~ "Age10-14",
                            15 <= ageAtDth & ageAtDth < 20 ~ "Age15-19",
                            20 <= ageAtDth & ageAtDth < 25 ~ "Age20-24",
                            25 <= ageAtDth & ageAtDth < 30 ~ "Age25-29",
                            30 <= ageAtDth & ageAtDth < 35 ~ "Age30-34",
                            35 <= ageAtDth & ageAtDth < 45 ~ "Age35-44",
                            45 <= ageAtDth & ageAtDth < 55 ~ "Age45-54",
                            55 <= ageAtDth & ageAtDth < 65 ~ "Age55-64",
                            65 <= ageAtDth & ageAtDth < 75 ~ "Age65-74",
                            75 <= ageAtDth & ageAtDth < 85 ~ "Age75-84",
                            ageAtDth >= 85                 ~ "Age85+")) %>%
  filter(oneweek <= 17 & twoweek >= 0) %>%
  select(Sex, agecat, ageAtDth, GEOID, dthdate, citytown, RES_STATE, year, oneweek, twoweek)
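# A minimal sketch (an assumption, not the project's actual ABSM construction) of how a
# ZCTA-level poverty measure and total population like those in d.acs2018 might be pulled
# with tidycensus. Requires a Census API key via census_api_key(); the real d.acs2018 also
# includes crowding, race/ethnicity, and ICE measures not shown here.
if (FALSE) {
  acs.pov.sketch <- get_acs(geography = "zcta", year = 2018, survey = "acs5", output = "wide",
                            variables = c(pov_denom = "B17001_001",   # pop for whom poverty status is determined
                                          pov_num   = "B17001_002",   # income below the poverty level
                                          pop_total = "B01003_001")) %>%  # total population
    transmute(GEOID,
              pop_total = pop_totalE,
              zctaPov = ifelse(pov_denomE > 0, pov_numE/pov_denomE, NA_real_))
}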
# Example:
# extract MA zip code list from all years of data
# in order to match with US ZCTA ABSM dataset
ma.zips2020 <- names(table(ma.death2020$GEOID))
ma.zips2019 <- names(table(ma.death2019$GEOID))
ma.zips2018 <- names(table(ma.death2018$GEOID))
ma.zips2017 <- names(table(ma.death2017$GEOID))
ma.zips2016 <- names(table(ma.death2016$GEOID))
ma.zips2015 <- names(table(ma.death2015$GEOID))

ma.allzips <- full_join(data.frame(GEOID=ma.zips2020, in2020=1),
                        data.frame(GEOID=ma.zips2019, in2019=1), by="GEOID") %>%
  full_join(data.frame(GEOID=ma.zips2018, in2018=1), by="GEOID") %>%
  full_join(data.frame(GEOID=ma.zips2017, in2017=1), by="GEOID") %>%
  full_join(data.frame(GEOID=ma.zips2016, in2016=1), by="GEOID") %>%
  full_join(data.frame(GEOID=ma.zips2015, in2015=1), by="GEOID")

# restrict ABSM datasets to the full list of MA zip codes
d.acs2018.ma <- d.acs2018 %>%
  right_join(ma.allzips, by="GEOID") %>%
  mutate(year = 2018,
         apINDPOV = factor(case_when(0    <= zctaPov & zctaPov < 0.05 ~ "0-4.9%",
                                     0.05 <= zctaPov & zctaPov < 0.10 ~ "5-9.9%",
                                     0.10 <= zctaPov & zctaPov < 0.2  ~ "10-19.9%",
                                     0.20 <= zctaPov & zctaPov <= 1   ~ "20-100%"),
                           levels=c("0-4.9%","5-9.9%","10-19.9%","20-100%")),
         qINDPOV = cut(zctaPov,
                       wtd.quantile(zctaPov, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)),
                       include.lowest=T),
         qICEwnhinc = cut(ICEwnhinc,
                          wtd.quantile(ICEwnhinc, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)),
                          include.lowest=T),
         qcrowd = cut(zctaCrowd,
                      wtd.quantile(zctaCrowd, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)),
                      include.lowest=T),
         qpercColor = cut(percColor,
                          wtd.quantile(percColor, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)),
                          include.lowest=T),
         qpercBlack = cut(percBlack,
                          wtd.quantile(percBlack, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)),
                          include.lowest=T))
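# Toy illustration (not part of the analysis) of the population-weighted quintile cuts used
# above: wtd.quantile() weights the cutpoints by ZCTA population, so each quintile holds
# roughly 20% of the population rather than 20% of ZCTAs.
toy <- data.frame(poverty = seq(0.01, 0.40, length.out = 20),
                  pop     = rep(c(8000, 4000, 2000, 1000), each = 5))  # larger ZCTAs have lower poverty here
toy.breaks <- wtd.quantile(toy$poverty, weights = toy$pop, probs = c(0, 0.2, 0.4, 0.6, 0.8, 1))
table(cut(toy$poverty, toy.breaks, include.lowest = TRUE))
# compare with unweighted quintiles: quantile(toy$poverty, probs = c(0, 0.2, 0.4, 0.6, 0.8, 1))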
# link with ABSM dataset
# use d.acs2018.ma, which is centered on year 2016
ma.merge2020 <- left_join(ma.death2020, d.acs2018.ma, by="GEOID")
ma.merge2019 <- left_join(ma.death2019, d.acs2018.ma, by="GEOID")
ma.merge2018 <- left_join(ma.death2018, d.acs2018.ma, by="GEOID")
ma.merge2017 <- left_join(ma.death2017, d.acs2018.ma, by="GEOID")
ma.merge2016 <- left_join(ma.death2016, d.acs2018.ma, by="GEOID")
ma.merge2015 <- left_join(ma.death2015, d.acs2018.ma, by="GEOID")

# combine 2015 to 2019 data
ma.merge1519 <- bind_rows(ma.merge2019, ma.merge2018, ma.merge2017, ma.merge2016, ma.merge2015) %>%
  mutate(year = as.numeric(year.x)) %>%
  select(-year.x, -year.y)

# These are ZIP codes that appear in the FACT OF DEATH file
# but have no corresponding ZCTA in the ACS files.
# They likely represent ZIP codes that are not ZCTAs (e.g. post office boxes).
d.acs2018.weird <- anti_join(ma.allzips, d.acs2018, by="GEOID")
d.cases.weird2020 <- inner_join(d.acs2018.weird, ma.death2020, by="GEOID")
d.cases.weird1519 <- inner_join(d.acs2018.weird, ma.merge1519, by="GEOID")

# read in the year 2000 standard million for age standardization
# We obtained this from SEER: available at https://seer.cancer.gov/stdpopulations/
# SEER gives 19 age categories, which we collapse into categories matching what is available in
# the US Census American Community Survey
seer.age19 <- read_fwf("stdpop.19ages.txt", fwf_widths(c(3,3,8), c("standard","age","std_raw"))) %>%
  filter(standard=="201") %>%
  mutate(agecat = recode(age,
                         '000'="Age0-4",   '001'="Age0-4",
                         '002'="Age5-9",   '003'="Age10-14",
                         '004'="Age15-19", '005'="Age20-24",
                         '006'="Age25-29", '007'="Age30-34",
                         '008'="Age35-44", '009'="Age35-44",
                         '010'="Age45-54", '011'="Age45-54",
                         '012'="Age55-64", '013'="Age55-64",
                         '014'="Age65-74", '015'="Age65-74",
                         '016'="Age75-84", '017'="Age75-84",
                         '018'="Age85+"),
         std.pop = as.numeric(std_raw)) %>%
  group_by(agecat) %>%
  summarise(std = sum(std.pop))

ma.pop.for.agestd <- left_join(ma.acs.pop2, d.acs2018.ma, by="GEOID")

these.agecat <- paste0('Age', c('0-4','5-9','10-14','15-19','20-24','25-29',
                                '30-34','35-44','45-54','55-64','65-74','75-84','85+'))

# check the total deaths for the 2020 and 2015-2019 data
sum(table(ma.merge2020$oneweek))
sum(table(ma.merge1519$oneweek))

# Function to summarize excess deaths, age standardized rates, rate difference, and rate ratios
# by categories of ZCTA ABSMs
f.ts.agestd <- function(this.absm){

  absm <- enquo(this.absm)

  # get levels of the absm
  num.lev.absm <- length(levels(d.acs2018.ma[[paste(enexpr(this.absm))]]))
  absm.levels <- levels(d.acs2018.ma[[paste(enexpr(this.absm))]])

  # make a template for oneweek by absm
  tweek <- data.frame(agecat = rep(these.agecat, num.lev.absm*17),
                      oneweek = rep(rep(c(1:17), each=13), num.lev.absm),
                      absm.name = factor(rep(levels(d.acs2018.ma[[paste(enexpr(this.absm))]]), each=17*13),
                                         levels=absm.levels))
  names(tweek)[3] <- paste(enexpr(this.absm))

  # sum the number of deaths over a weekly period, within categories of absm
  this.data2020 <- ma.merge2020 %>%
    group_by(agecat, oneweek, !!absm) %>%
    summarise(numdeaths = n())

  # aggregate denominators in the 2020 file
  this.denom2020 <- ma.pop.for.agestd %>%
    group_by(agecat, !!absm) %>%
    summarise(denom = sum(acs.pop)) %>%
    right_join(tweek, by=c("agecat", paste(enexpr(this.absm))))

  # merge numerators and denominators for 2020
  merge2020 <- right_join(this.data2020, this.denom2020,
                          by=c("oneweek", "agecat", paste(enquo(absm))[2]))

  # sum the average number of deaths over a weekly period over 5 years of data and within categories of absm
  this.data1519 <- ma.merge1519 %>%
    group_by(agecat, oneweek, !!absm) %>%
    summarise(numdeaths = n()/5)

  # aggregate denominators in the 2015-2019 file
  this.denom1519 <- ma.pop.for.agestd %>%
    group_by(agecat, !!absm) %>%
    summarise(denom = sum(acs.pop)) %>%
    right_join(tweek, by=c("agecat", paste(enexpr(this.absm))))

  # merge numerators and denominators for 2015-2019
  # divide by five so this is the average number of deaths and average population over 5 years
  merge1519 <- right_join(this.data1519, this.denom1519,
                          by=c("oneweek", "agecat", paste(enquo(absm))[2]))

  # compute excess on a weekly basis
  # compute rates per 100000 person-years
  # to do this, calculate for person-weeks and re-multiply to get person-years.
  # Note that we adjust the size of the person-time denominator
  # for oneweek=9, which corresponds to the week containing February 29 when there is a leap year.
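  # Direct age standardization, as implemented below: for each week and ABSM stratum, the
  # standardized rate is the std-weighted average of the age-specific rates,
  #   IR.std = sum_i( w_i * d_i / PT_i ) / sum_i( w_i ),
  # where i indexes age groups, w_i is the year-2000 standard million weight (std), d_i the
  # deaths, and PT_i the person-years (denom_i / 52.17857, i.e. one week of the ACS population).
  # The variance terms treat each d_i as Poisson (var(d_i) = d_i), accumulating
  # sum_i( w_i^2 * d_i / PT_i^2 ) together with the weight sums (sum.wt, sum.wt.sq), and 95%
  # limits are taken as estimate +/- 1.96*SE (on the log scale for the rate ratio).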
  this.ma.compare <- full_join(merge2020, merge1519,
                               by=c("oneweek", "agecat", paste(enquo(absm))[2]),
                               suffix=c(".2020",".1519")) %>%
    left_join(seer.age19, by="agecat") %>%
    filter(!is.na(!!absm)) %>%
    mutate(denom.2020 = ifelse(oneweek==9, denom.2020*15/14, denom.2020),
           denom.1519 = ifelse(oneweek==9, denom.1519*71/70, denom.1519),
           rate2020 = std*numdeaths.2020/(denom.2020/52.17857),
           var.rate2020 = std^2*numdeaths.2020/(denom.2020/52.17857)^2,
           rate1519 = std*numdeaths.1519/(denom.1519/52.17857),
           var.rate1519 = std^2*numdeaths.1519/(denom.1519/52.17857)^2,
           adjdeath.2020 = std*numdeaths.2020,
           adjdeath.1519 = std*numdeaths.1519,
           w.sq = std^2) %>%
    group_by(oneweek, !!absm) %>%
    summarise(raw.deaths1519 = sum(numdeaths.1519, na.rm=T),
              raw.deaths2020 = sum(numdeaths.2020, na.rm=T),
              raw.pop1519 = sum(denom.1519, na.rm=T),
              raw.pop2020 = sum(denom.2020, na.rm=T),
              sum.w.iri2020 = sum(rate2020, na.rm=T),
              sum.w.iri1519 = sum(rate1519, na.rm=T),
              sum.w.var.iri2020 = sum(var.rate2020, na.rm=T),
              sum.w.var.iri1519 = sum(var.rate1519, na.rm=T),
              sum.wt = sum(std),
              sum.wt.sq = sum(w.sq),
              sum.w.death2020 = sum(adjdeath.2020, na.rm=T),
              sum.w.death1519 = sum(adjdeath.1519, na.rm=T)) %>%
    mutate(ir.std.2020 = sum.w.iri2020/sum.wt,
           var.ir.std.2020 = sum.w.var.iri2020/sum.wt.sq,
           ir.std.2020.lo95 = ir.std.2020 - 1.96*sqrt(var.ir.std.2020),
           ir.std.2020.up95 = ir.std.2020 + 1.96*sqrt(var.ir.std.2020),
           ir.std.1519 = sum.w.iri1519/sum.wt,
           var.ir.std.1519 = sum.w.var.iri1519/sum.wt.sq,
           ir.std.1519.lo95 = ir.std.1519 - 1.96*sqrt(var.ir.std.1519),
           ir.std.1519.up95 = ir.std.1519 + 1.96*sqrt(var.ir.std.1519),
           death.std.2020 = sum.w.death2020/sum.wt,
           death.std.1519 = sum.w.death1519/sum.wt,
           death.std.excess = death.std.2020 - death.std.1519,
           ird.std = ir.std.2020 - ir.std.1519,
           var.ird.std = var.ir.std.2020 + var.ir.std.1519,
           ird.std.lo95 = ird.std - 1.96*sqrt(var.ird.std),
           ird.std.up95 = ird.std + 1.96*sqrt(var.ird.std),
           irr.std = ir.std.2020/ir.std.1519,
           var.irr.std = (var.ir.std.2020/ir.std.2020^2) + (var.ir.std.1519/ir.std.1519^2),
           irr.std.lo95 = exp(log(irr.std) - 1.96*sqrt(var.irr.std)),
           irr.std.up95 = exp(log(irr.std) + 1.96*sqrt(var.irr.std)),
           raw.excess = raw.deaths2020 - raw.deaths1519) %>%
    ungroup() %>%
    select(oneweek, !!absm, raw.excess,
           raw.deaths1519, raw.pop1519, death.std.1519, ir.std.1519, ir.std.1519.lo95, ir.std.1519.up95,
           raw.deaths2020, raw.pop2020, death.std.2020, ir.std.2020, ir.std.2020.lo95, ir.std.2020.up95,
           death.std.excess, ird.std, ird.std.lo95, ird.std.up95,
           irr.std, irr.std.lo95, irr.std.up95)

  return(this.ma.compare)
}

# The ABSM variable tends to be coerced from factor to character,
# so there is extra code here to restore the factor levels.
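# A possible convenience wrapper (an assumption, not used in the original script) for the
# repeated restore-the-factor-levels step below; absm.name is the ABSM column name as a string.
f.restore.levels <- function(df, absm.name){
  df %>%
    mutate(qval = factor(.data[[absm.name]], levels=levels(d.acs2018.ma[[absm.name]])),
           qval.order = as.numeric(qval),
           qabsm = absm.name) %>%
    arrange(oneweek, qval)
}
# e.g. ma.std.compare.apINDPOV <- f.restore.levels(f.ts.agestd(apINDPOV), "apINDPOV")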
ma.std.compare.apINDPOV <- f.ts.agestd(apINDPOV)
ma.std.compare.apINDPOV <- mutate(ma.std.compare.apINDPOV,
                                  qval = factor(apINDPOV, levels=levels(d.acs2018.ma$apINDPOV)),
                                  qval.order = as.numeric(qval),
                                  qabsm = "apINDPOV") %>%
  arrange(oneweek, qval)

ma.std.compare.qcrowd <- f.ts.agestd(qcrowd)
ma.std.compare.qcrowd <- mutate(ma.std.compare.qcrowd,
                                qval = factor(qcrowd, levels=levels(d.acs2018.ma$qcrowd)),
                                qval.order = as.numeric(qval),
                                qabsm = "qcrowd") %>%
  arrange(oneweek, qval)

ma.std.compare.qpercColor <- f.ts.agestd(qpercColor)
ma.std.compare.qpercColor <- mutate(ma.std.compare.qpercColor,
                                    qval = factor(qpercColor, levels=levels(d.acs2018.ma$qpercColor)),
                                    qval.order = as.numeric(qval),
                                    qabsm = "qpercColor") %>%
  arrange(oneweek, qval)

ma.std.compare.qpercBlack <- f.ts.agestd(qpercBlack)
ma.std.compare.qpercBlack <- mutate(ma.std.compare.qpercBlack,
                                    qval = factor(qpercBlack, levels=levels(d.acs2018.ma$qpercBlack)),
                                    qval.order = as.numeric(qval),
                                    qabsm = "qpercBlack") %>%
  arrange(oneweek, qval)

ma.std.compare.qICEwnhinc <- f.ts.agestd(qICEwnhinc)
ma.std.compare.qICEwnhinc <- mutate(ma.std.compare.qICEwnhinc,
                                    qval = factor(qICEwnhinc, levels=levels(d.acs2018.ma$qICEwnhinc)),
                                    qval.order = as.numeric(qval),
                                    qabsm = "qICEwnhinc") %>%
  arrange(oneweek, qval)

# combine all the results for re-sorting, and rescale the standardized rates
# to per 100,000 person-years
ma.std.compare.all <- bind_rows(ma.std.compare.apINDPOV,
                                ma.std.compare.qcrowd,
                                ma.std.compare.qpercColor,
                                ma.std.compare.qpercBlack,
                                ma.std.compare.qICEwnhinc) %>%
  mutate(ir.std.1519 = 100000*ir.std.1519,
         ir.std.1519.lo95 = 100000*ir.std.1519.lo95,
         ir.std.1519.up95 = 100000*ir.std.1519.up95,
         ir.std.2020 = 100000*ir.std.2020,
         ir.std.2020.lo95 = 100000*ir.std.2020.lo95,
         ir.std.2020.up95 = 100000*ir.std.2020.up95,
         ird.std = 100000*ird.std,
         ird.std.lo95 = 100000*ird.std.lo95,
         ird.std.up95 = 100000*ird.std.up95,
         raw.diff = raw.deaths2020 - raw.deaths1519) %>%
  select(qabsm, qval, oneweek, raw.excess,
         ir.std.1519, ir.std.1519.lo95, ir.std.1519.up95,
         ir.std.2020, ir.std.2020.lo95, ir.std.2020.up95,
         ird.std, ird.std.lo95, ird.std.up95,
         irr.std, irr.std.lo95, irr.std.up95, qval.order) %>%
  mutate(qabsm = factor(qabsm))
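# Optional check (not part of the original outputs): cumulative raw excess deaths over the
# surge weeks (week 11 onward, i.e. March 11 through April 28) by ABSM category.
ma.std.compare.all %>%
  filter(oneweek >= 11) %>%
  group_by(qabsm, qval) %>%
  summarise(cum.raw.excess = sum(raw.excess, na.rm=TRUE))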
# output all of the results to a csv file
write.csv(arrange(ma.std.compare.all, qabsm, qval.order, oneweek), "ma_zcta_oneweek_april28.csv")

# create labels for the weekly periods for graphing
oneweek.labels <- c("01-01,\n01-07", "01-08,\n01-14", "01-15,\n01-21", "01-22,\n01-28",
                    "01-29,\n02-04", "02-05,\n02-11", "02-12,\n02-18", "02-19,\n02-25",
                    "02-26,\n03-03", "03-04,\n03-10", "03-11,\n03-17", "03-18,\n03-24",
                    "03-25,\n03-31", "04-01,\n04-07", "04-08,\n04-14", "04-15,\n04-21",
                    "04-22,\n04-28")

library(gridExtra)
library(grid)
library(ggpubr)
library(RColorBrewer)   # for brewer.pal()

# plot in the style of the Boston Globe graphics
f.globestyle <- function(sumobject, year, which.rate, this.absm, absm.label, numlev){

  rate <- enquo(which.rate)
  absm <- enquo(this.absm)

  this.ylim <- 100000*c(min(sumobject$ir.std.1519, sumobject$ir.std.2020),
                        max(sumobject$ir.std.1519, sumobject$ir.std.2020))

  this.plot <- ggplot(sumobject, aes(x=oneweek, y=100000*!!rate, group=!!absm, color=!!absm)) +
    geom_point() +
    geom_line() +
    ylim(this.ylim) +
    scale_x_discrete(name="Weeks", limits=oneweek.labels) +
    scale_color_manual(name=paste(enexpr(absm.label)), values=these.colors[c((6-numlev):5)]) +
    ylab("Rate") +
    ggtitle(paste0("By ", absm.label, ", ", year)) +
    theme(axis.text.x=element_text(size=4),
          axis.text.y=element_text(size=8),
          axis.title.x=element_text(size=8),
          axis.title.y=element_text(size=8),
          plot.title=element_text(size=9),
          legend.position="none")

  return(this.plot)
}

these.colors <- brewer.pal(5, "Set1")

globe.apINDPOV.1519 <- f.globestyle(ma.std.compare.apINDPOV, year="2015-2019",
                                    which.rate=ir.std.1519, apINDPOV, "% poverty", numlev=4)
globe.apINDPOV.2020 <- f.globestyle(ma.std.compare.apINDPOV, year="2020",
                                    which.rate=ir.std.2020, apINDPOV, "% poverty", numlev=4)

globe.qcrowd.1519 <- f.globestyle(ma.std.compare.qcrowd, year="2015-2019",
                                  which.rate=ir.std.1519, qcrowd, "% crowding", numlev=5)
globe.qcrowd.2020 <- f.globestyle(ma.std.compare.qcrowd, year="2020",
                                  which.rate=ir.std.2020, qcrowd, "% crowding", numlev=5)

globe.qpercColor.1519 <- f.globestyle(ma.std.compare.qpercColor, year="2015-2019",
                                      which.rate=ir.std.1519, qpercColor, "% population of color", numlev=5)
globe.qpercColor.2020 <- f.globestyle(ma.std.compare.qpercColor, year="2020",
                                      which.rate=ir.std.2020, qpercColor, "% population of color", numlev=5)

# reverse the color palette for the ICE plots
these.colors <- rev(these.colors)

globe.qICEwnhinc.1519 <- f.globestyle(ma.std.compare.qICEwnhinc, year="2015-2019",
                                      which.rate=ir.std.1519, qICEwnhinc,
                                      "Index of Concentration at the Extremes", numlev=5)
globe.qICEwnhinc.2020 <- f.globestyle(ma.std.compare.qICEwnhinc, year="2020",
                                      which.rate=ir.std.2020, qICEwnhinc,
                                      "Index of Concentration at the Extremes", numlev=5)

# arrange the plots on a grid
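# The helper below follows the widely circulated "shared legend" pattern for ggplot2/gridExtra
# (e.g. from the ggplot2 wiki): it extracts the legend grob from the first plot, strips the
# legends from all plots, and re-attaches a single legend at the bottom or right of the grid.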
grid_arrange_shared_legend <- function(...,
                                       ncol = length(list(...)),
                                       nrow = 1,
                                       position = c("bottom", "right")) {

  plots <- list(...)
  position <- match.arg(position)
  g <- ggplotGrob(plots[[1]] + theme(legend.position = position))$grobs
  legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
  lheight <- sum(legend$height)
  lwidth <- sum(legend$width)
  gl <- lapply(plots, function(x) x + theme(legend.position = "none"))
  gl <- c(gl, ncol = ncol, nrow = nrow)

  combined <- switch(position,
                     "bottom" = arrangeGrob(do.call(arrangeGrob, gl),
                                            legend,
                                            ncol = 1,
                                            heights = unit.c(unit(1, "npc") - lheight, lheight)),
                     "right" = arrangeGrob(do.call(arrangeGrob, gl),
                                           legend,
                                           ncol = 2,
                                           widths = unit.c(unit(1, "npc") - lwidth, lwidth)))
  return(combined)
}

globe.apINDPOV <- grid_arrange_shared_legend(globe.apINDPOV.1519, globe.apINDPOV.2020,
                                             position="bottom", ncol = 2, nrow = 1)
globe.qcrowd <- grid_arrange_shared_legend(globe.qcrowd.1519, globe.qcrowd.2020,
                                           position="bottom", ncol = 2, nrow = 1)
globe.qpercColor <- grid_arrange_shared_legend(globe.qpercColor.1519, globe.qpercColor.2020,
                                               position="bottom", ncol = 2, nrow = 1)
globe.qICEwnhinc <- grid_arrange_shared_legend(globe.qICEwnhinc.1519, globe.qICEwnhinc.2020,
                                               position="bottom", ncol = 2, nrow = 1)

together <- ggarrange(globe.apINDPOV, globe.qcrowd, globe.qpercColor, globe.qICEwnhinc, ncol=1)

ggsave("globestyle_zcta_april28.pdf", together, width=8, height=9.5)