# Code extracts area-based socioeconomic measures for the US # counties, ZIP code tabulation areas (ZCTAs) and census tracts (CTs) # from the US Census American Community Survey # downloaded from The Public Health Disparities Geocoding Project # https://www.hsph.harvard.edu/thegeocodingproject/covid-19-resources/ # # Area-based socioeconomic measures as used in: # Chen JT, Krieger N. Revealing the unequal burden of COVID-19 by income, race/ethnicity, # and household crowding: US county vs ZIP code analyses. Harvard Center for Population and Development # Studies Working Paper Series, Volume 19, Number 1. April 21, 2020. # (updated: May 5, 2020) # # Chen JT, Waterman PD, Krieger N. COVID-19 and the unequal surge in mortality rates in Massachusetts, # by city/town and ZIP Code measures of poverty, household crowding, race/ethnicity, and racialized economic # segregation. Harvard Center for Population and Development Studies Working Paper Series, Volume 19, Number 2. May 9, 2020. # Jarvis T. Chen, ScD # 15 May 2020 # Version history: # 17 May 2020: Add include.lowest=T option to wtd.quantile call so that lowest category properly includes zeroes. library(tidycensus) library(tidyverse) library(dplyr) library(Hmisc) library(purrr) # setwd("~/") # This code uses the tidycensus package to pull data # from the US Census. # # You can learn more about the Census API here: # https://www.census.gov/data/developers/guidance/api-user-guide.html # # And get an API Key here: # https://api.census.gov/data/key_signup.html # # Set your Census API key here # census_api_key() # WRANGLE THE ABSMs # This function extracts American Community Survey (ACS) data for # a particular year. # This works for whole country for zcta and county. # See below for census tracts. f.extract.acs <- function(this.year, level){ raw.acstotal <- get_acs(geography=level, variables=c("B01003_001E", "B02001_001E", "B02001_002E", "B02001_003E", "B02001_004E", "B02001_005E", "B02001_006E", "B02001_007E", "B02001_008E", "B02001_009E", "B02001_010E", "B03001_001E", "B03001_003E", "B01001H_001E"), year=this.year, output="wide", cache_table=T) %>% mutate(percBlack = B02001_003E/B02001_001E, percHisp = B03001_003E/B03001_001E, pop_total = B01003_001E, pop_white = B02001_002E, pop_black = B02001_003E, pop_amind = B02001_004E, pop_api = B02001_005E + B02001_006E, pop_hisp = B03001_003E, pop_wnh = B01001H_001E, percColor = (B01003_001E - B01001H_001E)/B01003_001E) %>% select(GEOID, percBlack, percHisp, percColor, pop_total, pop_white, pop_black, pop_amind, pop_api, pop_hisp, pop_wnh) # this code is to compute poverty, ICEwbinc, ICEwnhinc, and crowding from ACS data raw.white <- get_acs(geography=level,table="B19001A", year=this.year,output="wide", cache_table=T) raw.black <- get_acs(geography=level,table="B19001B",year=this.year,output="wide", cache_table=T) raw.all <- get_acs(geography=level, table="B19001", year=this.year,output="wide", cache_table=T) raw.pov <- get_acs(geography=level,table="B17001", year=this.year, output="wide", cache_table=T) raw.crowd <- get_acs(geography=level, table="B25014", year=this.year, output="wide", cache_table=T) raw.wnh <- get_acs(geography=level,table="B19001H", year=this.year, output="wide", cache_table=T) d.merge <- left_join(raw.white, raw.black, by="GEOID") %>% left_join(raw.wnh, by="GEOID") %>% left_join(raw.all, by="GEOID") %>% left_join(raw.pov, by="GEOID") %>% left_join(raw.crowd, by="GEOID") %>% select(c("GEOID","B19001_001E","B19001_002E","B19001_003E","B19001_004E","B19001_005E", "B19001A_014E","B19001A_015E","B19001A_016E","B19001A_017E", "B19001B_002E","B19001B_003E","B19001B_004E","B19001B_005E", "B19001H_002E","B19001H_003E","B19001H_004E","B19001H_005E", "B19001H_014E","B19001H_015E","B19001H_016E","B19001H_017E", "B17001_001E","B17001_002E", "B25014_001E", "B25014_005E","B25014_006E","B25014_007E", "B25014_011E","B25014_012E","B25014_013E")) %>% mutate(ICEwbinc=((B19001A_014E + B19001A_015E + B19001A_016E + B19001A_017E) - (B19001B_002E + B19001B_003E + B19001B_004E + B19001B_005E))/B19001_001E, ICEwnhinc=((B19001H_014E + B19001H_015E + B19001H_016E + B19001H_017E) - (B19001_002E + B19001_003E + B19001_004E + B19001_005E - B19001H_002E - B19001H_003E - B19001H_004E - B19001H_005E))/B19001_001E, poverty = B17001_002E/B17001_001E, crowding = (B25014_005E + B25014_006E + B25014_007E + B25014_011E + B25014_012E + B25014_013E) / B25014_001E, year=this.year) %>% select(GEOID, ICEwbinc, ICEwnhinc, poverty, crowding, year) # this codes creates categories of the area-based socioeconomic measures # variables with prefix "q" indicate quintiles, defined by the # distribution over areas, weighted by population size d.merge2 <- left_join(raw.acstotal, d.merge, by="GEOID") %>% mutate(GEOID=as.character(GEOID), apINDPOV=factor(case_when(0<=poverty & poverty<0.05 ~ "0-4.9%", 0.05<=poverty & poverty<0.10 ~ "5-9.9%", 0.10<=poverty & poverty<0.2 ~ "10-19.9%", 0.20<=poverty & poverty<=1 ~ "20-100%"), levels=c("0-4.9%","5-9.9%","10-19.9%","20-100%")), qINDPOV=cut(poverty, wtd.quantile(poverty, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)), include.lowest=T), qICEwnhinc=cut(ICEwnhinc, wtd.quantile(ICEwnhinc, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)), include.lowest=T), qICEwbinc=cut(ICEwbinc, wtd.quantile(ICEwbinc, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)), include.lowest=T), qcrowd = cut(crowding, wtd.quantile(crowding, weights=pop_total, probs=c(0,0.4,0.6,0.8,1)), include.lowest=T), qpercColor = cut(percColor, wtd.quantile(percColor, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)), include.lowest=T), qpercBlack = cut(percBlack, wtd.quantile(percBlack, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)), include.lowest=T)) return(d.merge2) } d.acs2018.zcta <- f.extract.acs(this.year=2018, level="zcta") d.acs2018.county <-f.extract.acs(this.year=2018, level="county") # The function below extracts ACS data for census tracts for particular years. # Census tracts need to be extracted one state at a time, # so we use the mapdf() from the purrr package to apply # the get_acs function to ever element of the us vector below # (which is a list of all US states). us <- unique(fips_codes$state)[1:51] f.extract.acs.tracts <- function(this.year){ raw.acstotal <- map_df(us, function(x) { get_acs(geography="tract", variables=c("B01003_001E", "B02001_001E", "B02001_002E", "B02001_003E", "B02001_004E", "B02001_005E", "B02001_006E", "B02001_007E", "B02001_008E", "B02001_009E", "B02001_010E", "B03001_001E", "B03001_003E", "B01001H_001E"), year=this.year, state=x, output="wide", cache_table=T)}) %>% mutate(percBlack = B02001_003E/B02001_001E, percHisp = B03001_003E/B03001_001E, pop_total = B01003_001E, pop_white = B02001_002E, pop_black = B02001_003E, pop_amind = B02001_004E, pop_api = B02001_005E + B02001_006E, pop_hisp = B03001_003E, pop_wnh = B01001H_001E, percColor = (B01003_001E - B01001H_001E)/B01003_001E) %>% select(GEOID, percBlack, percHisp, percColor, pop_total, pop_white, pop_black, pop_amind, pop_api, pop_hisp, pop_wnh) raw.white <- map_df(us, function(x){ get_acs(geography="tract",table="B19001A", state=x, year=this.year,output="wide", cache_table=T)}) raw.black <- map_df(us, function(x){ get_acs(geography="tract",table="B19001B",state=x, year=this.year,output="wide", cache_table=T)}) raw.all <- map_df(us, function(x){ get_acs(geography="tract", table="B19001", state=x, year=this.year,output="wide", cache_table=T)}) raw.pov <- map_df(us, function(x){ get_acs(geography="tract",table="B17001", state=x, year=this.year, output="wide", cache_table=T)}) raw.crowd <- map_df(us, function(x){ get_acs(geography="tract", table="B25014", state=x, year=this.year, output="wide", cache_table=T)}) raw.wnh <- map_df(us, function(x){ get_acs(geography="tract",table="B19001H", state=x, year=this.year, output="wide", cache_table=T)}) d.merge <- left_join(raw.white, raw.black, by="GEOID") %>% left_join(raw.wnh, by="GEOID") %>% left_join(raw.all, by="GEOID") %>% left_join(raw.pov, by="GEOID") %>% left_join(raw.crowd, by="GEOID") %>% select(c("GEOID","B19001_001E","B19001_002E","B19001_003E","B19001_004E","B19001_005E", "B19001A_014E","B19001A_015E","B19001A_016E","B19001A_017E", "B19001B_002E","B19001B_003E","B19001B_004E","B19001B_005E", "B19001H_002E","B19001H_003E","B19001H_004E","B19001H_005E", "B19001H_014E","B19001H_015E","B19001H_016E","B19001H_017E", "B17001_001E","B17001_002E", "B25014_001E", "B25014_005E","B25014_006E","B25014_007E", "B25014_011E","B25014_012E","B25014_013E")) %>% mutate(ICEwbinc=((B19001A_014E + B19001A_015E + B19001A_016E + B19001A_017E) - (B19001B_002E + B19001B_003E + B19001B_004E + B19001B_005E))/B19001_001E, ICEwnhinc=((B19001H_014E + B19001H_015E + B19001H_016E + B19001H_017E) - (B19001_002E + B19001_003E + B19001_004E + B19001_005E - B19001H_002E - B19001H_003E - B19001H_004E - B19001H_005E))/B19001_001E, poverty = B17001_002E/B17001_001E, crowding = (B25014_005E + B25014_006E + B25014_007E + B25014_011E + B25014_012E + B25014_013E) / B25014_001E, year=this.year) %>% select(GEOID, ICEwbinc, ICEwnhinc, poverty, crowding, year) d.merge2 <- left_join(raw.acstotal, d.merge, by="GEOID") %>% mutate(GEOID=as.character(GEOID), apINDPOV=factor(case_when(0<=poverty & poverty<0.05 ~ "0-4.9%", 0.05<=poverty & poverty<0.10 ~ "5-9.9%", 0.10<=poverty & poverty<0.2 ~ "10-19.9%", 0.20<=poverty & poverty<=1 ~ "20-100%"), levels=c("0-4.9%","5-9.9%","10-19.9%","20-100%")), qINDPOV=cut(poverty, wtd.quantile(poverty, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)), include.lowest=T), qICEwnhinc=cut(ICEwnhinc, wtd.quantile(ICEwnhinc, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)), include.lowest=T), qICEwbinc=cut(ICEwbinc, wtd.quantile(ICEwbinc, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)), include.lowest=T), qcrowd = cut(crowding, wtd.quantile(crowding, weights=pop_total, probs=c(0,0.4,0.6,0.8,1)), include.lowest=T), qpercColor = cut(percColor, wtd.quantile(percColor, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)), include.lowest=T), qpercBlack = cut(percBlack, wtd.quantile(percBlack, weights=pop_total, probs=c(0,0.2,0.4,0.6,0.8,1)), include.lowest=T)) return(d.merge2) } d.acs2018.tract <- f.extract.acs.tracts(this.year=2018) # write the data out to csv files write.csv(d.acs2018.tract, "ACS2014_2018_tract.csv") write.csv(d.acs2018.zcta, "ACS2014_2018_zcta.csv") write.csv(d.acs2018.county, "ACS2014_2018_county.csv")