###### 1
# Load up the R packages we need for the Census data.
# Each package is installed only when it is missing, so re-running the
# script does not download it again every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
if (!requireNamespace("tidycensus", quietly = TRUE)) {
  install.packages("tidycensus")
}
library(tidycensus)
library(tidyverse)

# The Census API key is a personal credential, so it is read from the
# CENSUS_API_KEY environment variable instead of being written in this file.
if (!nzchar(Sys.getenv("CENSUS_API_KEY"))) {
  stop(
    "Set the CENSUS_API_KEY environment variable to your Census API key ",
    "(request one at https://api.census.gov/data/key_signup.html).",
    call. = FALSE
  )
}
census_api_key(Sys.getenv("CENSUS_API_KEY"))

###### 2
# Let's get the population of each state with the Census API for the
# year 2010.
pop <- get_decennial(
  geography = "state",
  variables = "P001001",
  year = 2010
)
glimpse(pop)

###### 3
# We can get more than one attribute at once: this is the
# population from before, and the population identified in
# rural areas. output = "wide" gives one column per variable.
rural <- get_decennial(
  geography = "state",
  variables = c("P001001", "P002005"),
  year = 2010,
  output = "wide"
)
glimpse(rural)

###### 4
# Now, let's plot this as a scatter plot. First, we add a new
# column for the fraction of population that is in rural areas,
# then reorder the states by this fraction, and finally add in
# the labels for the graph.
rural %>%
  mutate(prop_rural = P002005 / P001001) %>%
  ggplot(aes(x = prop_rural, y = fct_reorder(NAME, prop_rural))) +
  geom_point() +
  labs(
    title = "Rural Population in US States in 2010",
    subtitle = "Maine and Vermont are the most rural states",
    caption = "Source: US Census",
    x = "Rural Population Proportion",
    y = NULL
  )

###### 5
# The new piece we want to add today is the geometry of each state
# (or whichever locale we query from the Census). You can see in
# this table that there is a new column filled with polygons for the
# shapes of the states. These are denoted with GPS coordinates.
# Note that this overwrites the `rural` table from section 3.
rural <- get_decennial(
  geography = "state",
  variables = c("P001001", "P002005"),
  year = 2010,
  output = "wide",
  geometry = TRUE
)
glimpse(rural)

###### 6
# Using a new plot type, the geom_sf(), we can draw these shapes
# and fill them in based on the proportion of rural population.
# Map of every state, filled by the percentage of its population that
# is rural (prop_rural is a fraction, so it is scaled by 100 for the legend).
rural %>%
  mutate(prop_rural = P002005 / P001001) %>%
  ggplot(aes(fill = 100 * prop_rural)) +
  geom_sf() +
  scale_fill_viridis_c(option = "plasma", direction = -1) +
  labs(
    title = "Rural geography of the United States",
    caption = "Source: Census 2010",
    fill = "Percent Rural"
  ) +
  theme_void()

###### 7
# The above plot is hard to see because of the distant states and Puerto Rico.
# We can filter them out by adding in the second line.
rural %>%
  mutate(prop_rural = P002005 / P001001) %>%
  filter(!NAME %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
  ggplot(aes(fill = 100 * prop_rural)) +
  geom_sf() +
  scale_fill_viridis_c(option = "plasma", direction = -1) +
  labs(
    title = "Rural geography of the United States",
    caption = "Source: Census 2010",
    fill = "Percent Rural"
  ) +
  theme_void()

###### 8
# Or we can use a different map projection, and shift Alaska and Hawaii
# into a better location for graphing.
# NOTE(review): newer tidycensus releases appear to deprecate `shift_geo`
# in favour of tigris::shift_geometry() -- confirm against the installed
# version before relying on it.
rural_shifted <- get_decennial(
  geography = "state",
  variables = c("P001001", "P002005"),
  year = 2010,
  output = "wide",
  geometry = TRUE,
  shift_geo = TRUE
) %>%
  rename(state = NAME) %>%
  mutate(prop_rural = P002005 / P001001)

rural_shifted %>%
  ggplot(aes(fill = prop_rural * 100)) +
  geom_sf() +
  scale_fill_viridis_c(option = "plasma", direction = -1) +
  labs(
    title = "Rural geography of the United States",
    caption = "Source: Census 2010",
    fill = "Percent Rural"
  ) +
  theme_void()

###### 9
# Let's dive in to one state, West Virginia, and look at some data
# at the county level. First is the median income.
wv <- get_acs(
  geography = "county",
  variables = c(medincome = "B19013_001"),
  state = "WV",
  year = 2018,
  geometry = TRUE
)

# Display the table before plotting it.
wv

wv %>%
  ggplot(aes(fill = estimate)) +
  geom_sf() +
  scale_fill_viridis_c(option = "plasma", direction = -1) +
  labs(
    title = "Household income by county in West Virginia",
    subtitle = "2014-2018 American Community Survey",
    fill = "Household income"
  ) +
  theme_void()

###### 9b
# And second is the rural population proportion. We can note the correlation
# between these two maps.
# County-level total and rural population for West Virginia, with the
# county shapes attached so they can be drawn with geom_sf().
rural_wv <- get_decennial(
  geography = "county",
  variables = c("P001001", "P002005"),
  year = 2010,
  output = "wide",
  state = "WV",
  geometry = TRUE
)

rural_wv %>%
  mutate(prop_rural = P002005 / P001001) %>%
  ggplot(aes(fill = 100 * prop_rural)) +
  geom_sf() +
  scale_fill_viridis_c(option = "plasma", direction = -1) +
  labs(
    title = "Rural geography of West Virginia",
    caption = "Source: Census 2010",
    fill = "Percent Rural"
  ) +
  theme_void()

###### 10
# Finally, we can see how to split up and draw multiple maps
# with a facet wrapping. Here we look at the racial makeup of
# Harris county in Texas, the home of Houston.
racevars <- c(
  White = "B02001_002",
  Black = "B02001_003",
  Asian = "B02001_005",
  Hispanic = "B03003_003"
)

# summary_var adds a summary_est column to every row; it is used below as
# the denominator for each group's percentage.
harris <- get_acs(
  geography = "tract",
  variables = racevars,
  year = 2018,
  state = "TX",
  county = "Harris County",
  geometry = TRUE,
  summary_var = "B02001_001"
)

# The same Percent value drives both fill and outline colour, and
# facet_wrap() draws one map per entry in racevars.
harris %>%
  mutate(Percent = 100 * (estimate / summary_est)) %>%
  ggplot(aes(fill = Percent, color = Percent)) +
  facet_wrap(~ variable) +
  geom_sf() +
  scale_fill_viridis_c(direction = -1) +
  scale_color_viridis_c(direction = -1) +
  labs(
    title = "Racial geography of Harris County, Texas",
    caption = "Source: American Community Survey 2014-2018"
  ) +
  theme_void()

###### 11
# Your turn!
# First, go back and edit these plots and queries to personalize
# the graphs to your home state and county.
# Second, search the Census for other interesting attributes that you
# would like to graph, and edit the queries to use them.