diff --git a/docs/404.html b/docs/404.html index edcc01b..ff4f6eb 100644 --- a/docs/404.html +++ b/docs/404.html @@ -39,7 +39,7 @@
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index bf1afcc..af3a11b 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,7 +17,7 @@ diff --git a/docs/articles/basic-usage.html b/docs/articles/basic-usage.html index 86c16a2..317d494 100644 --- a/docs/articles/basic-usage.html +++ b/docs/articles/basic-usage.html @@ -40,7 +40,7 @@ @@ -117,10 +117,10 @@
-library(tidycensus)
-library(tidyverse)
-
-census_api_key("YOUR API KEY GOES HERE")
library(tidycensus)
+library(tidyverse)
+
+census_api_key("YOUR API KEY GOES HERE")
There are two major functions implemented in
tidycensus: get_decennial()
, which grants
access to the 2000, 2010, and 2020 decennial US Census APIs, and
@@ -130,21 +130,21 @@
-age20 <- get_decennial(geography = "state",
- variables = "P13_001N",
- year = 2020,
- sumfile = "dhc")
-
-head(age20)
## # A tibble: 6 × 4
-## GEOID NAME variable value
-## <chr> <chr> <chr> <dbl>
-## 1 09 Connecticut P13_001N 41.1
-## 2 10 Delaware P13_001N 41.1
-## 3 11 District of Columbia P13_001N 33.9
-## 4 12 Florida P13_001N 43
-## 5 13 Georgia P13_001N 37.5
-## 6 15 Hawaii P13_001N 40.8
+age20 <- get_decennial(geography = "state",
+ variables = "P13_001N",
+ year = 2020,
+ sumfile = "dhc")
+
+head(age20)
+## # A tibble: 6 × 4
+## GEOID NAME variable value
+## <chr> <chr> <chr> <dbl>
+## 1 09 Connecticut P13_001N 41.1
+## 2 10 Delaware P13_001N 41.1
+## 3 11 District of Columbia P13_001N 33.9
+## 4 12 Florida P13_001N 43
+## 5 13 Georgia P13_001N 37.5
+## 6 15 Hawaii P13_001N 40.8
The function returns a tibble with four columns by default:
GEOID
, which is an identifier for the geographical unit
associated with the row; NAME
, which is a descriptive name
@@ -158,9 +158,9 @@
As the function has returned a tidy object, we can visualize it quickly with ggplot2:
-age20 %>%
- ggplot(aes(x = value, y = reorder(NAME, value))) +
- geom_point()
age20 %>%
+ ggplot(aes(x = value, y = reorder(NAME, value))) +
+ geom_point()
-v17 <- load_variables(2017, "acs5", cache = TRUE)
-
-View(v17)
v17 <- load_variables(2017, "acs5", cache = TRUE)
+
+View(v17)
By filtering for “median age”, variable IDs corresponding to that query can be browsed interactively. For the 5-year ACS detailed tables @@ -480,29 +480,29 @@
-vt <- get_acs(geography = "county",
- variables = c(medincome = "B19013_001"),
- state = "VT",
- year = 2021)
-
-vt
## # A tibble: 14 × 5
-## GEOID NAME variable estimate moe
-## <chr> <chr> <chr> <dbl> <dbl>
-## 1 50001 Addison County, Vermont medincome 77978 3393
-## 2 50003 Bennington County, Vermont medincome 63448 3413
-## 3 50005 Caledonia County, Vermont medincome 55159 3974
-## 4 50007 Chittenden County, Vermont medincome 81957 2521
-## 5 50009 Essex County, Vermont medincome 48194 3577
-## 6 50011 Franklin County, Vermont medincome 68476 3297
-## 7 50013 Grand Isle County, Vermont medincome 85154 7894
-## 8 50015 Lamoille County, Vermont medincome 66016 4777
-## 9 50017 Orange County, Vermont medincome 67906 2710
-## 10 50019 Orleans County, Vermont medincome 58037 3153
-## 11 50021 Rutland County, Vermont medincome 59751 2133
-## 12 50023 Washington County, Vermont medincome 70128 3014
-## 13 50025 Windham County, Vermont medincome 59195 2060
-## 14 50027 Windsor County, Vermont medincome 63787 2209
+vt <- get_acs(geography = "county",
+ variables = c(medincome = "B19013_001"),
+ state = "VT",
+ year = 2021)
+
+vt
+## # A tibble: 14 × 5
+## GEOID NAME variable estimate moe
+## <chr> <chr> <chr> <dbl> <dbl>
+## 1 50001 Addison County, Vermont medincome 77978 3393
+## 2 50003 Bennington County, Vermont medincome 63448 3413
+## 3 50005 Caledonia County, Vermont medincome 55159 3974
+## 4 50007 Chittenden County, Vermont medincome 81957 2521
+## 5 50009 Essex County, Vermont medincome 48194 3577
+## 6 50011 Franklin County, Vermont medincome 68476 3297
+## 7 50013 Grand Isle County, Vermont medincome 85154 7894
+## 8 50015 Lamoille County, Vermont medincome 66016 4777
+## 9 50017 Orange County, Vermont medincome 67906 2710
+## 10 50019 Orleans County, Vermont medincome 58037 3153
+## 11 50021 Rutland County, Vermont medincome 59751 2133
+## 12 50023 Washington County, Vermont medincome 70128 3014
+## 13 50025 Windham County, Vermont medincome 59195 2060
+## 14 50027 Windsor County, Vermont medincome 63787 2209
The output is similar to a call to get_decennial()
, but
instead of a value
column, get_acs
returns
estimate
and moe
columns for the ACS estimate
@@ -513,15 +513,15 @@
-vt %>%
- mutate(NAME = gsub(" County, Vermont", "", NAME)) %>%
- ggplot(aes(x = estimate, y = reorder(NAME, estimate))) +
- geom_errorbarh(aes(xmin = estimate - moe, xmax = estimate + moe)) +
- geom_point(color = "red", size = 3) +
- labs(title = "Household income by county in Vermont",
- subtitle = "2017-2021 American Community Survey",
- y = "",
- x = "ACS estimate (bars represent margin of error)")
vt %>%
+ mutate(NAME = gsub(" County, Vermont", "", NAME)) %>%
+ ggplot(aes(x = estimate, y = reorder(NAME, estimate))) +
+ geom_errorbarh(aes(xmin = estimate - moe, xmax = estimate + moe)) +
+ geom_point(color = "red", size = 3) +
+ labs(title = "Household income by county in Vermont",
+ subtitle = "2017-2021 American Community Survey",
+ y = "",
+ x = "ACS estimate (bars represent margin of error)")
diff --git a/docs/articles/basic-usage_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/basic-usage_files/figure-html/unnamed-chunk-4-1.png
index 1e2b0e8..3af9d96 100644
Binary files a/docs/articles/basic-usage_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/basic-usage_files/figure-html/unnamed-chunk-4-1.png differ
diff --git a/docs/articles/basic-usage_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/basic-usage_files/figure-html/unnamed-chunk-7-1.png
index 70d9334..6b01d21 100644
Binary files a/docs/articles/basic-usage_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/basic-usage_files/figure-html/unnamed-chunk-7-1.png differ
diff --git a/docs/articles/index.html b/docs/articles/index.html
index d05ecbb..e9ae161 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -17,7 +17,7 @@
diff --git a/docs/articles/margins-of-error.html b/docs/articles/margins-of-error.html
index a191a42..378a005 100644
--- a/docs/articles/margins-of-error.html
+++ b/docs/articles/margins-of-error.html
@@ -40,7 +40,7 @@
@@ -134,27 +134,27 @@ get_acs
:
-library(tidycensus)
-library(tidyverse)
-
-vars <- paste0("B01001_0", c(20:25, 44:49))
-
-ramsey <- get_acs(geography = "tract",
- variables = vars,
- state = "MN",
- county = "Ramsey",
- year = 2016)
-
-head(ramsey %>% select(-NAME))
## # A tibble: 6 × 4
-## GEOID variable estimate moe
-## <chr> <chr> <dbl> <dbl>
-## 1 27123030100 B01001_020 51 27
-## 2 27123030100 B01001_021 92 40
-## 3 27123030100 B01001_022 48 28
-## 4 27123030100 B01001_023 8 13
-## 5 27123030100 B01001_024 51 52
-## 6 27123030100 B01001_025 23 18
+library(tidycensus)
+library(tidyverse)
+
+vars <- paste0("B01001_0", c(20:25, 44:49))
+
+ramsey <- get_acs(geography = "tract",
+ variables = vars,
+ state = "MN",
+ county = "Ramsey",
+ year = 2016)
+
+head(ramsey %>% select(-NAME))
+## # A tibble: 6 × 4
+## GEOID variable estimate moe
+## <chr> <chr> <dbl> <dbl>
+## 1 27123030100 B01001_020 51 27
+## 2 27123030100 B01001_021 92 40
+## 3 27123030100 B01001_022 48 28
+## 4 27123030100 B01001_023 8 13
+## 5 27123030100 B01001_024 51 52
+## 6 27123030100 B01001_025 23 18
We can see that in two instances the margin of error exceeds the estimate. One way to address this is through data aggregation. While the specific group estimates in this Census tract may be unreliable, the @@ -169,21 +169,21 @@
moe_sum
to calculate the margin of error around a
derived estimate for Census tract population over age 65.
-ramsey65 <- ramsey %>%
- group_by(GEOID) %>%
- summarize(sumest = sum(estimate),
- summoe = moe_sum(moe, estimate))
-
-head(ramsey65)
## # A tibble: 6 × 3
-## GEOID sumest summoe
-## <chr> <dbl> <dbl>
-## 1 27123030100 677 124.
-## 2 27123030201 899 201.
-## 3 27123030202 149 53.8
-## 4 27123030300 783 154.
-## 5 27123030400 423 131.
-## 6 27123030500 396 111.
+ramsey65 <- ramsey %>%
+ group_by(GEOID) %>%
+ summarize(sumest = sum(estimate),
+ summoe = moe_sum(moe, estimate))
+
+head(ramsey65)
+## # A tibble: 6 × 3
+## GEOID sumest summoe
+## <chr> <dbl> <dbl>
+## 1 27123030100 677 124.
+## 2 27123030201 899 201.
+## 3 27123030202 149 53.8
+## 4 27123030300 783 154.
+## 5 27123030400 423 131.
+## 6 27123030500 396 111.
The margins of error for this aggregate population are more reasonable. However, the Census Bureau does issue this warning:
diff --git a/docs/articles/other-datasets.html b/docs/articles/other-datasets.html index cbe6c40..6493e7e 100644 --- a/docs/articles/other-datasets.html +++ b/docs/articles/other-datasets.html @@ -40,7 +40,7 @@@@ -155,39 +155,39 @@Components of change populati component. For example, we can request all components of change variables for US states in 2022:
--library(tidycensus) -library(tidyverse) -library(tigris) -options(tigris_use_cache = TRUE) - -us_components <- get_estimates(geography = "state", product = "components", year = 2022) - -us_components
+## # A tibble: 676 × 5 -## GEOID NAME variable year value -## <chr> <chr> <chr> <int> <dbl> -## 1 01 Alabama BIRTHS 2022 58280 -## 2 01 Alabama DEATHS 2022 66870 -## 3 01 Alabama NATURALCHG 2022 -8590 -## 4 01 Alabama INTERNATIONALMIG 2022 4597 -## 5 01 Alabama DOMESTICMIG 2022 28609 -## 6 01 Alabama NETMIG 2022 33206 -## 7 01 Alabama RESIDUAL 2022 -166 -## 8 01 Alabama RBIRTH 2022 11.5 -## 9 01 Alabama RDEATH 2022 13.2 -## 10 01 Alabama RNATURALCHG 2022 -1.70 -## # ℹ 666 more rows
library(tidycensus) +library(tidyverse) +library(tigris) +options(tigris_use_cache = TRUE) + +us_components <- get_estimates(geography = "state", product = "components", year = 2022) + +us_components
+## # A tibble: 676 × 5 +## GEOID NAME variable year value +## <chr> <chr> <chr> <int> <dbl> +## 1 01 Alabama BIRTHS 2022 58280 +## 2 01 Alabama DEATHS 2022 66870 +## 3 01 Alabama NATURALCHG 2022 -8590 +## 4 01 Alabama INTERNATIONALMIG 2022 4597 +## 5 01 Alabama DOMESTICMIG 2022 28609 +## 6 01 Alabama NETMIG 2022 33206 +## 7 01 Alabama RESIDUAL 2022 -166 +## 8 01 Alabama RBIRTH 2022 11.5 +## 9 01 Alabama RDEATH 2022 13.2 +## 10 01 Alabama RNATURALCHG 2022 -1.70 +## # ℹ 666 more rows
The variables included in the components of change product consist of both estimates of counts and rates. Rates are preceded by an
R
in the variable name and are calculated per 1000 residents.--unique(us_components$variable)
+## [1] "BIRTHS" "DEATHS" "NATURALCHG" -## [4] "INTERNATIONALMIG" "DOMESTICMIG" "NETMIG" -## [7] "RESIDUAL" "RBIRTH" "RDEATH" -## [10] "RNATURALCHG" "RINTERNATIONALMIG" "RDOMESTICMIG" -## [13] "RNETMIG"
unique(us_components$variable)
+## [1] "BIRTHS" "DEATHS" "NATURALCHG" +## [4] "INTERNATIONALMIG" "DOMESTICMIG" "NETMIG" +## [7] "RESIDUAL" "RBIRTH" "RDEATH" +## [10] "RNATURALCHG" "RINTERNATIONALMIG" "RDOMESTICMIG" +## [13] "RNETMIG"
Available geographies include
"us"
,"state"
,"county"
,"metropolitan statistical area/micropolitan statistical area"
@@ -203,67 +203,67 @@Components of change populati package to shift and rescale counties outside the continental US for national mapping.
--net_migration <- get_estimates(geography = "county", - variables = "RNETMIG", - year = 2022, - geometry = TRUE, - resolution = "20m") %>% - shift_geometry() - -net_migration
+## Simple feature collection with 3144 features and 5 fields -## Geometry type: GEOMETRY -## Dimension: XY -## Bounding box: xmin: -3112200 ymin: -1697728 xmax: 2258154 ymax: 1558935 -## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic -## # A tibble: 3,144 × 6 -## GEOID NAME variable year value geometry -## <chr> <chr> <chr> <int> <dbl> <MULTIPOLYGON [m]> -## 1 17127 Massac County, Illinois RNETMIG 2022 2.01 (((620306.9 994.4699, 62… -## 2 27017 Carlton County, Minnes… RNETMIG 2022 9.57 (((225299.4 1038545, 283… -## 3 37181 Vance County, North Ca… RNETMIG 2022 4.13 (((1544259 32180.06, 154… -## 4 47079 Henry County, Tennessee RNETMIG 2022 12.6 (((663474.2 -85746.62, 6… -## 5 06021 Glenn County, Californ… RNETMIG 2022 -14.1 (((-2253309 578922.1, -2… -## 6 17093 Kendall County, Illino… RNETMIG 2022 12.5 (((610436.7 496523.1, 63… -## 7 19095 Iowa County, Iowa RNETMIG 2022 -2.30 (((305057 494717.2, 3435… -## 8 22003 Allen Parish, Louisiana RNETMIG 2022 -9.26 (((274501 -766852.4, 289… -## 9 18055 Greene County, Indiana RNETMIG 2022 10.8 (((748725.5 221905.9, 76… -## 10 33001 Belknap County, New Ha… RNETMIG 2022 8.18 (((1931026 926777.5, 193… -## # ℹ 3,134 more rows
net_migration <- get_estimates(geography = "county", + variables = "RNETMIG", + year = 2022, + geometry = TRUE, + resolution = "20m") %>% + shift_geometry() + +net_migration
+## Simple feature collection with 3144 features and 5 fields +## Geometry type: GEOMETRY +## Dimension: XY +## Bounding box: xmin: -3112200 ymin: -1697728 xmax: 2258154 ymax: 1558935 +## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic +## # A tibble: 3,144 × 6 +## GEOID NAME variable year value geometry +## <chr> <chr> <chr> <int> <dbl> <MULTIPOLYGON [m]> +## 1 17127 Massac County, Illinois RNETMIG 2022 2.01 (((620306.9 994.4699, 62… +## 2 27017 Carlton County, Minnes… RNETMIG 2022 9.57 (((225299.4 1038545, 283… +## 3 37181 Vance County, North Ca… RNETMIG 2022 4.13 (((1544259 32180.06, 154… +## 4 47079 Henry County, Tennessee RNETMIG 2022 12.6 (((663474.2 -85746.62, 6… +## 5 06021 Glenn County, Californ… RNETMIG 2022 -14.1 (((-2253309 578922.1, -2… +## 6 17093 Kendall County, Illino… RNETMIG 2022 12.5 (((610436.7 496523.1, 63… +## 7 19095 Iowa County, Iowa RNETMIG 2022 -2.30 (((305057 494717.2, 3435… +## 8 22003 Allen Parish, Louisiana RNETMIG 2022 -9.26 (((274501 -766852.4, 289… +## 9 18055 Greene County, Indiana RNETMIG 2022 10.8 (((748725.5 221905.9, 76… +## 10 33001 Belknap County, New Ha… RNETMIG 2022 8.18 (((1931026 926777.5, 193… +## # ℹ 3,134 more rows
We’ll next use tidyverse tools to generate a
groups
column that bins the net migration rates into comprehensible categories, and plot the result using geom_sf()
and ggplot2.+-order = c("-15 and below", "-15 to -5", "-5 to +5", "+5 to +15", "+15 and up") - -net_migration <- net_migration %>% - mutate(groups = case_when( - value > 15 ~ "+15 and up", - value > 5 ~ "+5 to +15", - value > -5 ~ "-5 to +5", - value > -15 ~ "-15 to -5", - TRUE ~ "-15 and below" - )) %>% - mutate(groups = factor(groups, levels = order)) - -state_overlay <- states( - cb = TRUE, - resolution = "20m" -) %>% - filter(GEOID != "72") %>% - shift_geometry() - -ggplot() + - geom_sf(data = net_migration, aes(fill = groups, color = groups), size = 0.1) + - geom_sf(data = state_overlay, fill = NA, color = "black", size = 0.1) + - scale_fill_brewer(palette = "PuOr", direction = -1) + - scale_color_brewer(palette = "PuOr", direction = -1, guide = FALSE) + - coord_sf(datum = NA) + - theme_minimal(base_family = "Roboto") + - labs(title = "Net migration per 1000 residents by county", - subtitle = "US Census Bureau 2022 Population Estimates", - fill = "Rate", - caption = "Data acquired with the R tidycensus package | @kyle_e_walker")
order = c("-15 and below", "-15 to -5", "-5 to +5", "+5 to +15", "+15 and up") + +net_migration <- net_migration %>% + mutate(groups = case_when( + value > 15 ~ "+15 and up", + value > 5 ~ "+5 to +15", + value > -5 ~ "-5 to +5", + value > -15 ~ "-15 to -5", + TRUE ~ "-15 and below" + )) %>% + mutate(groups = factor(groups, levels = order)) + +state_overlay <- states( + cb = TRUE, + resolution = "20m" +) %>% + filter(GEOID != "72") %>% + shift_geometry() + +ggplot() + + geom_sf(data = net_migration, aes(fill = groups, color = groups), size = 0.1) + + geom_sf(data = state_overlay, fill = NA, color = "black", size = 0.1) + + scale_fill_brewer(palette = "PuOr", direction = -1) + + scale_color_brewer(palette = "PuOr", direction = -1, guide = FALSE) + + coord_sf(datum = NA) + + theme_minimal(base_family = "Roboto") + + labs(title = "Net migration per 1000 residents by county", + subtitle = "US Census Bureau 2022 Population Estimates", + fill = "Rate", + caption = "Data acquired with the R tidycensus package | @kyle_e_walker")
@@ -282,53 +282,53 @@+Estimates of population charact However, by specifying
breakdown_labels = TRUE
, the function will return the appropriate labels instead. For example:--la_age_hisp <- get_estimates(geography = "county", - product = "characteristics", - breakdown = c("SEX", "AGEGROUP", "HISP"), - breakdown_labels = TRUE, - state = "CA", - county = "Los Angeles", - year = 2022) - -la_age_hisp
+## # A tibble: 114 × 7 -## GEOID NAME year SEX AGEGROUP HISP value -## <chr> <chr> <int> <chr> <fct> <chr> <dbl> -## 1 06037 Los Angeles County, California 2022 Male All ages Both… 4.83e6 -## 2 06037 Los Angeles County, California 2022 Male All ages Non-… 2.44e6 -## 3 06037 Los Angeles County, California 2022 Male All ages Hisp… 2.39e6 -## 4 06037 Los Angeles County, California 2022 Male Age 0 to 4 yea… Both… 2.49e5 -## 5 06037 Los Angeles County, California 2022 Male Age 0 to 4 yea… Non-… 1.06e5 -## 6 06037 Los Angeles County, California 2022 Male Age 0 to 4 yea… Hisp… 1.43e5 -## 7 06037 Los Angeles County, California 2022 Male Age 5 to 9 yea… Both… 2.84e5 -## 8 06037 Los Angeles County, California 2022 Male Age 5 to 9 yea… Non-… 1.20e5 -## 9 06037 Los Angeles County, California 2022 Male Age 5 to 9 yea… Hisp… 1.63e5 -## 10 06037 Los Angeles County, California 2022 Male Age 10 to 14 y… Both… 2.98e5 -## # ℹ 104 more rows
la_age_hisp <- get_estimates(geography = "county", + product = "characteristics", + breakdown = c("SEX", "AGEGROUP", "HISP"), + breakdown_labels = TRUE, + state = "CA", + county = "Los Angeles", + year = 2022) + +la_age_hisp
## # A tibble: 114 × 7 +## GEOID NAME year SEX AGEGROUP HISP value +## <chr> <chr> <int> <chr> <fct> <chr> <dbl> +## 1 06037 Los Angeles County, California 2022 Male All ages Both… 4.83e6 +## 2 06037 Los Angeles County, California 2022 Male All ages Non-… 2.44e6 +## 3 06037 Los Angeles County, California 2022 Male All ages Hisp… 2.39e6 +## 4 06037 Los Angeles County, California 2022 Male Age 0 to 4 yea… Both… 2.49e5 +## 5 06037 Los Angeles County, California 2022 Male Age 0 to 4 yea… Non-… 1.06e5 +## 6 06037 Los Angeles County, California 2022 Male Age 0 to 4 yea… Hisp… 1.43e5 +## 7 06037 Los Angeles County, California 2022 Male Age 5 to 9 yea… Both… 2.84e5 +## 8 06037 Los Angeles County, California 2022 Male Age 5 to 9 yea… Non-… 1.20e5 +## 9 06037 Los Angeles County, California 2022 Male Age 5 to 9 yea… Hisp… 1.63e5 +## 10 06037 Los Angeles County, California 2022 Male Age 10 to 14 y… Both… 2.98e5 +## # ℹ 104 more rows
With some additional data wrangling, the returned format facilitates analysis and visualization. For example, we can compare population pyramids for Hispanic and non-Hispanic populations in Los Angeles County:
+-compare <- filter(la_age_hisp, str_detect(AGEGROUP, "^Age"), - HISP != "Both Hispanic Origins", - SEX != "Both sexes") %>% - mutate(value = ifelse(SEX == "Male", -value, value)) - -ggplot(compare, aes(x = AGEGROUP, y = value, fill = SEX)) + - geom_bar(stat = "identity", width = 1) + - theme_minimal(base_family = "Roboto") + - scale_y_continuous(labels = function(y) paste0(abs(y / 1000), "k")) + - scale_x_discrete(labels = function(x) gsub("Age | years", "", x)) + - scale_fill_manual(values = c("darkred", "navy")) + - coord_flip() + - facet_wrap(~HISP) + - labs(x = "", - y = "2022 Census Bureau population estimate", - title = "Population structure by Hispanic origin", - subtitle = "Los Angeles County, California", - fill = "", - caption = "Data source: US Census Bureau population estimates & tidycensus R package")
compare <- filter(la_age_hisp, str_detect(AGEGROUP, "^Age"), + HISP != "Both Hispanic Origins", + SEX != "Both sexes") %>% + mutate(value = ifelse(SEX == "Male", -value, value)) + +ggplot(compare, aes(x = AGEGROUP, y = value, fill = SEX)) + + geom_bar(stat = "identity", width = 1) + + theme_minimal(base_family = "Roboto") + + scale_y_continuous(labels = function(y) paste0(abs(y / 1000), "k")) + + scale_x_discrete(labels = function(x) gsub("Age | years", "", x)) + + scale_fill_manual(values = c("darkred", "navy")) + + coord_flip() + + facet_wrap(~HISP) + + labs(x = "", + y = "2022 Census Bureau population estimate", + title = "Population structure by Hispanic origin", + subtitle = "Los Angeles County, California", + fill = "", + caption = "Data source: US Census Bureau population estimates & tidycensus R package")
@@ -368,25 +368,25 @@Using
get_flows()
Here we get county-to-county flow data for Westchester County, NY:--wch_flows <- get_flows( - geography = "county", - state = "NY", - county = "Westchester", - year = 2018 - ) - -wch_flows %>% - filter(!is.na(GEOID2)) %>% - head()
+## # A tibble: 6 × 7 -## GEOID1 GEOID2 FULL1_NAME FULL2_NAME variable estimate moe -## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> -## 1 36119 01089 Westchester County, New York Madison Co… MOVEDIN 0 28 -## 2 36119 01089 Westchester County, New York Madison Co… MOVEDOUT 26 41 -## 3 36119 01089 Westchester County, New York Madison Co… MOVEDNET -26 41 -## 4 36119 01095 Westchester County, New York Marshall C… MOVEDIN 0 28 -## 5 36119 01095 Westchester County, New York Marshall C… MOVEDOUT 35 55 -## 6 36119 01095 Westchester County, New York Marshall C… MOVEDNET -35 55
wch_flows <- get_flows( + geography = "county", + state = "NY", + county = "Westchester", + year = 2018 + ) + +wch_flows %>% + filter(!is.na(GEOID2)) %>% + head()
+## # A tibble: 6 × 7 +## GEOID1 GEOID2 FULL1_NAME FULL2_NAME variable estimate moe +## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> +## 1 36119 01089 Westchester County, New York Madison Co… MOVEDIN 0 28 +## 2 36119 01089 Westchester County, New York Madison Co… MOVEDOUT 26 41 +## 3 36119 01089 Westchester County, New York Madison Co… MOVEDNET -26 41 +## 4 36119 01095 Westchester County, New York Marshall C… MOVEDIN 0 28 +## 5 36119 01095 Westchester County, New York Marshall C… MOVEDOUT 35 55 +## 6 36119 01095 Westchester County, New York Marshall C… MOVEDNET -35 55
With the default setting of
get_flows()
, data is returned in a “tidy” or long format. Notice that for each pair of places, there are three rows returned with one row for each variable @@ -400,38 +400,38 @@Using
get_flows()
One simple question we can answer with this data is, to which county did the most people move from Westchester? -+## # A tibble: 6 × 7 -## GEOID1 GEOID2 FULL1_NAME FULL2_NAME variable estimate moe -## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> -## 1 36119 09001 Westchester County, New York Fairfield … MOVEDOUT 3916 778 -## 2 36119 36061 Westchester County, New York New York C… MOVEDOUT 3328 596 -## 3 36119 36005 Westchester County, New York Bronx Coun… MOVEDOUT 2063 418 -## 4 36119 36027 Westchester County, New York Dutchess C… MOVEDOUT 1870 454 -## 5 36119 36079 Westchester County, New York Putnam Cou… MOVEDOUT 1318 324 -## 6 36119 36081 Westchester County, New York Queens Cou… MOVEDOUT 1082 240
wch_flows %>% + filter(variable == "MOVEDOUT") %>% + arrange(desc(estimate)) %>% + head()
+## # A tibble: 6 × 7 +## GEOID1 GEOID2 FULL1_NAME FULL2_NAME variable estimate moe +## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> +## 1 36119 09001 Westchester County, New York Fairfield … MOVEDOUT 3916 778 +## 2 36119 36061 Westchester County, New York New York C… MOVEDOUT 3328 596 +## 3 36119 36005 Westchester County, New York Bronx Coun… MOVEDOUT 2063 418 +## 4 36119 36027 Westchester County, New York Dutchess C… MOVEDOUT 1870 454 +## 5 36119 36079 Westchester County, New York Putnam Cou… MOVEDOUT 1318 324 +## 6 36119 36081 Westchester County, New York Queens Cou… MOVEDOUT 1082 240
The
-MOVEDOUT
variable only estimates the number of people that moved out of Westchester County and doesn’t account for the number of people that moved in to Westchester from each county. If you are interested in net migration (moved in - moved out), you can use the MOVEDNET
variable.+## # A tibble: 6 × 7 -## GEOID1 GEOID2 FULL1_NAME FULL2_NAME variable estimate moe -## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> -## 1 36119 09001 Westchester County, New York Fairfield … MOVEDNET -1768 958 -## 2 36119 36027 Westchester County, New York Dutchess C… MOVEDNET -1119 497 -## 3 36119 06037 Westchester County, New York Los Angele… MOVEDNET -486 339 -## 4 36119 12099 Westchester County, New York Palm Beach… MOVEDNET -450 182 -## 5 36119 25021 Westchester County, New York Norfolk Co… MOVEDNET -358 351 -## 6 36119 36079 Westchester County, New York Putnam Cou… MOVEDNET -340 407
wch_flows %>% + filter(variable == "MOVEDNET") %>% + arrange(estimate) %>% + head()
+## # A tibble: 6 × 7 +## GEOID1 GEOID2 FULL1_NAME FULL2_NAME variable estimate moe +## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> +## 1 36119 09001 Westchester County, New York Fairfield … MOVEDNET -1768 958 +## 2 36119 36027 Westchester County, New York Dutchess C… MOVEDNET -1119 497 +## 3 36119 06037 Westchester County, New York Los Angele… MOVEDNET -486 339 +## 4 36119 12099 Westchester County, New York Palm Beach… MOVEDNET -450 182 +## 5 36119 25021 Westchester County, New York Norfolk Co… MOVEDNET -358 351 +## 6 36119 36079 Westchester County, New York Putnam Cou… MOVEDNET -340 407
You may have noticed that there are some destination geographies that are not other counties. For people that moved in to Westchester from outside the United States, the Migration Flows data reports the region @@ -442,18 +442,18 @@
Using
get_flows()
MOVEDNET is NA
. The GEOID
of non-US places is also NA
. -+## # A tibble: 6 × 7 -## GEOID1 GEOID2 FULL1_NAME FULL2_NAME variable estimate moe -## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> -## 1 36119 NA Westchester County, New York Africa MOVEDIN 419 411 -## 2 36119 NA Westchester County, New York Africa MOVEDOUT NA NA -## 3 36119 NA Westchester County, New York Africa MOVEDNET NA NA -## 4 36119 NA Westchester County, New York Asia MOVEDIN 2267 436 -## 5 36119 NA Westchester County, New York Asia MOVEDOUT NA NA -## 6 36119 NA Westchester County, New York Asia MOVEDNET NA NA
wch_flows %>% + filter(is.na(GEOID2)) %>% + head()
+## # A tibble: 6 × 7 +## GEOID1 GEOID2 FULL1_NAME FULL2_NAME variable estimate moe +## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> +## 1 36119 NA Westchester County, New York Africa MOVEDIN 419 411 +## 2 36119 NA Westchester County, New York Africa MOVEDOUT NA NA +## 3 36119 NA Westchester County, New York Africa MOVEDNET NA NA +## 4 36119 NA Westchester County, New York Asia MOVEDIN 2267 436 +## 5 36119 NA Westchester County, New York Asia MOVEDOUT NA NA +## 6 36119 NA Westchester County, New York Asia MOVEDNET NA NA
-Demographic characteristics @@ -463,25 +463,25 @@
Demographic characteristics
-la_flows <- get_flows( - geography = "metropolitan statistical area", - breakdown = "RACE", - breakdown_labels = TRUE, - msa = 31080, # los angeles msa fips code - year = 2015 - ) - -# net migration between la and san francisco -la_flows %>% - filter(str_detect(FULL2_NAME, "San Fran"), variable == "MOVEDNET")
+## # A tibble: 5 × 9 -## GEOID1 GEOID2 FULL1_NAME FULL2_NAME RACE RACE_label variable estimate moe -## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> -## 1 31080 41860 Los Angeles… San Franc… 00 All races MOVEDNET -2433 1585 -## 2 31080 41860 Los Angeles… San Franc… 01 White alo… MOVEDNET -1077 1096 -## 3 31080 41860 Los Angeles… San Franc… 02 Black or … MOVEDNET 98 378 -## 4 31080 41860 Los Angeles… San Franc… 03 Asian alo… MOVEDNET -580 778 -## 5 31080 41860 Los Angeles… San Franc… 04 Other rac… MOVEDNET -874 549
la_flows <- get_flows( + geography = "metropolitan statistical area", + breakdown = "RACE", + breakdown_labels = TRUE, + msa = 31080, # los angeles msa fips code + year = 2015 + ) + +# net migration between la and san francisco +la_flows %>% + filter(str_detect(FULL2_NAME, "San Fran"), variable == "MOVEDNET")
+## # A tibble: 5 × 9 +## GEOID1 GEOID2 FULL1_NAME FULL2_NAME RACE RACE_label variable estimate moe +## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> +## 1 31080 41860 Los Angeles… San Franc… 00 All races MOVEDNET -2433 1585 +## 2 31080 41860 Los Angeles… San Franc… 01 White alo… MOVEDNET -1077 1096 +## 3 31080 41860 Los Angeles… San Franc… 02 Black or … MOVEDNET 98 378 +## 4 31080 41860 Los Angeles… San Franc… 03 Asian alo… MOVEDNET -580 778 +## 5 31080 41860 Los Angeles… San Franc… 04 Other rac… MOVEDNET -874 549
Note that the demographic characteristics must be specified in the
breakdown
argument of get_flows()
(not the variable
argument). For each dataset there are three or @@ -505,31 +505,31 @@Mapping migration flowscentroid2 column.
--phx_flows <- get_flows( - geography = "metropolitan statistical area", - msa = 38060, - year = 2018, - geometry = TRUE - ) - -phx_flows %>% - head()
+## Simple feature collection with 6 features and 7 fields -## Active geometry column: centroid1 -## Geometry type: POINT -## Dimension: XY -## Bounding box: xmin: -112.0705 ymin: 33.18571 xmax: -112.0705 ymax: 33.18571 -## Geodetic CRS: NAD83 -## # A tibble: 6 × 9 -## GEOID1 GEOID2 FULL1_NAME FULL2_NAME variable estimate moe -## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> -## 1 38060 NA Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDIN 21602 1464 -## 2 38060 NA Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDOUT 21192 1559 -## 3 38060 NA Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDNET 410 2186 -## 4 38060 NA Phoenix-Mesa-Scottsdale, AZ … Africa MOVEDIN 1078 385 -## 5 38060 NA Phoenix-Mesa-Scottsdale, AZ … Africa MOVEDOUT NA NA -## 6 38060 NA Phoenix-Mesa-Scottsdale, AZ … Africa MOVEDNET NA NA -## # ℹ 2 more variables: centroid1 <POINT [°]>, centroid2 <POINT [°]>
phx_flows <- get_flows( + geography = "metropolitan statistical area", + msa = 38060, + year = 2018, + geometry = TRUE + ) + +phx_flows %>% + head()
+## Simple feature collection with 6 features and 7 fields +## Active geometry column: centroid1 +## Geometry type: POINT +## Dimension: XY +## Bounding box: xmin: -112.0705 ymin: 33.18571 xmax: -112.0705 ymax: 33.18571 +## Geodetic CRS: NAD83 +## # A tibble: 6 × 9 +## GEOID1 GEOID2 FULL1_NAME FULL2_NAME variable estimate moe +## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> +## 1 38060 NA Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDIN 21602 1464 +## 2 38060 NA Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDOUT 21192 1559 +## 3 38060 NA Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDNET 410 2186 +## 4 38060 NA Phoenix-Mesa-Scottsdale, AZ … Africa MOVEDIN 1078 385 +## 5 38060 NA Phoenix-Mesa-Scottsdale, AZ … Africa MOVEDOUT NA NA +## 6 38060 NA Phoenix-Mesa-Scottsdale, AZ … Africa MOVEDNET NA NA +## # ℹ 2 more variables: centroid1 <POINT [°]>, centroid2 <POINT [°]>
With the centroids attached to each pair of places, it is straightforward to map the migration flows. Here, we look at the most common origin MSAs for people moving to Phoenix-Mesa-Scottsdale, AZ. To @@ -537,30 +537,30 @@
Mapping migration flows. To use mapdeck, you’ll need a Mapbox account and access token.
diff --git a/docs/articles/other-datasets_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/other-datasets_files/figure-html/unnamed-chunk-4-1.png index d3613b1..01e462d 100644 Binary files a/docs/articles/other-datasets_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/other-datasets_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/other-datasets_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/other-datasets_files/figure-html/unnamed-chunk-6-1.png index 9913c3e..b827da0 100644 Binary files a/docs/articles/other-datasets_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/other-datasets_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/pums-data.html b/docs/articles/pums-data.html index a84625c..f1270e0 100644 --- a/docs/articles/pums-data.html +++ b/docs/articles/pums-data.html @@ -40,7 +40,7 @@ @@ -172,27 +172,27 @@+-library(mapdeck) - -top_move_in <- phx_flows %>% - filter(!is.na(GEOID2), variable == "MOVEDIN") %>% - slice_max(n = 25, order_by = estimate) %>% - mutate( - width = estimate / 500, - tooltip = paste0( - scales::comma(estimate * 5, 1), - " people moved from ", str_remove(FULL2_NAME, "Metro Area"), - " to ", str_remove(FULL1_NAME, "Metro Area"), " between 2014 and 2018" - ) - ) - -top_move_in %>% - mapdeck(style = mapdeck_style("dark"), pitch = 45) %>% - add_arc( - origin = "centroid1", - destination = "centroid2", - stroke_width = "width", - auto_highlight = TRUE, - highlight_colour = "#8c43facc", - tooltip = "tooltip" - )
library(mapdeck) + +top_move_in <- phx_flows %>% + filter(!is.na(GEOID2), variable == "MOVEDIN") %>% + slice_max(n = 25, order_by = estimate) %>% + mutate( + width = estimate / 500, + tooltip = paste0( + scales::comma(estimate * 5, 1), + " people moved from ", str_remove(FULL2_NAME, "Metro Area"), + " to ", str_remove(FULL1_NAME, "Metro Area"), " between 2014 and 2018" + ) + ) + +top_move_in %>% + mapdeck(style = mapdeck_style("dark"), pitch = 45) %>% + add_arc( + origin = "centroid1", + destination = "centroid2", + stroke_width = "width", + auto_highlight = TRUE, + highlight_colour = "#8c43facc", + tooltip = "tooltip" + )
PUMS data dictionaries
+-install.packages(c("survey", "srvyr"))
install.packages(c("survey", "srvyr"))
+-library(tidyverse) -library(tidycensus) - -pums_vars_2018 <- pums_variables %>% - filter(year == 2018, survey == "acs1")
library(tidyverse) +library(tidycensus) + +pums_vars_2018 <- pums_variables %>% + filter(year == 2018, survey == "acs1")
-
pums_variables
contains both the variables as well as their possible values. So let’s just look at the unique variables.+## # A tibble: 513 × 4 -## var_code var_label data_type level -## <chr> <chr> <chr> <chr> -## 1 SERIALNO Housing unit/GQ person serial number chr NA -## 2 DIVISION Division code based on 2010 Census definitions chr NA -## 3 PUMA Public use microdata area code (PUMA) based on 2010 Census definition (areas … chr NA -## 4 REGION Region code based on 2010 Census definitions chr NA -## 5 ST State Code based on 2010 Census definitions chr NA -## # ℹ 508 more rows
pums_vars_2018 %>% + distinct(var_code, var_label, data_type, level)
+## # A tibble: 513 × 4 +## var_code var_label data_type level +## <chr> <chr> <chr> <chr> +## 1 SERIALNO Housing unit/GQ person serial number chr NA +## 2 DIVISION Division code based on 2010 Census definitions chr NA +## 3 PUMA Public use microdata area code (PUMA) based on 2010 Census definition (areas … chr NA +## 4 REGION Region code based on 2010 Census definitions chr NA +## 5 ST State Code based on 2010 Census definitions chr NA +## # ℹ 508 more rows
If you’re new to PUMS data, this is a good dataset to browse to get a feel for what variables are available.
@@ -212,18 +212,18 @@Person vs. housing unit
--pums_vars_2018 %>% - distinct(var_code, var_label, data_type, level) %>% - filter(level == "person")
+## # A tibble: 279 × 4 -## var_code var_label data_type level -## <chr> <chr> <chr> <chr> -## 1 SPORDER Person number num person -## 2 PWGTP Person's weight num person -## 3 AGEP Age num person -## 4 CIT Citizenship status chr person -## 5 CITWP Year of naturalization write-in num person -## # ℹ 274 more rows
pums_vars_2018 %>% + distinct(var_code, var_label, data_type, level) %>% + filter(level == "person")
+## # A tibble: 279 × 4 +## var_code var_label data_type level +## <chr> <chr> <chr> <chr> +## 1 SPORDER Person number num person +## 2 PWGTP Person's weight num person +## 3 AGEP Age num person +## 4 CIT Citizenship status chr person +## 5 CITWP Year of naturalization write-in num person +## # ℹ 274 more rows
It is important to be mindful of whether the variables you choose to analyze are person- or household-level variables.
@@ -238,23 +238,23 @@Using
get_pums()
to d getPUMA
,SEX
,AGEP
, and SCHL
variables for Vermont from the 2018 1-year ACS.+-vt_pums <- get_pums( - variables = c("PUMA", "SEX", "AGEP", "SCHL"), - state = "VT", - survey = "acs1", - year = 2018 - )
vt_pums <- get_pums( + variables = c("PUMA", "SEX", "AGEP", "SCHL"), + state = "VT", + survey = "acs1", + year = 2018 + )
--vt_pums
+## # A tibble: 6,436 × 9 -## SERIALNO SPORDER WGTP PWGTP AGEP PUMA ST SCHL SEX -## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> -## 1 2018GQ0000859 1 0 61 19 00200 50 19 1 -## 2 2018GQ0001119 1 0 67 80 00200 50 11 2 -## 3 2018GQ0001888 1 0 177 82 00400 50 16 2 -## 4 2018GQ0002438 1 0 17 17 00100 50 16 2 -## 5 2018GQ0003293 1 0 68 20 00400 50 19 2 -## # ℹ 6,431 more rows
vt_pums
+## # A tibble: 6,436 × 9 +## SERIALNO SPORDER WGTP PWGTP AGEP PUMA ST SCHL SEX +## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> +## 1 2018GQ0002083 1 0 59 20 00100 50 19 2 +## 2 2018GQ0002131 1 0 2 51 00400 50 16 1 +## 3 2018GQ0002587 1 0 52 18 00400 50 16 1 +## 4 2018GQ0003751 1 0 34 19 00100 50 19 2 +## 5 2018GQ0003853 1 0 129 20 00400 50 19 1 +## # ℹ 6,431 more rows
We get 6436 rows and 9 columns. In addition to the variables we specified,
get_pums()
also always returns SERIALNO
,SPORDER
,WGTP
, @@ -269,24 +269,24 @@Using
get_pums()
to drecode = TRUE
in get_pums()
to return additional columns with the values of these variables recoded.+-vt_pums_recoded <- get_pums( - variables = c("PUMA", "SEX", "AGEP", "SCHL"), - state = "VT", - survey = "acs1", - year = 2018, - recode = TRUE - )
vt_pums_recoded <- get_pums( + variables = c("PUMA", "SEX", "AGEP", "SCHL"), + state = "VT", + survey = "acs1", + year = 2018, + recode = TRUE + )
--vt_pums_recoded
+## # A tibble: 6,436 × 12 -## SERIALNO SPORDER WGTP PWGTP AGEP PUMA ST SCHL SEX ST_label SCHL_label SEX_label -## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <ord> <ord> <ord> -## 1 2018GQ0000859 1 0 61 19 00200 50 19 1 Vermont/VT 1 or more years o… Male -## 2 2018GQ0001119 1 0 67 80 00200 50 11 2 Vermont/VT Grade 8 Female -## 3 2018GQ0001888 1 0 177 82 00400 50 16 2 Vermont/VT Regular high scho… Female -## 4 2018GQ0002438 1 0 17 17 00100 50 16 2 Vermont/VT Regular high scho… Female -## 5 2018GQ0003293 1 0 68 20 00400 50 19 2 Vermont/VT 1 or more years o… Female -## # ℹ 6,431 more rows
vt_pums_recoded
+## # A tibble: 6,436 × 12 +## SERIALNO SPORDER WGTP PWGTP AGEP PUMA ST SCHL SEX ST_label SCHL_label SEX_label +## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <ord> <ord> <ord> +## 1 2018GQ0001230 1 0 3 26 00300 50 20 1 Vermont/VT Associate's degree Male +## 2 2018GQ0002135 1 0 64 18 00100 50 19 2 Vermont/VT 1 or more years o… Female +## 3 2018GQ0002999 1 0 62 20 00400 50 19 2 Vermont/VT 1 or more years o… Female +## 4 2018GQ0004077 1 0 16 94 00200 50 21 2 Vermont/VT Bachelor's degree Female +## 5 2018GQ0006486 1 0 26 20 00400 50 19 1 Vermont/VT 1 or more years o… Male +## # ℹ 6,431 more rows
-Analyzing PUMS data @@ -300,26 +300,26 @@
Analyzing PUMS data
-sum(vt_pums_recoded$PWGTP)
+## [1] 626299
sum(vt_pums_recoded$PWGTP)
+## [1] 626299
Another convenient approach to weighting PUMS data is to use the
-wt
argument in dplyr::count()
. Here, we calculate the population by sex for each PUMA in Vermont (there are only four in the whole state!).+## # A tibble: 8 × 3 -## PUMA SEX_label n -## <chr> <ord> <dbl> -## 1 00100 Male 108732 -## 2 00100 Female 111564 -## 3 00200 Male 73768 -## 4 00200 Female 73416 -## 5 00300 Male 61601 -## 6 00300 Female 64806 -## 7 00400 Male 65675 -## 8 00400 Female 66737
vt_pums_recoded %>% + count(PUMA, SEX_label, wt = PWGTP)
+## # A tibble: 8 × 3 +## PUMA SEX_label n +## <chr> <ord> <dbl> +## 1 00100 Male 108732 +## 2 00100 Female 111564 +## 3 00200 Male 73768 +## 4 00200 Female 73416 +## 5 00300 Male 61601 +## 6 00300 Female 64806 +## 7 00400 Male 65675 +## 8 00400 Female 66737
Many of the variables included in the PUMS data are categorical and we might want to group some categories together and estimate the proportion of the population with these characteristics. In this @@ -328,27 +328,27 @@
Analyzing PUMS data
--vt_pums_recoded %>% - mutate(ba_above = SCHL %in% c("21", "22", "23", "24")) %>% - group_by(PUMA, SEX_label) %>% - summarize( - total_pop = sum(PWGTP), - mean_age = weighted.mean(AGEP, PWGTP), - ba_above = sum(PWGTP[ba_above == TRUE & AGEP >= 25]), - ba_above_pct = ba_above / sum(PWGTP[AGEP >= 25]) - )
+## # A tibble: 8 × 6 -## # Groups: PUMA [4] -## PUMA SEX_label total_pop mean_age ba_above ba_above_pct -## <chr> <ord> <dbl> <dbl> <dbl> <dbl> -## 1 00100 Male 108732 38.2 34113 0.469 -## 2 00100 Female 111564 40.4 36873 0.473 -## 3 00200 Male 73768 41.4 15831 0.303 -## 4 00200 Female 73416 43.6 20248 0.367 -## 5 00300 Male 61601 43.7 14869 0.326 -## 6 00300 Female 64806 45.0 21527 0.434 -## 7 00400 Male 65675 41.9 12788 0.278 -## 8 00400 Female 66737 44.6 18980 0.391
vt_pums_recoded %>% + mutate(ba_above = SCHL %in% c("21", "22", "23", "24")) %>% + group_by(PUMA, SEX_label) %>% + summarize( + total_pop = sum(PWGTP), + mean_age = weighted.mean(AGEP, PWGTP), + ba_above = sum(PWGTP[ba_above == TRUE & AGEP >= 25]), + ba_above_pct = ba_above / sum(PWGTP[AGEP >= 25]) + )
+## # A tibble: 8 × 6 +## # Groups: PUMA [4] +## PUMA SEX_label total_pop mean_age ba_above ba_above_pct +## <chr> <ord> <dbl> <dbl> <dbl> <dbl> +## 1 00100 Male 108732 38.2 34113 0.469 +## 2 00100 Female 111564 40.4 36873 0.473 +## 3 00200 Male 73768 41.4 15831 0.303 +## 4 00200 Female 73416 43.6 20248 0.367 +## 5 00300 Male 61601 43.7 14869 0.326 +## 6 00300 Female 64806 45.0 21527 0.434 +## 7 00400 Male 65675 41.9 12788 0.278 +## 8 00400 Female 66737 44.6 18980 0.391
+Calculating standard errors @@ -379,18 +379,18 @@
Calculating standard errors
-vt_pums_rep_weights <- get_pums( - variables = c("PUMA", "SEX", "AGEP", "SCHL"), - state = "VT", - survey = "acs1", - year = 2018, - recode = TRUE, - rep_weights = "person" - )
vt_pums_rep_weights <- get_pums( + variables = c("PUMA", "SEX", "AGEP", "SCHL"), + state = "VT", + survey = "acs1", + year = 2018, + recode = TRUE, + rep_weights = "person" + )
To easily convert this data frame to a survey or srvyr object, we can use the
to_survey()
function.+-vt_survey_design <- to_survey(vt_pums_rep_weights)
vt_survey_design <- to_survey(vt_pums_rep_weights)
By default,
to_survey()
converts a data frame to a tbl_svy
object by using person replicate weights. You can change the arguments in to_survey
if you are analyzing @@ -402,61 +402,61 @@Calculating standard errors
--library(srvyr, warn.conflicts = FALSE) - -vt_survey_design %>% - survey_count(PUMA, SEX_label)
+## # A tibble: 8 × 4 -## PUMA SEX_label n n_se -## <chr> <ord> <dbl> <dbl> -## 1 00100 Male 108732 1123. -## 2 00100 Female 111564 1360. -## 3 00200 Male 73768 754. -## 4 00200 Female 73416 809. -## 5 00300 Male 61601 491. -## 6 00300 Female 64806 578. -## 7 00400 Male 65675 702. -## 8 00400 Female 66737 776.
library(srvyr, warn.conflicts = FALSE) + +vt_survey_design %>% + survey_count(PUMA, SEX_label)
+## # A tibble: 8 × 4 +## PUMA SEX_label n n_se +## <chr> <ord> <dbl> <dbl> +## 1 00100 Male 108732 1123. +## 2 00100 Female 111564 1360. +## 3 00200 Male 73768 754. +## 4 00200 Female 73416 809. +## 5 00300 Male 61601 491. +## 6 00300 Female 64806 578. +## 7 00400 Male 65675 702. +## 8 00400 Female 66737 776.
The srvyr syntax is very similar to standard dplyr syntax, so this should look familiar; we’ve swapped out
count()
for survey_count()
and we don’t need a wt
argument because we defined the weights when we set up the srvyr object.The equivalent estimate using survey syntax looks like this:
-+## PUMA SEX_labelMale SEX_labelFemale se1 se2 -## 00100 00100 108732 111564 1122.9765 1360.3476 -## 00200 00200 73768 73416 754.0906 808.7364 -## 00300 00300 61601 64806 490.5432 578.3529 -## 00400 00400 65675 66737 702.0659 775.9812
survey::svyby(~SEX_label, ~PUMA, design = vt_survey_design, survey::svytotal)
+## PUMA SEX_labelMale SEX_labelFemale se1 se2 +## 00100 00100 108732 111564 1122.9765 1360.3476 +## 00200 00200 73768 73416 754.0906 808.7364 +## 00300 00300 61601 64806 490.5432 578.3529 +## 00400 00400 65675 66737 702.0659 775.9812
We can also repeat the estimate we did above and calculate the percentage of people that are 25 and up with a bachelor’s degree, while this time returning the upper and lower bounds of the confidence interval for these estimates. This time though, we have to subset our data frame to only those 25 and older before we summarize.
--vt_survey_design %>% - mutate(ba_above = SCHL %in% c("21", "22", "23", "24")) %>% - filter(AGEP >= 25) %>% - group_by(PUMA, SEX_label) %>% - summarize( - age_25_up = survey_total(vartype = "ci"), - ba_above_n = survey_total(ba_above, vartype = "ci"), - ba_above_pct = survey_mean(ba_above, vartype = "ci") - )
+## # A tibble: 8 × 11 -## # Groups: PUMA [4] -## PUMA SEX_label age_25_up age_25_up_low age_25_up_upp ba_above_n ba_above_n_low ba_above_n_upp -## <chr> <ord> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -## 1 00100 Male 72680 70216. 75144. 34113 29913. 38313. -## 2 00100 Female 77966 75671. 80261. 36873 32202. 41544. -## 3 00200 Male 52278 50826. 53730. 15831 13327. 18335. -## 4 00200 Female 55162 53643. 56681. 20248 17679. 22817. -## 5 00300 Male 45634 44743. 46525. 14869 12638. 17100. -## 6 00300 Female 49546 48576. 50516. 21527 19010. 24044. -## 7 00400 Male 45960 45067. 46853. 12788 10699. 14877. -## 8 00400 Female 48601 47783. 49419. 18980 16540. 21420. -## # ℹ 3 more variables: ba_above_pct <dbl>, ba_above_pct_low <dbl>, ba_above_pct_upp <dbl>
vt_survey_design %>% + mutate(ba_above = SCHL %in% c("21", "22", "23", "24")) %>% + filter(AGEP >= 25) %>% + group_by(PUMA, SEX_label) %>% + summarize( + age_25_up = survey_total(vartype = "ci"), + ba_above_n = survey_total(ba_above, vartype = "ci"), + ba_above_pct = survey_mean(ba_above, vartype = "ci") + )
+## # A tibble: 8 × 11 +## # Groups: PUMA [4] +## PUMA SEX_label age_25_up age_25_up_low age_25_up_upp ba_above_n ba_above_n_low ba_above_n_upp +## <chr> <ord> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +## 1 00100 Male 72680 70216. 75144. 34113 29913. 38313. +## 2 00100 Female 77966 75671. 80261. 36873 32202. 41544. +## 3 00200 Male 52278 50826. 53730. 15831 13327. 18335. +## 4 00200 Female 55162 53643. 56681. 20248 17679. 22817. +## 5 00300 Male 45634 44743. 46525. 14869 12638. 17100. +## 6 00300 Female 49546 48576. 50516. 21527 19010. 24044. +## 7 00400 Male 45960 45067. 46853. 12788 10699. 14877. +## 8 00400 Female 48601 47783. 49419. 18980 16540. 21420. +## # ℹ 3 more variables: ba_above_pct <dbl>, ba_above_pct_low <dbl>, ba_above_pct_upp <dbl>
Modeling with PUMS data @@ -467,85 +467,85 @@
Modeling with PUMS data
+-vt_pums_to_model <- get_pums( - variables = c("PUMA", "WAGP", "JWMNP", "JWTR", "COW", "ESR"), - state = "VT", - survey = "acs5", - year = 2018, - rep_weights = "person" - )
vt_pums_to_model <- get_pums( + variables = c("PUMA", "WAGP", "JWMNP", "JWTR", "COW", "ESR"), + state = "VT", + survey = "acs5", + year = 2018, + rep_weights = "person" + )
Now, we filter out observations that aren’t relevant, do a little recoding of the class of worker variable, and finally convert the data frame to a survey design object.
+-vt_model_sd <- vt_pums_to_model %>% - filter( - ESR == 1, # civilian employed - JWTR != 11, # does not work at home - WAGP > 0, # earned wages last year - JWMNP > 0 # commute more than zero min - ) %>% - mutate( - emp_type = case_when( - COW %in% c("1", "2") ~ "private", - COW %in% c("3", "4", "5") ~ "public", - TRUE ~ "self" - ) - ) %>% - to_survey()
vt_model_sd <- vt_pums_to_model %>% + filter( + ESR == 1, # civilian employed + JWTR != 11, # does not work at home + WAGP > 0, # earned wages last year + JWMNP > 0 # commute more than zero min + ) %>% + mutate( + emp_type = case_when( + COW %in% c("1", "2") ~ "private", + COW %in% c("3", "4", "5") ~ "public", + TRUE ~ "self" + ) + ) %>% + to_survey()
Let’s quickly check out some summary stats using
srvyr
.--vt_model_sd %>% - summarize( - n = survey_total(1), - mean_wage = survey_mean(WAGP), - median_wage = survey_median(WAGP), - mean_commute = survey_mean(JWMNP), - median_commute = survey_median(JWMNP) - )
+## # A tibble: 1 × 10 -## n n_se mean_wage mean_wage_se median_wage median_wage_se mean_commute mean_commute_se -## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -## 1 282733 1933. 44601. 437. 35000 251. 23.3 0.233 -## # ℹ 2 more variables: median_commute <dbl>, median_commute_se <dbl>
vt_model_sd %>% + summarize( + n = survey_total(1), + mean_wage = survey_mean(WAGP), + median_wage = survey_median(WAGP), + mean_commute = survey_mean(JWMNP), + median_commute = survey_median(JWMNP) + )
+## # A tibble: 1 × 10 +## n n_se mean_wage mean_wage_se median_wage median_wage_se mean_commute mean_commute_se +## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +## 1 282733 1933. 44601. 437. 35000 251. 23.3 0.233 +## # ℹ 2 more variables: median_commute <dbl>, median_commute_se <dbl>
--vt_model_sd %>% - survey_count(emp_type)
+## # A tibble: 3 × 3 -## emp_type n n_se -## <chr> <dbl> <dbl> -## 1 private 224760 2001. -## 2 public 42966 1374. -## 3 self 15007 756.
vt_model_sd %>% + survey_count(emp_type)
+## # A tibble: 3 × 3 +## emp_type n n_se +## <chr> <dbl> <dbl> +## 1 private 224760 2001. +## 2 public 42966 1374. +## 3 self 15007 756.
And now we’re ready to fit a simple linear regression model.
--model <- survey::svyglm(log(JWMNP) ~ log(WAGP) + emp_type + PUMA, design = vt_model_sd) -summary(model)
+## -## Call: -## survey::svyglm(formula = log(JWMNP) ~ log(WAGP) + emp_type + -## PUMA, design = vt_model_sd) -## -## Survey design: -## Called via srvyr -## -## Coefficients: -## Estimate Std. Error t value Pr(>|t|) -## (Intercept) 1.699601 0.102181 16.633 < 2e-16 *** -## log(WAGP) 0.113913 0.009548 11.931 < 2e-16 *** -## emp_typepublic -0.052845 0.027420 -1.927 0.0578 . -## emp_typeself -0.306227 0.050016 -6.123 4.20e-08 *** -## PUMA00200 0.007800 0.023943 0.326 0.7455 -## PUMA00300 0.023629 0.025111 0.941 0.3498 -## PUMA00400 -0.117300 0.026028 -4.507 2.46e-05 *** -## --- -## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -## -## (Dispersion parameter for gaussian family taken to be 9328.921) -## -## Number of Fisher Scoring iterations: 2
model <- survey::svyglm(log(JWMNP) ~ log(WAGP) + emp_type + PUMA, design = vt_model_sd) +summary(model)
+## +## Call: +## survey::svyglm(formula = log(JWMNP) ~ log(WAGP) + emp_type + +## PUMA, design = vt_model_sd) +## +## Survey design: +## Called via srvyr +## +## Coefficients: +## Estimate Std. Error t value Pr(>|t|) +## (Intercept) 1.699601 0.102181 16.633 < 2e-16 *** +## log(WAGP) 0.113913 0.009548 11.931 < 2e-16 *** +## emp_typepublic -0.052845 0.027420 -1.927 0.0578 . +## emp_typeself -0.306227 0.050016 -6.123 4.20e-08 *** +## PUMA00200 0.007800 0.023943 0.326 0.7455 +## PUMA00300 0.023629 0.025111 0.941 0.3498 +## PUMA00400 -0.117300 0.026028 -4.507 2.46e-05 *** +## --- +## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +## +## (Dispersion parameter for gaussian family taken to be 9328.921) +## +## Number of Fisher Scoring iterations: 2
+Mapping PUMS data @@ -555,40 +555,40 @@
Mapping PUMS data
-ne_states <- c("VT", "NH", "ME", "MA", "CT", "RI") -ne_pumas <- map(ne_states, tigris::pumas, class = "sf", cb = TRUE, year = 2018) %>% - reduce(rbind)
ne_states <- c("VT", "NH", "ME", "MA", "CT", "RI") +ne_pumas <- map(ne_states, tigris::pumas, class = "sf", cb = TRUE, year = 2018) %>% + reduce(rbind)
Next we download the income-to-poverty ratio from the PUMS dataset and calculate the percentage of population below 200% of the poverty line for each PUMA.
+-ne_pums <- get_pums( - variables = c("PUMA", "POVPIP"), - state = ne_states, - survey = "acs1", - year = 2018 - ) - -ne_pov <- ne_pums %>% - group_by(ST, PUMA) %>% - summarize( - total_pop = sum(PWGTP), - pct_in_pov = sum(PWGTP[POVPIP < 200]) / total_pop - )
ne_pums <- get_pums( + variables = c("PUMA", "POVPIP"), + state = ne_states, + survey = "acs1", + year = 2018 + ) + +ne_pov <- ne_pums %>% + group_by(ST, PUMA) %>% + summarize( + total_pop = sum(PWGTP), + pct_in_pov = sum(PWGTP[POVPIP < 200]) / total_pop + )
And now we can make a choropleth map by joining the PUMA boundaries with the PUMS data.
+-ne_pumas %>% - left_join(ne_pov, by = c("STATEFP10" = "ST", "PUMACE10" = "PUMA")) %>% - ggplot(aes(fill = pct_in_pov)) + - geom_sf() + - scale_fill_viridis_b( - name = NULL, - option = "magma", - labels = scales::label_percent(1) - ) + - labs(title = "Percentage of population below 200% of the poverty line") + - theme_void()
ne_pumas %>% + left_join(ne_pov, by = c("STATEFP10" = "ST", "PUMACE10" = "PUMA")) %>% + ggplot(aes(fill = pct_in_pov)) + + geom_sf() + + scale_fill_viridis_b( + name = NULL, + option = "magma", + labels = scales::label_percent(1) + ) + + labs(title = "Percentage of population below 200% of the poverty line") + + theme_void()
@@ -633,57 +633,57 @@+Verification of PUMS estimates
-wy_relp <- get_pums( - variables = "RELP", - state = "Wyoming", - survey = "acs1", - year = 2018, - rep_weights = "person" -) - -ut_ten <- get_pums( - variables = "TEN", - state = "Utah", - survey = "acs1", - year = 2018, - rep_weights = "housing" -) - -hi_age <- get_pums( - variables = "AGEP", - state = "Hawaii", - survey = "acs1", - year = 2018, - rep_weights = "person" -)
wy_relp <- get_pums( + variables = "RELP", + state = "Wyoming", + survey = "acs1", + year = 2018, + rep_weights = "person" +) + +ut_ten <- get_pums( + variables = "TEN", + state = "Utah", + survey = "acs1", + year = 2018, + rep_weights = "housing" +) + +hi_age <- get_pums( + variables = "AGEP", + state = "Hawaii", + survey = "acs1", + year = 2018, + rep_weights = "person" +)
-+## # A tibble: 1 × 3 -## RELP n n_se -## <chr> <dbl> <dbl> -## 1 16 7018 2.64
wy_relp %>% + to_survey() %>% + survey_count(RELP) %>% + filter(RELP == "16")
+## # A tibble: 1 × 3 +## RELP n n_se +## <chr> <dbl> <dbl> +## 1 16 7018 2.64
--ut_ten %>% - distinct(SERIALNO, .keep_all = TRUE) %>% - to_survey(type = "housing") %>% - survey_count(TEN) %>% - filter(TEN == 2)
+## # A tibble: 1 × 3 -## TEN n n_se -## <chr> <dbl> <dbl> -## 1 2 209632 3972.
ut_ten %>% + distinct(SERIALNO, .keep_all = TRUE) %>% + to_survey(type = "housing") %>% + survey_count(TEN) %>% + filter(TEN == 2)
+## # A tibble: 1 × 3 +## TEN n n_se +## <chr> <dbl> <dbl> +## 1 2 209632 3972.
--hi_age %>% - filter(between(AGEP, 0, 4)) %>% - to_survey() %>% - summarize(age_0_4 = survey_total(1))
+## # A tibble: 1 × 2 -## age_0_4 age_0_4_se -## <dbl> <dbl> -## 1 86452 944.
hi_age %>% + filter(between(AGEP, 0, 4)) %>% + to_survey() %>% + summarize(age_0_4 = survey_total(1))
+## # A tibble: 1 × 2 +## age_0_4 age_0_4_se +## <dbl> <dbl> +## 1 86452 944.
3 for 3 – yay!
diff --git a/docs/articles/pums-data_files/figure-html/unnamed-chunk-23-1.png b/docs/articles/pums-data_files/figure-html/unnamed-chunk-23-1.png index 4fbeb77..570de5f 100644 Binary files a/docs/articles/pums-data_files/figure-html/unnamed-chunk-23-1.png and b/docs/articles/pums-data_files/figure-html/unnamed-chunk-23-1.png differ diff --git a/docs/articles/spatial-data.html b/docs/articles/spatial-data.html index fc66a07..12fd817 100644 --- a/docs/articles/spatial-data.html +++ b/docs/articles/spatial-data.html @@ -40,7 +40,7 @@@@ -124,39 +124,39 @@Spatial data in tidycensus
The following example shows median household income from the 2016-2020 ACS for Census tracts in Orange County, California:
--library(tidycensus) -library(tidyverse) -options(tigris_use_cache = TRUE) - -orange <- get_acs( - state = "CA", - county = "Orange", - geography = "tract", - variables = "B19013_001", - geometry = TRUE, - year = 2020 -) - -head(orange)
+## Simple feature collection with 6 features and 5 fields -## Geometry type: MULTIPOLYGON -## Dimension: XY -## Bounding box: xmin: -118.0369 ymin: 33.69354 xmax: -117.7822 ymax: 33.85749 -## Geodetic CRS: NAD83 -## GEOID NAME variable -## 1 06059086701 Census Tract 867.01, Orange County, California B19013_001 -## 2 06059075901 Census Tract 759.01, Orange County, California B19013_001 -## 3 06059075303 Census Tract 753.03, Orange County, California B19013_001 -## 4 06059052527 Census Tract 525.27, Orange County, California B19013_001 -## 5 06059110109 Census Tract 1101.09, Orange County, California B19013_001 -## 6 06059087106 Census Tract 871.06, Orange County, California B19013_001 -## estimate moe geometry -## 1 86922 11391 MULTIPOLYGON (((-117.9762 3... -## 2 78846 10972 MULTIPOLYGON (((-117.8618 3... -## 3 123654 21900 MULTIPOLYGON (((-117.8824 3... -## 4 135097 10971 MULTIPOLYGON (((-117.8035 3... -## 5 107463 12665 MULTIPOLYGON (((-118.0369 3... -## 6 45327 8700 MULTIPOLYGON (((-117.9414 3...
library(tidycensus) +library(tidyverse) +options(tigris_use_cache = TRUE) + +orange <- get_acs( + state = "CA", + county = "Orange", + geography = "tract", + variables = "B19013_001", + geometry = TRUE, + year = 2020 +) + +head(orange)
+## Simple feature collection with 6 features and 5 fields +## Geometry type: MULTIPOLYGON +## Dimension: XY +## Bounding box: xmin: -118.0096 ymin: 33.77397 xmax: -117.7905 ymax: 33.93992 +## Geodetic CRS: NAD83 +## GEOID NAME variable +## 1 06059110603 Census Tract 1106.03, Orange County, California B19013_001 +## 2 06059011503 Census Tract 115.03, Orange County, California B19013_001 +## 3 06059001102 Census Tract 11.02, Orange County, California B19013_001 +## 4 06059021812 Census Tract 218.12, Orange County, California B19013_001 +## 5 06059001301 Census Tract 13.01, Orange County, California B19013_001 +## 6 06059088701 Census Tract 887.01, Orange County, California B19013_001 +## estimate moe geometry +## 1 56563 13103 MULTIPOLYGON (((-118.0096 3... +## 2 101800 10306 MULTIPOLYGON (((-117.8984 3... +## 3 99286 18207 MULTIPOLYGON (((-117.9765 3... +## 4 133494 8958 MULTIPOLYGON (((-117.8184 3... +## 5 75994 18045 MULTIPOLYGON (((-117.9766 3... +## 6 54759 7682 MULTIPOLYGON (((-117.9673 3...
Our object
orange
looks much like the basic tidycensus output, but with a geometry
list-column describing the geometry of each feature, using the @@ -169,10 +169,10 @@Spatial data in tidycensus
the geom_sf
functionality currently in the development version of ggplot2:+-orange %>% - ggplot(aes(fill = estimate)) + - geom_sf(color = NA) + - scale_fill_viridis_c(option = "magma")
orange %>% + ggplot(aes(fill = estimate)) + + geom_sf(color = NA) + + scale_fill_viridis_c(option = "magma")
Please note that the UTM Zone 11N coordinate system (
26911
) is appropriate for Southern California but may not @@ -195,37 +195,37 @@Faceted mapping
--racevars <- c(White = "P2_005N", - Black = "P2_006N", - Asian = "P2_008N", - Hispanic = "P2_002N") - -harris <- get_decennial( - geography = "tract", - variables = racevars, - state = "TX", - county = "Harris County", - geometry = TRUE, - summary_var = "P2_001N", - year = 2020, - sumfile = "pl" -) - -head(harris)
+## Simple feature collection with 6 features and 5 fields -## Geometry type: MULTIPOLYGON -## Dimension: XY -## Bounding box: xmin: -95.46502 ymin: 29.53424 xmax: -95.09005 ymax: 29.96492 -## Geodetic CRS: NAD83 -## # A tibble: 6 × 6 -## GEOID NAME variable value summary_value geometry -## <chr> <chr> <chr> <dbl> <dbl> <MULTIPOLYGON [°]> -## 1 48201341203 Census Tra… White 1503 2355 (((-95.10641 29.54594, -… -## 2 48201341203 Census Tra… Black 177 2355 (((-95.10641 29.54594, -… -## 3 48201341203 Census Tra… Asian 54 2355 (((-95.10641 29.54594, -… -## 4 48201341203 Census Tra… Hispanic 492 2355 (((-95.10641 29.54594, -… -## 5 48201550601 Census Tra… White 265 6673 (((-95.46502 29.96456, -… -## 6 48201550601 Census Tra… Black 2156 6673 (((-95.46502 29.96456, -…
racevars <- c(White = "P2_005N", + Black = "P2_006N", + Asian = "P2_008N", + Hispanic = "P2_002N") + +harris <- get_decennial( + geography = "tract", + variables = racevars, + state = "TX", + county = "Harris County", + geometry = TRUE, + summary_var = "P2_001N", + year = 2020, + sumfile = "pl" +) + +head(harris)
+## Simple feature collection with 6 features and 5 fields +## Geometry type: MULTIPOLYGON +## Dimension: XY +## Bounding box: xmin: -95.51535 ymin: 29.80887 xmax: -95.3994 ymax: 29.92537 +## Geodetic CRS: NAD83 +## # A tibble: 6 × 6 +## GEOID NAME variable value summary_value geometry +## <chr> <chr> <chr> <dbl> <dbl> <MULTIPOLYGON [°]> +## 1 48201530200 Census Tra… White 2057 3766 (((-95.45086 29.81984, -… +## 2 48201530200 Census Tra… Black 127 3766 (((-95.45086 29.81984, -… +## 3 48201530200 Census Tra… Asian 239 3766 (((-95.45086 29.81984, -… +## 4 48201530200 Census Tra… Hispanic 1154 3766 (((-95.45086 29.81984, -… +## 5 48201534002 Census Tra… White 388 5653 (((-95.51398 29.92533, -… +## 6 48201534002 Census Tra… Black 685 5653 (((-95.51398 29.92533, -…
We notice that there are four entries for each Census tract, with each entry representing one of our requested variables. The
summary_value
column represents the value of the summary @@ -237,14 +237,14 @@Faceted mapping
+-harris %>% - mutate(percent = 100 * (value / summary_value)) %>% - ggplot(aes(fill = percent)) + - facet_wrap(~variable) + - geom_sf(color = NA) + - theme_void() + - scale_fill_viridis_c() + - labs(fill = "% of population\n(2020 Census)")
harris %>% + mutate(percent = 100 * (value / summary_value)) %>% + ggplot(aes(fill = percent)) + + facet_wrap(~variable) + + geom_sf(color = NA) + + theme_void() + + scale_fill_viridis_c() + + labs(fill = "% of population\n(2020 Census)")
@@ -261,21 +261,21 @@Detailed shorelin example, take this example of median household income by Census tract in Manhattan (New York County), NY:
+-library(tidycensus) -library(tidyverse) -options(tigris_use_cache = TRUE) - -ny <- get_acs(geography = "tract", - variables = "B19013_001", - state = "NY", - county = "New York", - year = 2020, - geometry = TRUE) - -ggplot(ny, aes(fill = estimate)) + - geom_sf() + - theme_void() + - scale_fill_viridis_c(labels = scales::dollar)
library(tidycensus) +library(tidyverse) +options(tigris_use_cache = TRUE) + +ny <- get_acs(geography = "tract", + variables = "B19013_001", + state = "NY", + county = "New York", + year = 2020, + geometry = TRUE) + +ggplot(ny, aes(fill = estimate)) + + geom_sf() + + theme_void() + + scale_fill_viridis_c(labels = scales::dollar)
As illustrated in the graphic, the boundaries of Manhattan include water boundaries - stretching into the Hudson and East Rivers. In turn, @@ -292,25 +292,158 @@
Detailed shorelin first transformed to a projected coordinate reference system to improve performance.
+-library(tigris) -library(sf) - -ny_erase <- get_acs( - geography = "tract", - variables = "B19013_001", - state = "NY", - county = "New York", - year = 2020, - geometry = TRUE, - cb = FALSE -) %>% - st_transform(26918) %>% - erase_water(year = 2020) - -ggplot(ny_erase, aes(fill = estimate)) + - geom_sf() + - theme_void() + - scale_fill_viridis_c(labels = scales::dollar)
library(tigris) +library(sf) + +ny_erase <- get_acs( + geography = "tract", + variables = "B19013_001", + state = "NY", + county = "New York", + year = 2020, + geometry = TRUE, + cb = FALSE +) %>% + st_transform(26918) %>% + erase_water(year = 2020)
++## + | + | | 0% + | + | | 1% + | + |= | 1% + | + |= | 2% + | + |== | 2% + | + |== | 3% + | + |=== | 4% + | + |=== | 5% + | + |==== | 5% + | + |==== | 6% + | + |===== | 6% + | + |===== | 7% + | + |===== | 8% + | + |====== | 8% + | + |====== | 9% + | + |======= | 10% + | + |======= | 11% + | + |======== | 11% + | + |======== | 12% + | + |========= | 12% + | + |========= | 13% + | + |========= | 14% + | + |========== | 14% + | + |========== | 15% + | + |=========== | 15% + | + |=========== | 16% + | + |============ | 17% + | + |============= | 18% + | + |============= | 19% + | + |============== | 20% + | + |=============== | 21% + | + |================ | 22% + | + |================ | 23% + | + |================= | 24% + | + |================= | 25% + | + |================== | 25% + | + |================== | 26% + | + |=================== | 27% + | + |=================== | 28% + | + |==================== | 28% + | + |==================== | 29% + | + |===================== | 29% + | + |===================== | 30% + | + |====================== | 31% + | + |====================== | 32% + | + |======================= | 32% + | + |======================= | 33% + | + |============================== | 43% + | + |============================== | 44% + | + |=============================== | 44% + | + |=============================== | 45% + | + |================================ | 45% + | + |================================= | 47% + | + |==================================== | 51% + | + |==================================== | 52% + | + |======================================== | 57% + | + |======================================== | 58% + | + |============================================== | 65% + | + |===================================================== | 75% + | + |=========================================================== | 85% + | + |============================================================ | 86% + | + |============================================================= | 
88% + | + |============================================================== | 88% + | + |================================================================ | 91% + | + |==================================================================== | 97% + | + |======================================================================| 100%
+ggplot(ny_erase, aes(fill = estimate)) + + geom_sf() + + theme_void() + + scale_fill_viridis_c(labels = scales::dollar)
The map appears as before, but instead the polygons now hug the shoreline of Manhattan. Setting the same
year
in @@ -325,9 +458,9 @@Writing to shapefilesst_write function in the sf package: - +
Your tidycensus-obtained dataset can now be used in ArcGIS, QGIS, Tableau, or any other application that reads shapefiles.
diff --git a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-2-1.png b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-2-1.png index 8efe14c..c8ffd65 100644 Binary files a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-2-1.png and b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-2-1.png differ diff --git a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-4-1.png index 20d0745..ef83408 100644 Binary files a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-5-1.png index 3fe76a9..38df762 100644 Binary files a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-6-1.png index 7dd9348..fb94b56 100644 Binary files a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/authors.html b/docs/authors.html index 8ef37ea..e410b18 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ @@ -97,15 +97,15 @@Citation
-Walker K, Herman M (2023). +
Walker K, Herman M (2024). tidycensus: Load US Census Boundary and Attribute Data as 'tidyverse' and 'sf'-Ready Data Frames. -R package version 1.5, https://walker-data.com/tidycensus/. +R package version 1.6.1, https://walker-data.com/tidycensus/.
@Manual{, title = {tidycensus: Load US Census Boundary and Attribute Data as 'tidyverse' and 'sf'-Ready Data Frames}, author = {Kyle Walker and Matt Herman}, - year = {2023}, - note = {R package version 1.5}, + year = {2024}, + note = {R package version 1.6.1}, url = {https://walker-data.com/tidycensus/}, }diff --git a/docs/index.html b/docs/index.html index 6c1f644..795118d 100644 --- a/docs/index.html +++ b/docs/index.html @@ -44,7 +44,7 @@ @@ -112,7 +112,7 @@tidycensus is an R package that allows users to interface with a select number of the US Census Bureau’s data APIs and return tidyverse-ready data frames, optionally with simple feature geometry included. Install from CRAN with the following command:
+-install.packages("tidycensus")
install.packages("tidycensus")
tidycensus is designed to help R users get Census data that is pre-prepared for exploration within the tidyverse, and optionally spatially with sf. To learn more about how the package works, please read through the following articles:
- Basic usage of tidycensus
diff --git a/docs/news/index.html b/docs/news/index.html index 763e3a3..a317a9e 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ @@ -76,7 +76,14 @@Changelog
-+tidycensus 1.5
+tidycensus 1.62024-01-25
+
- Default year in
+get_acs()
andget_pums()
updated to 2022 following full release of the 2022 ACS datasets.- Geometry support for the 118th Congress in
+get_decennial()
whensumfile = "cd118"
is used, and inget_acs()
+- Various bug fixes and performance improvements.
++diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 6d59ec8..a2669e8 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -7,5 +7,5 @@ articles: other-datasets: other-datasets.html pums-data: pums-data.html spatial-data: spatial-data.html -last_built: 2023-09-25T16:08Z +last_built: 2024-02-07T18:01Z diff --git a/docs/reference/acs5_geography.html b/docs/reference/acs5_geography.html index 247884b..6950888 100644 --- a/docs/reference/acs5_geography.html +++ b/docs/reference/acs5_geography.html @@ -25,7 +25,7 @@ @@ -94,12 +94,12 @@tidycensus 1.52023-09-26
- Support added for the new Detailed DHC-A file, which includes the ability to query for detailed population groups. This functionality is now enabled for Summary Files 2 and 4 in past years as well with the
pop_group
argument.- Full geometry support enabled for the 2022 1-year American Community Survey.
- Various bug fixes and performance improvements, including the pesky ‘Error in UseMethod(“gather”) no applicable method for ’gather’ applied to an object of class “character”’ problem.
@@ -177,19 +184,19 @@tidycensus 0.4
Bug fixed that was causing GEOIDs for some states to be converted to NA on certain Linux platforms
A new parameter,
shift_geo
, allows tidycensus users to get US state and county geometry originally obtained with the albersusa R package with Alaska and Hawaii shifted and re-scaled for better cartographic display of the entire US.+--library(tidycensus) -library(tidyverse) -library(viridis) - -us_county_income <- get_acs(geography = "county", variables = "B19013_001", - shift_geo = TRUE, geometry = TRUE) - -ggplot(us_county_income) + - geom_sf(aes(fill = estimate), color = NA) + - coord_sf(datum = NA) + - theme_minimal() + - scale_fill_viridis_c()
+library(tidycensus) +library(tidyverse) +library(viridis) + +us_county_income <- get_acs(geography = "county", variables = "B19013_001", + shift_geo = TRUE, geometry = TRUE) + +ggplot(us_county_income) + + geom_sf(aes(fill = estimate), color = NA) + + coord_sf(datum = NA) + + theme_minimal() + + scale_fill_viridis_c()
Dataset used to identify geography availability in the 5-year ACS Detailed T
Format
-An object of class
+tbl_df
(inherits fromtbl
,data.frame
) with 11112 rows and 3 columns.An object of class
tbl_df
(inherits fromtbl
,data.frame
) with 12228 rows and 3 columns.@@ -81,15 +81,15 @@Details
diff --git a/docs/reference/as_dot_density.html b/docs/reference/as_dot_density.html index 50480d5..214ee58 100644 --- a/docs/reference/as_dot_density.html +++ b/docs/reference/as_dot_density.html @@ -17,7 +17,7 @@Convert polygon geometry to dots for dot-density mapping
-+as_dot_density( - input_data, - value, - values_per_dot, - group = NULL, - erase_water = FALSE, - area_threshold = NULL, - water_year = 2020 -)
as_dot_density( + input_data, + value, + values_per_dot, + group = NULL, + erase_water = FALSE, + area_threshold = NULL, + water_year = 2020 +)
@@ -136,51 +136,51 @@@@ -89,7 +89,7 @@Details
diff --git a/docs/reference/census_api_key.html b/docs/reference/census_api_key.html index 8f77715..0fc94cd 100644 --- a/docs/reference/census_api_key.html +++ b/docs/reference/census_api_key.html @@ -21,7 +21,7 @@Examples
-if (FALSE) { - -library(tidycensus) -library(ggplot2) - -# Identify variables for mapping -race_vars <- c( - Hispanic = "P2_002N", - White = "P2_005N", - Black = "P2_006N", - Asian = "P2_008N" -) - -# Get data from tidycensus -baltimore_race <- get_decennial( - geography = "tract", - variables = race_vars, - state = "MD", - county = "Baltimore city", - geometry = TRUE, - year = 2020 -) - -# Convert data to dots -baltimore_dots <- as_dot_density( - baltimore_race, - value = "value", - values_per_dot = 100, - group = "variable" -) - -# Use one set of polygon geometries as a base layer -baltimore_base <- baltimore_race[baltimore_race$variable == "Hispanic", ] - -# Map with ggplot2 -ggplot() + - geom_sf(data = baltimore_base, - fill = "white", - color = "grey") + - geom_sf(data = baltimore_dots, - aes(color = variable), - size = 0.01) + - theme_void() - -} +
if (FALSE) { + +library(tidycensus) +library(ggplot2) + +# Identify variables for mapping +race_vars <- c( + Hispanic = "P2_002N", + White = "P2_005N", + Black = "P2_006N", + Asian = "P2_008N" +) + +# Get data from tidycensus +baltimore_race <- get_decennial( + geography = "tract", + variables = race_vars, + state = "MD", + county = "Baltimore city", + geometry = TRUE, + year = 2020 +) + +# Convert data to dots +baltimore_dots <- as_dot_density( + baltimore_race, + value = "value", + values_per_dot = 100, + group = "variable" +) + +# Use one set of polygon geometries as a base layer +baltimore_base <- baltimore_race[baltimore_race$variable == "Hispanic", ] + +# Map with ggplot2 +ggplot() + + geom_sf(data = baltimore_base, + fill = "white", + color = "grey") + + geom_sf(data = baltimore_dots, + aes(color = variable), + size = 0.01) + + theme_void() + +}
Install a CENSUS API Key in Your
.Renviron
File for Repeated Us-+census_api_key(key, overwrite = FALSE, install = FALSE)
census_api_key(key, overwrite = FALSE, install = FALSE)
@@ -109,23 +109,23 @@@@ -81,7 +81,7 @@Arguments
diff --git a/docs/reference/check_ddhca_groups.html b/docs/reference/check_ddhca_groups.html index 7f87062..b69b310 100644 --- a/docs/reference/check_ddhca_groups.html +++ b/docs/reference/check_ddhca_groups.html @@ -17,7 +17,7 @@Examples
--if (FALSE) { -census_api_key("111111abc", install = TRUE) -# First time, reload your environment so you can use the key without restarting R. -readRenviron("~/.Renviron") -# You can check it with: -Sys.getenv("CENSUS_API_KEY") -} - -if (FALSE) { -# If you need to overwrite an existing key: -census_api_key("111111abc", overwrite = TRUE, install = TRUE) -# First time, relead your environment so you can use the key without restarting R. -readRenviron("~/.Renviron") -# You can check it with: -Sys.getenv("CENSUS_API_KEY") -} +
+if (FALSE) { +census_api_key("111111abc", install = TRUE) +# First time, reload your environment so you can use the key without restarting R. +readRenviron("~/.Renviron") +# You can check it with: +Sys.getenv("CENSUS_API_KEY") +} + +if (FALSE) { +# If you need to overwrite an existing key: +census_api_key("111111abc", overwrite = TRUE, install = TRUE) +# First time, relead your environment so you can use the key without restarting R. +readRenviron("~/.Renviron") +# You can check it with: +Sys.getenv("CENSUS_API_KEY") +}
Check to see if a given geography / population group combination is availabl
-+check_ddhca_groups(geography, pop_group, state = NULL, county = NULL)
check_ddhca_groups(geography, pop_group, state = NULL, county = NULL)
diff --git a/docs/reference/county_laea.html b/docs/reference/county_laea.html index 5dc261c..576b449 100644 --- a/docs/reference/county_laea.html +++ b/docs/reference/county_laea.html @@ -18,7 +18,7 @@@@ -83,9 +83,9 @@County geometry with Alaska and Hawaii shifted and re-scaled
diff --git a/docs/reference/fips_codes.html b/docs/reference/fips_codes.html index dcb9c42..b959846 100644 --- a/docs/reference/fips_codes.html +++ b/docs/reference/fips_codes.html @@ -26,7 +26,7 @@@@ -96,7 +96,7 @@Dataset with FIPS codes for US states and counties
diff --git a/docs/reference/get_acs.html b/docs/reference/get_acs.html index 3067fc7..f4b3d29 100644 --- a/docs/reference/get_acs.html +++ b/docs/reference/get_acs.html @@ -17,7 +17,7 @@@@ -81,26 +81,26 @@Obtain data and feature geometry for the American Community Survey
-+get_acs( - geography, - variables = NULL, - table = NULL, - cache_table = FALSE, - year = 2021, - output = "tidy", - state = NULL, - county = NULL, - zcta = NULL, - geometry = FALSE, - keep_geo_vars = FALSE, - shift_geo = FALSE, - summary_var = NULL, - key = NULL, - moe_level = 90, - survey = "acs5", - show_call = FALSE, - ... -)
get_acs( + geography, + variables = NULL, + table = NULL, + cache_table = FALSE, + year = 2022, + output = "tidy", + state = NULL, + county = NULL, + zcta = NULL, + geometry = FALSE, + keep_geo_vars = FALSE, + shift_geo = FALSE, + summary_var = NULL, + key = NULL, + moe_level = 90, + survey = "acs5", + show_call = FALSE, + ... +)
@@ -131,8 +131,8 @@@@ -81,26 +81,26 @@Arguments
year +available from 2009 through 2022; 1-year ACS data is available from 2005 +through 2022, with the exception of 2020. Defaults to 2022. The year, or endyear, of the ACS sample. 5-year ACS data is -available from 2009 through 2021; 1-year ACS data is available from 2005 -through 2021, with the exception of 2020. Defaults to 2021.
output @@ -219,35 +219,35 @@Value
diff --git a/docs/reference/get_decennial.html b/docs/reference/get_decennial.html index a902b96..36d9db3 100644 --- a/docs/reference/get_decennial.html +++ b/docs/reference/get_decennial.html @@ -17,7 +17,7 @@Examples
-if (FALSE) { -library(tidycensus) -library(tidyverse) -library(viridis) -census_api_key("YOUR KEY GOES HERE") - -tarr <- get_acs(geography = "tract", variables = "B19013_001", - state = "TX", county = "Tarrant", geometry = TRUE, year = 2020) - -ggplot(tarr, aes(fill = estimate, color = estimate)) + - geom_sf() + - coord_sf(crs = 26914) + - scale_fill_viridis(option = "magma") + - scale_color_viridis(option = "magma") - - -vt <- get_acs(geography = "county", variables = "B19013_001", state = "VT", year = 2019) - -vt %>% -mutate(NAME = gsub(" County, Vermont", "", NAME)) %>% - ggplot(aes(x = estimate, y = reorder(NAME, estimate))) + - geom_errorbar(aes(xmin = estimate - moe, xmax = estimate + moe), width = 0.3, size = 0.5) + - geom_point(color = "red", size = 3) + - labs(title = "Household income by county in Vermont", - subtitle = "2015-2019 American Community Survey", - y = "", - x = "ACS estimate (bars represent margin of error)") - -} +
if (FALSE) { +library(tidycensus) +library(tidyverse) +library(viridis) +census_api_key("YOUR KEY GOES HERE") + +tarr <- get_acs(geography = "tract", variables = "B19013_001", + state = "TX", county = "Tarrant", geometry = TRUE, year = 2020) + +ggplot(tarr, aes(fill = estimate, color = estimate)) + + geom_sf() + + coord_sf(crs = 26914) + + scale_fill_viridis(option = "magma") + + scale_color_viridis(option = "magma") + + +vt <- get_acs(geography = "county", variables = "B19013_001", state = "VT", year = 2019) + +vt %>% +mutate(NAME = gsub(" County, Vermont", "", NAME)) %>% + ggplot(aes(x = estimate, y = reorder(NAME, estimate))) + + geom_errorbar(aes(xmin = estimate - moe, xmax = estimate + moe), width = 0.3, size = 0.5) + + geom_point(color = "red", size = 3) + + labs(title = "Household income by county in Vermont", + subtitle = "2015-2019 American Community Survey", + y = "", + x = "ACS estimate (bars represent margin of error)") + +}
Obtain data and feature geometry for the decennial US Census
-+get_decennial( - geography, - variables = NULL, - table = NULL, - cache_table = FALSE, - year = 2020, - sumfile = NULL, - state = NULL, - county = NULL, - geometry = FALSE, - output = "tidy", - keep_geo_vars = FALSE, - shift_geo = FALSE, - summary_var = NULL, - pop_group = NULL, - pop_group_label = FALSE, - key = NULL, - show_call = FALSE, - ... -)
get_decennial( + geography, + variables = NULL, + table = NULL, + cache_table = FALSE, + year = 2020, + sumfile = NULL, + state = NULL, + county = NULL, + geometry = FALSE, + output = "tidy", + keep_geo_vars = FALSE, + shift_geo = FALSE, + summary_var = NULL, + pop_group = NULL, + pop_group_label = FALSE, + key = NULL, + show_call = FALSE, + ... +)
@@ -213,24 +213,24 @@@@ -81,27 +81,27 @@Value
diff --git a/docs/reference/get_estimates.html b/docs/reference/get_estimates.html index dd6ebee..647f222 100644 --- a/docs/reference/get_estimates.html +++ b/docs/reference/get_estimates.html @@ -17,7 +17,7 @@Examples
-if (FALSE) { -# Plot of race/ethnicity by county in Illinois for 2010 -library(tidycensus) -library(tidyverse) -library(viridis) -census_api_key("YOUR KEY GOES HERE") -vars10 <- c("P005003", "P005004", "P005006", "P004003") - -il <- get_decennial(geography = "county", variables = vars10, year = 2010, - summary_var = "P001001", state = "IL", geometry = TRUE) %>% - mutate(pct = 100 * (value / summary_value)) - -ggplot(il, aes(fill = pct, color = pct)) + - geom_sf() + - facet_wrap(~variable) - - -} +
if (FALSE) { +# Plot of race/ethnicity by county in Illinois for 2010 +library(tidycensus) +library(tidyverse) +library(viridis) +census_api_key("YOUR KEY GOES HERE") +vars10 <- c("P005003", "P005004", "P005006", "P004003") + +il <- get_decennial(geography = "county", variables = vars10, year = 2010, + summary_var = "P001001", state = "IL", geometry = TRUE) %>% + mutate(pct = 100 * (value / summary_value)) + +ggplot(il, aes(fill = pct, color = pct)) + + geom_sf() + + facet_wrap(~variable) + + +}
Get data from the US Census Bureau Population Estimates Program
-+get_estimates( - geography = c("us", "region", "division", "state", "county", "county subdivision", - "place/balance (or part)", "place", "consolidated city", "place (or part)", - "metropolitan statistical area/micropolitan statistical area", "cbsa", - "metropolitan division", "combined statistical area"), - product = NULL, - variables = NULL, - breakdown = NULL, - breakdown_labels = FALSE, - year = 2022, - state = NULL, - county = NULL, - time_series = FALSE, - output = "tidy", - geometry = FALSE, - keep_geo_vars = FALSE, - shift_geo = FALSE, - key = NULL, - show_call = FALSE, - ... -)
get_estimates( + geography = c("us", "region", "division", "state", "county", "county subdivision", + "place/balance (or part)", "place", "consolidated city", "place (or part)", + "metropolitan statistical area/micropolitan statistical area", "cbsa", + "metropolitan division", "combined statistical area"), + product = NULL, + variables = NULL, + breakdown = NULL, + breakdown_labels = FALSE, + year = 2022, + state = NULL, + county = NULL, + time_series = FALSE, + output = "tidy", + geometry = FALSE, + keep_geo_vars = FALSE, + shift_geo = FALSE, + key = NULL, + show_call = FALSE, + ... +)
diff --git a/docs/reference/get_flows.html b/docs/reference/get_flows.html index 709559b..14e5cbc 100644 --- a/docs/reference/get_flows.html +++ b/docs/reference/get_flows.html @@ -20,7 +20,7 @@@@ -86,21 +86,21 @@Obtain data and feature geometry for American Community Survey Migration
-+get_flows( - geography, - variables = NULL, - breakdown = NULL, - breakdown_labels = FALSE, - year = 2018, - output = "tidy", - state = NULL, - county = NULL, - msa = NULL, - geometry = FALSE, - key = NULL, - moe_level = 90, - show_call = FALSE -)
get_flows( + geography, + variables = NULL, + breakdown = NULL, + breakdown_labels = FALSE, + year = 2018, + output = "tidy", + state = NULL, + county = NULL, + msa = NULL, + geometry = FALSE, + key = NULL, + moe_level = 90, + show_call = FALSE +)
@@ -206,31 +206,31 @@@@ -81,7 +81,7 @@Value
diff --git a/docs/reference/get_pop_groups.html b/docs/reference/get_pop_groups.html index 62070dd..8ce9ecc 100644 --- a/docs/reference/get_pop_groups.html +++ b/docs/reference/get_pop_groups.html @@ -17,7 +17,7 @@Examples
-if (FALSE) { -get_flows( - geography = "county", - state = "VT", - county = c("Washington", "Chittenden") - ) - -get_flows( - geography = "county subdivision", - breakdown = "RACE", - breakdown_labels = TRUE, - state = "NY", - county = "Westchester", - output = "wide", - year = 2015 - ) - -get_flows( - geography = "metropolitan statistical area", - variables = c("POP1YR", "POP1YRAGO"), - geometry = TRUE, - output = "wide", - show_call = TRUE - ) -} +
if (FALSE) { +get_flows( + geography = "county", + state = "VT", + county = c("Washington", "Chittenden") + ) + +get_flows( + geography = "county subdivision", + breakdown = "RACE", + breakdown_labels = TRUE, + state = "NY", + county = "Westchester", + output = "wide", + year = 2015 + ) + +get_flows( + geography = "metropolitan statistical area", + variables = c("POP1YR", "POP1YRAGO"), + geometry = TRUE, + output = "wide", + show_call = TRUE + ) +}
Get available population groups for a given Decennial Census year and summar
-+get_pop_groups(year, sumfile)
get_pop_groups(year, sumfile)
diff --git a/docs/reference/get_pums.html b/docs/reference/get_pums.html index bfb7780..28598df 100644 --- a/docs/reference/get_pums.html +++ b/docs/reference/get_pums.html @@ -17,7 +17,7 @@@@ -81,19 +81,19 @@Load data from the American Community Survey Public Use Microdata Series API
-+get_pums( - variables = NULL, - state = NULL, - puma = NULL, - year = 2021, - survey = "acs5", - variables_filter = NULL, - rep_weights = NULL, - recode = FALSE, - return_vacant = FALSE, - show_call = FALSE, - key = NULL -)
get_pums( + variables = NULL, + state = NULL, + puma = NULL, + year = 2022, + survey = "acs5", + variables_filter = NULL, + rep_weights = NULL, + recode = FALSE, + return_vacant = FALSE, + show_call = FALSE, + key = NULL +)
@@ -116,7 +116,7 @@diff --git a/docs/reference/interpolate_pw.html b/docs/reference/interpolate_pw.html index 34947c2..3bb64e0 100644 --- a/docs/reference/interpolate_pw.html +++ b/docs/reference/interpolate_pw.html @@ -17,7 +17,7 @@ @@ -81,16 +81,16 @@Arguments
year @@ -143,7 +143,7 @@ The data year of the 1-year ACS sample or the endyear of the -5-year sample. Defaults to 2020. Please note that 1-year data for 2020 is not available +5-year sample. Defaults to 2022. Please note that 1-year data for 2020 is not available in tidycensus, so users requesting 1-year data should supply a different year.
Arguments
recode +Available for 2017 - 2022 data. Defaults to FALSE. If TRUE, recodes variable values using Census data dictionary and creates a new
*_label
column for each variable that is recoded. -Available for 2017 - 2021 data. Defaults to FALSE.return_vacant @@ -175,13 +175,13 @@Value
diff --git a/docs/reference/index.html b/docs/reference/index.html index a750546..42fce35 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@Examples
-if (FALSE) { -get_pums(variables = "AGEP", state = "VT") -get_pums(variables = "AGEP", state = "multiple", puma = c("UT" = 35008, "NV" = 00403)) -get_pums(variables = c("AGEP", "ANC1P"), state = "VT", recode = TRUE) -get_pums(variables = "AGEP", state = "VT", survey = "acs1", rep_weights = "person") -} - +
if (FALSE) { +get_pums(variables = "AGEP", state = "VT") +get_pums(variables = "AGEP", state = "multiple", puma = c("UT" = 35008, "NV" = 00403)) +get_pums(variables = c("AGEP", "ANC1P"), state = "VT", recode = TRUE) +get_pums(variables = "AGEP", state = "VT", survey = "acs1", rep_weights = "person") +} +
Use population-weighted interpolation to transfer information from one set o
-+interpolate_pw( - from, - to, - to_id = NULL, - extensive, - weights, - weight_column = NULL, - weight_placement = c("surface", "centroid"), - crs = NULL -)
interpolate_pw( + from, + to, + to_id = NULL, + extensive, + weights, + weight_column = NULL, + weight_placement = c("surface", "centroid"), + crs = NULL +)
@@ -142,50 +142,50 @@@@ -81,19 +81,19 @@Details
diff --git a/docs/reference/load_variables.html b/docs/reference/load_variables.html index 7334b31..391b4b1 100644 --- a/docs/reference/load_variables.html +++ b/docs/reference/load_variables.html @@ -17,7 +17,7 @@Examples
-if (FALSE) { -# Example: interpolating work-from-home from 2011-2015 ACS -# to 2020 shapes -library(tidycensus) -library(tidyverse) -library(tigris) -options(tigris_use_cache = TRUE) - -wfh_15 <- get_acs( - geography = "tract", - variables = "B08006_017", - year = 2015, - state = "AZ", - county = "Maricopa", - geometry = TRUE -) %>% -select(estimate) - -wfh_20 <- get_acs( - geography = "tract", - variables = "B08006_017", - year = 2020, - state = "AZ", - county = "Maricopa", - geometry = TRUE - ) - -maricopa_blocks <- blocks( - "AZ", - "Maricopa", - year = 2020 -) - -wfh_15_to_20 <- interpolate_pw( - from = wfh_15, - to = wfh_20, - to_id = "GEOID", - weights = maricopa_blocks, - weight_column = "POP20", - crs = 26949, - extensive = TRUE -) - -} +
if (FALSE) { +# Example: interpolating work-from-home from 2011-2015 ACS +# to 2020 shapes +library(tidycensus) +library(tidyverse) +library(tigris) +options(tigris_use_cache = TRUE) + +wfh_15 <- get_acs( + geography = "tract", + variables = "B08006_017", + year = 2015, + state = "AZ", + county = "Maricopa", + geometry = TRUE +) %>% +select(estimate) + +wfh_20 <- get_acs( + geography = "tract", + variables = "B08006_017", + year = 2020, + state = "AZ", + county = "Maricopa", + geometry = TRUE + ) + +maricopa_blocks <- blocks( + "AZ", + "Maricopa", + year = 2020 +) + +wfh_15_to_20 <- interpolate_pw( + from = wfh_15, + to = wfh_20, + to_id = "GEOID", + weights = maricopa_blocks, + weight_column = "POP20", + crs = 26949, + extensive = TRUE +) + +}
Load variables from a decennial Census or American Community Survey dataset
-+load_variables( - year, - dataset = c("sf1", "sf2", "sf3", "sf4", "pl", "dhc", "dp", "ddhca", "as", "gu", "mp", - "vi", "acsse", "dpas", "dpgu", "dpmp", "dpvi", "dhcvi", "dhcgu", "dhcvi", "dhcas", - "acs1", "acs3", "acs5", "acs1/profile", "acs3/profile", "acs5/profile", - "acs1/subject", "acs3/subject", "acs5/subject", "acs1/cprofile", "acs5/cprofile", - "sf2profile", "sf3profile", "sf4profile", "aian", "aianprofile", "cd110h", "cd110s", - "cd110hprofile", "cd110sprofile", "sldh", "slds", "sldhprofile", "sldsprofile", - "cqr", "cd113", "cd113profile", - "cd115", "cd115profile", "cd116", "plnat", - "cd118"), - cache = FALSE -)
load_variables( + year, + dataset = c("sf1", "sf2", "sf3", "sf4", "pl", "dhc", "dp", "ddhca", "as", "gu", "mp", + "vi", "acsse", "dpas", "dpgu", "dpmp", "dpvi", "dhcvi", "dhcgu", "dhcvi", "dhcas", + "acs1", "acs3", "acs5", "acs1/profile", "acs3/profile", "acs5/profile", + "acs1/subject", "acs3/subject", "acs5/subject", "acs1/cprofile", "acs5/cprofile", + "sf2profile", "sf3profile", "sf4profile", "aian", "aianprofile", "cd110h", "cd110s", + "cd110hprofile", "cd110sprofile", "sldh", "slds", "sldhprofile", "sldsprofile", + "cqr", "cd113", "cd113profile", + "cd115", "cd115profile", "cd116", "plnat", + "cd118"), + cache = FALSE +)
@@ -140,10 +140,10 @@@@ -92,7 +92,7 @@Details
diff --git a/docs/reference/mig_recodes.html b/docs/reference/mig_recodes.html index ae3b9da..a214cd5 100644 --- a/docs/reference/mig_recodes.html +++ b/docs/reference/mig_recodes.html @@ -24,7 +24,7 @@Dataset with Migration Flows characteristic recodes
diff --git a/docs/reference/moe_product.html b/docs/reference/moe_product.html index d17a8f8..bdb56f4 100644 --- a/docs/reference/moe_product.html +++ b/docs/reference/moe_product.html @@ -17,7 +17,7 @@@@ -81,7 +81,7 @@Calculate the margin of error for a derived product
-+moe_product(est1, est2, moe1, moe2)
moe_product(est1, est2, moe1, moe2)
diff --git a/docs/reference/moe_prop.html b/docs/reference/moe_prop.html index b5dfa40..993473b 100644 --- a/docs/reference/moe_prop.html +++ b/docs/reference/moe_prop.html @@ -17,7 +17,7 @@@@ -81,7 +81,7 @@Calculate the margin of error for a derived proportion
-+moe_prop(num, denom, moe_num, moe_denom)
moe_prop(num, denom, moe_num, moe_denom)
diff --git a/docs/reference/moe_ratio.html b/docs/reference/moe_ratio.html index 70efae2..6a1e401 100644 --- a/docs/reference/moe_ratio.html +++ b/docs/reference/moe_ratio.html @@ -17,7 +17,7 @@@@ -81,7 +81,7 @@Calculate the margin of error for a derived ratio
-+moe_ratio(num, denom, moe_num, moe_denom)
moe_ratio(num, denom, moe_num, moe_denom)
diff --git a/docs/reference/moe_sum.html b/docs/reference/moe_sum.html index ce43146..8bf7dec 100644 --- a/docs/reference/moe_sum.html +++ b/docs/reference/moe_sum.html @@ -17,7 +17,7 @@@@ -81,7 +81,7 @@Calculate the margin of error for a derived sum
-+moe_sum(moe, estimate = NULL, na.rm = FALSE)
moe_sum(moe, estimate = NULL, na.rm = FALSE)
diff --git a/docs/reference/pums_variables.html b/docs/reference/pums_variables.html index a44ecab..1591c7a 100644 --- a/docs/reference/pums_variables.html +++ b/docs/reference/pums_variables.html @@ -33,7 +33,7 @@@@ -110,12 +110,12 @@Dataset with PUMS variables and codes
Format
-An object of class
+tbl_df
(inherits fromtbl
,data.frame
) with 47803 rows and 12 columns.An object of class
tbl_df
(inherits fromtbl
,data.frame
) with 58576 rows and 12 columns.@@ -81,7 +81,7 @@Details
diff --git a/docs/reference/significance.html b/docs/reference/significance.html index 30c701e..2d316b9 100644 --- a/docs/reference/significance.html +++ b/docs/reference/significance.html @@ -17,7 +17,7 @@Evaluate whether the difference in two estimates is statistically significan
-+significance(est1, est2, moe1, moe2, clevel = 0.9)
significance(est1, est2, moe1, moe2, clevel = 0.9)
diff --git a/docs/reference/state_laea.html b/docs/reference/state_laea.html index c7056d3..fdcc53c 100644 --- a/docs/reference/state_laea.html +++ b/docs/reference/state_laea.html @@ -18,7 +18,7 @@@@ -83,9 +83,9 @@State geometry with Alaska and Hawaii shifted and re-scaled
diff --git a/docs/reference/summary_files.html b/docs/reference/summary_files.html index 6a9eda0..ba7e98e 100644 --- a/docs/reference/summary_files.html +++ b/docs/reference/summary_files.html @@ -17,7 +17,7 @@@@ -81,7 +81,7 @@Identify summary files for a given decennial Census year
-+summary_files(year)
summary_files(year)
diff --git a/docs/reference/tidycensus.html b/docs/reference/tidycensus.html index 092f036..aca9086 100644 --- a/docs/reference/tidycensus.html +++ b/docs/reference/tidycensus.html @@ -17,7 +17,7 @@diff --git a/docs/reference/to_survey.html b/docs/reference/to_survey.html index 28dd7bc..5c2305b 100644 --- a/docs/reference/to_survey.html +++ b/docs/reference/to_survey.html @@ -22,7 +22,7 @@ @@ -91,12 +91,12 @@Convert a data frame returned by get_pums() to a survey object
- +@@ -118,7 +118,7 @@Arguments
design +Currently the only option is The survey design to use when creating a survey object. -Currently the only option is code"rep_weights"/.
"rep_weights"
.