diff --git a/.Rhistory b/.Rhistory index bd7215c..302ab0f 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,291 +1,3 @@ -} -if (province) { -if (todialect != "none") { -if (todialect != "dia_super") { -stop("Invalid input: please choose a valid converting transformation.") -} -} else { -if (!( -convert_to %in% c( -"name", -"code", -"area", -"nameToabbre", -"codeToabbre", -"abbreToname", -"abbreTocode", -"abbreToarea" -) -)) { -stop("Invalid input: please choose a valid converting method.") -} -} -} else if (todialect != "none") { -if (!(todialect %in% c("dia_group", "dia_sub_group"))) { -stop("Invalid input: please choose a valid converting transformation.") -} -} else { -if (!(convert_to %in% c("name", "code", "area"))) { -stop("Invalid input: please choose a valid converting method.") -} -} -if (!(incompleteName %in% c("none", "from", "to", "both"))) { -stop( -"Invalid input: the options of `incompleteName` are one of 'none', 'from', 'to', and 'both'." -) -} -if (!(incompleteName == "to") & data_input == "code") { -stop("Invalid input: can not complete administrative codes.") -} -if (!is.logical(province)) { -stop("Invalid input: param `zhixiashi` must be logical class.") -} -if (!is.character(todialect)) { -stop("Invalid input: param `todialect` must be character class.") -} -# if (language_zone & !grepl('_name', data_input, fixed = TRUE)) -# stop( -# 'Invalid input: current version is not supported sname or code as language_zone input.' -# ) -if (!is.logical(zhixiashi)) { -stop("Invalid input: param `zhixiashi` must be logical class.") -} -if (!is.logical(topinyin)) { -stop("Invalid input: param `topinyin` must be logical class.") -} -if (topinyin & convert_to == "code" & todialect == FALSE) { -stop("Invalid input: can not translate administrative codes to pinyin.") -} -if (province) { -# 1 Section of province-level converting -if (todialect != "none") { -# 1-1 If convert language zone -if (is.numeric(data_input)) { -year_from <- "prov_code" -} -if (is.character(data_input)) { -year_from <- "prov_name" -} -ls_index <- switch(todialect, -"dia_super" = { -year_to <- "prov_language" -c(year_from, year_to) -} -) -} else { -# 1-2 If not convert language zone -prov_table <- region_table %>% -select(prov_code:`1999_nickname`) %>% -distinct() -# Because province nicknames changed in 1999 -year_from <- ifelse(year_from < 1999, 1998, 1999) -year_to <- ifelse(year_to < 1999, 1998, 1999) -ls_index <- switch(convert_to, -"name" = { -year_to <- "prov_name" -c(year_from, year_to) -}, -"code" = { -year_to <- "prov_code" -c(year_from, year_to) -}, -"area" = { -year_to <- "area" -c(year_from, year_to) -}, -"nameToabbre" = { -year_from <- "prov_name" -year_to <- paste0(year_to, "_nickname") -c(year_from, year_to) -}, -"codeToabbre" = { -year_from <- "prov_code" -year_to <- paste0(year_to, "_nickname") -c(year_from, year_to) -}, -"abbreToname" = { -year_from <- paste0(year_to, "_nickname") -year_to <- "prov_name" -c(year_from, year_to) -}, -"abbreTocode" = { -year_from <- paste0(year_to, "_nickname") -year_to <- "prov_code" -c(year_from, year_to) -}, -"abbreToarea" = { -year_from <- paste0(year_from, "_nickname") -year_to <- "area" -c(year_from, year_to) -} -) -} -} else { -# 2 Section of prefectural-level converting -if (todialect != "none") { -# 2-1 If convert language zone -if (is.numeric(data_input)) { -year_from <- paste0(year_from, "_code") -} -if (is.character(data_input)) { -year_from <- paste0(year_from, "_name") -} -ls_index <- switch(todialect, -"dia_group" = { -year_to <- "pref_language" -c(year_from, year_to) -}, -"dia_sub_group" = { -year_to <- "dia_sub_language" -c(year_from, year_to) -} -) -} else { -# 2-2 If not convert language zone -if (is.numeric(data_input)) { -year_from <- paste0(year_from, "_code") -} -if (is.character(data_input)) { -year_from <- paste0(year_from, "_name") -} -ls_index <- switch(convert_to, -"code" = { -year_to <- paste0(year_to, "_code") -c(year_from, year_to) -}, -"area" = { -year_to <- "area" -c(year_from, year_to) -}, -"name" = { -year_to <- paste0(year_to, "_name") -c(year_from, year_to) -} -) -# Using the Municipal codes for within region codes -if (zhixiashi) { -region_zhixiashi <- region_table %>% -filter(zhixiashi) -region_sname <- region_zhixiashi %>% -select(ends_with("_sname")) -region_name <- region_zhixiashi %>% -select(ends_with("_name")) -region_code <- region_zhixiashi %>% -select(ends_with("_code")) -# replacing the prefectural names and codes with provincial names and codes -region_sname2 <- -replicate(ncol(region_sname), region_zhixiashi$prov_name) %>% -as.data.frame() -names(region_sname2) <- names(region_sname) -region_name2 <- -replicate(ncol(region_name), region_zhixiashi$prov_name) %>% -as.data.frame() -names(region_name2) <- names(region_name) -region_code2 <- -replicate(ncol(region_code), region_zhixiashi$prov_code) %>% -as.data.frame() -names(region_code2) <- names(region_code) -region_zhixiashi <- -bind_cols(region_sname2, region_name2, region_code2) -region_zhixiashi <- -region_zhixiashi[, order(colnames(region_zhixiashi))] -region_province <- region_table %>% -filter(!zhixiashi) -region_province <- -region_province[, order(colnames(region_province))] -region_table <- bind_rows(region_zhixiashi, region_province) -} -} -} -# When using sname instead of the official name -ls_index <- case_when( -incompleteName == "both" ~ gsub("_name", "_sname", ls_index), -incompleteName == "from" ~ c(gsub("_name", "_sname", ls_index[1]), ls_index[2]), -incompleteName == "to" ~ c(ls_index[1], gsub("_name", "_sname", ls_index[2])), -incompleteName == "none" ~ ls_index -) -# Updating the year_from/to after the evaluation of `incompleteName` -if (incompleteName != "none") { -year_from <- ls_index[1] -year_to <- ls_index[2] -} -# Convert the input to a data.frame for later merging -df_input <- data_input %>% as.data.frame() -names(df_input) <- ls_index[1] -data_output <- -select(region_table, !!ls_index) %>% -distinct() %>% -left_join(df_input, .) %>% -# using left_join to keep the order of the input data -pull(!!year_to) -# Because '2pinyin' can not be used as a variable name -if (topinyin) { -if (is.character(data_output)) { -data_output <- -py( -char = data_output, -dic = pydic(convert_to = "toneless", dic = "pinyin2") -) -} -} -return(data_output) -} -# after conversion. It's 1999 version -regioncode(data_input = corruption$prefecture_id, -year_from = 2019, -year_to = 1999, -convert_to = "area") -# after conversion. It's 1999 version -regioncode(data_input = corruption$prefecture_id, -year_from = 2019, -year_to = 1999) -regioncode(data_input = corruption$prefecture, -year_from = 2019, -year_to = 1999, -province = F, -convert_to="area", -topinyin=TRUE -) -library(regioncode) -knitr::opts_chunk$set(message = FALSE, warning = FALSE) -library(regioncode) -data("corruption") -# original geocodes. It's 2019 version -corruption$prefecture_id -# after conversion. It's 1999 version -regioncode(data_input = corruption$prefecture_id, -year_from = 2019, -year_to = 1999, -convert_to = "area") -library(regioncode) -knitr::opts_chunk$set(message = FALSE, warning = FALSE) -library(regioncode) -data("corruption") -# original geocodes. It's 2019 version -corruption$prefecture_id -# after conversion. It's 1999 version -regioncode(data_input = corruption$prefecture_id, -year_from = 2019, -year_to = 1999, -convert_to = "area") -# after conversion. It's 1999 version -regioncode(data_input = corruption$prefecture_id, -year_from = 2019, -year_to = 1999) -# original geocodes. It's 2019 version -corruption$prefecture_id -load("D:/Dropbox/Seafile/WW/01_Instruction/Research/R packages/regioncode/R/sysdata.rda") -View(region_table) -View(region_table) -library(regioncode) -data("corruption") -# Original 2019 version -corruption$prefecture_id -# 1999 version -regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default set -year_from = 2019, -year_to = 1999) # 1999 version regioncode(data_input = corruption$prefecture_id, convert_to = "code", # default set @@ -510,3 +222,291 @@ year_from = 2019, year_to = 1989) View(region_data) usethis::git_vaccinate() #Adds .DS_Store, .Rproj.user, .Rdata, .Rhistory, and .httr-oauth to your global (a.k.a. user-level) .gitignore. This is good practice as it decreases the chance that you will accidentally leak credentials to GitHub. +library(regioncode) +knitr::opts_chunk$set(message = FALSE, warning = FALSE) +if(!require(regioncode)) install.packages("regioncode") +library(regioncode) +library(tidyverse) +library(regioncode) +data("corruption") +# Original 2019 version +corruption$prefecture_id +# 1989 version +temp <- regioncode(data_input = corruption$prefecture_id, +convert_to = "code", # default set +year_from = 2019, +year_to = 1989) +# tibble( +# code2019 = corruption$prefecture_id, +# code1989 = regioncode(data_input = corruption$prefecture_id, +# convert_to = "code", # default set +# year_from = 2019, +# year_to = 1989), +# name1989 = regioncode(data_input = corruption$prefecture_id, +# convert_to = "name", # default set +# year_from = 2019, +# year_to = 1989) +# ) +load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda") +# Original 2019 version +corruption$prefecture_id +# 1989 version +temp <- regioncode(data_input = corruption$prefecture_id, +convert_to = "code", # default set +year_from = 2019, +year_to = 1989) +tibble( +code2019 = corruption$prefecture_id, +code1989 = regioncode(data_input = corruption$prefecture_id, +convert_to = "code", # default set +year_from = 2019, +year_to = 1989), +name1989 = regioncode(data_input = corruption$prefecture_id, +convert_to = "name", # default set +year_from = 2019, +year_to = 1989) +) +tibble( +code2019 = corruption$prefecture_id, +code1989 = regioncode(data_input = corruption$prefecture_id, +convert_to = "code", # default set +year_from = 2019, +year_to = 1989), +name2019 = regioncode(data_input = corruption$prefecture_id, +convert_to = "name", # default set +year_from = 2019, +year_to = 2019), +name1989 = regioncode(data_input = corruption$prefecture_id, +convert_to = "name", # default set +year_from = 2019, +year_to = 1989) +) +?regioncode +# Original full names +corruption$prefecture +# Convert to incomplete names in 1989 +fake_incomplete <- regioncode(data_input = corruption$prefecture, +convert_to = "name", +year_from = 2019, +year_to = 1989, +incomplete_name = "to") +fake_incomplete +# Convert to full names in 2008 +fake_full <- regioncode(data_input = fake_incomplete, +convert_to = "name", +year_from = 1989, +year_to = 2008, +incomplete_name = "to") +fake_full +names_municipality <- c("北京", "天津", "上海", "重庆") +names_municipality <- c("北京", "天津", "上海", "重庆") +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = FALSE) +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = TRUE) +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = TRUE) +names_municipality <- c("北京", "海淀区", "上海", "静安区") +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = FALSE) +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = TRUE) +library(reprex) +names_municipality <- c("北京", # Beijing, a municipality +"海淀区", # A district of Beijing +"上海", # Shanghai, a municipality +"静安区", # A district of Shanghai +"济南市") # A prefecture of Shandong +# When `zhixiashi` is FALSE, only the districts are recognized +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = FALSE) +# When `zhixiashi` is TRUE, muncipalities are +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = TRUE) +reprex( +{ +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = FALSE) +# When `zhixiashi` is TRUE, muncipalities are +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = TRUE) +} +) +reprex( +{ +names_municipality <- c("北京", # Beijing, a municipality +"海淀区", # A district of Beijing +"上海", # Shanghai, a municipality +"静安区", # A district of Shanghai +"济南市") # A prefecture of Shandong +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = FALSE) +# When `zhixiashi` is TRUE, muncipalities are +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = TRUE) +} +) +names_municipality <- c("北京", # Beijing, a municipality +"海淀区", # A district of Beijing +"上海", # Shanghai, a municipality +"静安区", # A district of Shanghai +"济南市") # A prefecture of Shandong +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = FALSE) +reprex( +{ +library(regioncode) +names_municipality <- c("北京", # Beijing, a municipality +"海淀区", # A district of Beijing +"上海", # Shanghai, a municipality +"静安区", # A district of Shanghai +"济南市") # A prefecture of Shandong +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = FALSE) +# When `zhixiashi` is TRUE, muncipalities are +regioncode(data_input = names_municipality, +year_from = 2019, +year_to = 2019, +convert_to = "code", +zhixiashi = TRUE) +} +) +?reprex +tibble( +province = corruption$province_id, +prov_name = regioncode(data_input = corruption$province_id, +convert_to = "name", +year_from = 2019, +year_to = 1989, +province = TRUE), +prov_abbre = regioncode(data_input = corruption$province_id, +convert_to = "codeToabbre", +year_from = 2019, +year_to = 1989, +province = TRUE) +) +source("~/.active-rstudio-document", echo=TRUE) +tibble( +province = corruption$province_id, +prov_name = regioncode(data_input = corruption$province_id, +convert_to = "name", +year_from = 2019, +year_to = 2019, +province = TRUE), +prov_abbre = regioncode(data_input = corruption$province_id, +convert_to = "codeToabbre", +year_from = 2019, +year_to = 1989, +province = TRUE) +) +reprex( +{ +library(regioncode) +regioncode(data_input = corruption$province_id, +convert_to = "name", +year_from = 2019, +year_to = 1989, +province = TRUE) +} +) +regioncode(data_input = corruption$prefecture, +year_from = 2019, +year_to = 1989, +convert_to = "name", +to_pinyin = TRUE +) +regioncode(data_input = corruption$prefecture, +year_from = 2019, +year_to = 1989, +convert_to = "name", +incomplete_name = "to", +to_pinyin = TRUE +) +regioncode(data_input = corruption$prefecture, +year_from = 2019, +year_to = 1989, +convert_to = "area", +to_pinyin = TRUE +) +regioncode(data_input = c("山西", "陕西", "内蒙古", "香港"), +year_from = 2019, +year_to = 2008, +convert_to = "name", +incomplete_name = "from", +to_pinyin = TRUE +) +regioncode(data_input = c("山西省", "陕西省", "内蒙古自治区"), +year_from = 2019, +year_to = 2008, +convert_to = "name", +to_pinyin = TRUE +) +reprex( +{ +library(regioncode) +regioncode(data_input = c("山西", "陕西", "内蒙古", "香港"), +year_from = 2019, +year_to = 2008, +convert_to = "name", +incomplete_name = "from", +to_pinyin = TRUE +) +} +) +# Regions with special spelling +regioncode(data_input = c("山西", "陕西", "内蒙古", "香港", "澳门"), +year_from = 2019, +year_to = 2008, +convert_to = "name", +incomplete_name = "from", +province = TRUE, +to_pinyin = TRUE +) +# Regions with special spelling +regioncode(data_input = c("山西", "陕西", "内蒙古", "香港", "澳门"), +year_from = 2019, +year_to = 2008, +convert_to = "name", +incomplete_name = "both", +province = TRUE, +to_pinyin = TRUE +) diff --git a/vignettes/regioncode-vignette.R b/vignettes/regioncode-vignette.R index 16f39a8..226b2f3 100644 --- a/vignettes/regioncode-vignette.R +++ b/vignettes/regioncode-vignette.R @@ -2,6 +2,7 @@ knitr::opts_chunk$set(message = FALSE, warning = FALSE) if(!require(regioncode)) install.packages("regioncode") +library(regioncode) library(tidyverse) ## ----code2code---------------------------------------------------------------- @@ -9,33 +10,37 @@ library(regioncode) data("corruption") -# Original 2019 version -corruption$prefecture_id - -# 1989 version -temp <- regioncode(data_input = corruption$prefecture_id, - convert_to = "code", # default set +# Conversion to the 1989 version +regioncode(data_input = corruption$prefecture_id, + convert_to = "code", # default setting year_from = 2019, year_to = 1989) -# tibble( -# code2019 = corruption$prefecture_id, -# code1989 = regioncode(data_input = corruption$prefecture_id, -# convert_to = "code", # default set -# year_from = 2019, -# year_to = 1989), -# name1989 = regioncode(data_input = corruption$prefecture_id, -# convert_to = "name", # default set -# year_from = 2019, -# year_to = 1989) -# ) +# Comparison +tibble( + code2019 = corruption$prefecture_id, + code1989 = regioncode(data_input = corruption$prefecture_id, + convert_to = "code", # default setting + year_from = 2019, + year_to = 1989), + name2019 = regioncode(data_input = corruption$prefecture_id, + convert_to = "name", # default setting + year_from = 2019, + year_to = 2019), + name1989 = regioncode(data_input = corruption$prefecture_id, + convert_to = "name", # default setting + year_from = 2019, + year_to = 1989) +) ## ----code2name---------------------------------------------------------------- -# The original name -corruption$prefecture +# Original name +tibble( + id = corruption$prefecture_id, + name = corruption$prefecture +) # Codes to name - regioncode(data_input = corruption$prefecture_id, convert_to = "name", year_from = 2019, @@ -48,76 +53,130 @@ regioncode(data_input = corruption$prefecture, year_to = 2019) # Name to name of a different year - regioncode(data_input = corruption$prefecture, convert_to = "name", year_from = 2019, year_to = 1989) ## ----incomplete_name---------------------------------------------------------- -# Full, official names +# Original full names corruption$prefecture -regioncode(data_input = corruption$prefecture, +# Conversion to incomplete names in 1989 +fake_incomplete <- regioncode(data_input = corruption$prefecture, convert_to = "name", year_from = 2019, year_to = 1989, incomplete_name = "to") +fake_incomplete -## ----2area-------------------------------------------------------------------- -regioncode(data_input = corruption$prefecture, - year_from = 2019, - year_to = 1989, - convert_to="area") - -## ----language_zone------------------------------------------------------------ -regioncode(data_input = corruption$prefecture, +# Conversion to full names in 2008 +fake_full <- regioncode(data_input = fake_incomplete, + convert_to = "name", + year_from = 1989, + year_to = 2008, + incomplete_name = "from") +fake_full + +## ----municipality------------------------------------------------------------- +names_municipality <- c("北京市", # Beijing, a municipality + "海淀区", # A district of Beijing + "上海市", # Shanghai, a municipality + "静安区", # A district of Shanghai + "济南市") # A prefecture of Shandong + +# When `zhixiashi` is FALSE, only the districts are recognized +regioncode(data_input = names_municipality, year_from = 2019, - year_to = 1989, - to_dialect = "dia_group") + year_to = 2019, + convert_to = "code", + zhixiashi = FALSE) -regioncode(data_input = corruption$prefecture, +# When `zhixiashi` is TRUE, municipalities are recognized +regioncode(data_input = names_municipality, year_from = 2019, - year_to = 1989, - to_dialect = "dia_sub_group") + year_to = 2019, + convert_to = "code", + zhixiashi = TRUE) ## ----rank--------------------------------------------------------------------- -tidyr::tibble( - preference = corruption$prefecture, - rank = regioncode(data_input = corruption$prefecture, - year_from = 2011, +tibble( + city = corruption$prefecture, + rank1989 = regioncode(data_input = corruption$prefecture, + year_from = 2019, year_to = 1989, - convert_to="rank") + convert_to="rank"), + rank2014 = regioncode(data_input = corruption$prefecture, + year_from = 2019, + year_to = 2014, + convert_to = "rank") ) - ## ----pinyin------------------------------------------------------------------- -regioncode(data_input = corruption$prefecture, +tibble( + city = corruption$prefecture, + cityPY = regioncode(data_input = corruption$prefecture, year_from = 2019, year_to = 1989, - convert_to="name", - to_pinyin=TRUE - ) - -regioncode(data_input = corruption$prefecture, + convert_to = "name", + to_pinyin = TRUE + ), + cityIncomplete = regioncode(data_input = corruption$prefecture, year_from = 2019, year_to = 1989, - convert_to="name", + convert_to = "name", incomplete_name = "to", - to_pinyin=TRUE + to_pinyin = TRUE + ), + areaPY = regioncode(data_input = corruption$prefecture, + year_from = 2019, + year_to = 1989, + convert_to = "area", + to_pinyin = TRUE ) +) -regioncode(data_input = corruption$prefecture, +# Regions with special spelling +regioncode(data_input = c("山西", "陕西", "内蒙古", "香港", "澳门"), year_from = 2019, - year_to = 1989, - convert_to="area", - to_pinyin=TRUE + year_to = 2008, + convert_to = "name", + incomplete_name = "both", + province = TRUE, + to_pinyin = TRUE ) ## ----provinces---------------------------------------------------------------- -regioncode(data_input = corruption$province_id, +tibble( + province = corruption$province_id, + prov_name = regioncode(data_input = corruption$province_id, + convert_to = "name", + year_from = 2019, + year_to = 1989, + province = TRUE), + prov_abbre = regioncode(data_input = corruption$province_id, convert_to = "codeToabbre", year_from = 2019, year_to = 1989, province = TRUE) +) + +## ----2area-------------------------------------------------------------------- +regioncode(data_input = corruption$prefecture, + year_from = 2019, + year_to = 1989, + convert_to = "area") + +## ----language_zone------------------------------------------------------------ +tibble( + city = corruption$prefecture, + dialectGroup = regioncode(data_input = corruption$prefecture, + year_from = 2019, + year_to = 1989, + to_dialect = "dia_group"), + dialectSubGroup = regioncode(data_input = corruption$prefecture, + year_from = 2019, + year_to = 1989, + to_dialect = "dia_sub_group") +) diff --git a/vignettes/regioncode-vignette.html b/vignettes/regioncode-vignette.html index b9a5546..d81e67c 100644 --- a/vignettes/regioncode-vignette.html +++ b/vignettes/regioncode-vignette.html @@ -10,14 +10,11 @@ - - - - + - + -regioncode: Convert Region Names and Division Codes of China Over Years +regioncode: One-Step Solution for Chinese Region Conversions