Merge pull request #178 from ropensci/fix/hydat_path

Update version
ropensci · Aug 20, 2022 · 23f226e · 23f226e
2 parents f9ebbc4 + cc9eaad
commit 23f226e
Show file tree

Hide file tree

Showing 17 changed files with 383 additions and 335 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: tidyhydat
 Title: Extract and Tidy Canadian 'Hydrometric' Data
-Version: 0.5.5
+Version: 0.5.6
 Authors@R: c(person("Sam", "Albers", email = "[email protected]", role = c("aut", "cre"),
     comment = c(ORCID = "0000-0002-9270-7884")),
     person("David", "Hutchinson", email = "[email protected]", role = "ctb"),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+# tidyhydat 0.5.6
+- fixed CRAN document issue
+- fixed bug created by HYDAT database name (#175)
+
 # tidyhydat 0.5.5
 
 ### MINOR IMPROVEMENTS

diff --git a/R/download.R b/R/download.R
@@ -28,7 +28,6 @@
 #'
 
 download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) {
-
   if(is.null(dl_hydat_here)){
     dl_hydat_here <- hy_dir()
   } else {
@@ -97,7 +96,7 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) {
 
 
   ## temporary path to save
-  tmp <- tempfile("hydat_")
+  tmp <- tempfile("hydat_", fileext = ".zip")
 
   ## Download the zip file
   res <- httr::GET(url, httr::write_disk(tmp), httr::progress("down"), 
@@ -110,6 +109,12 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) {
 
   utils::unzip(tmp, exdir = dl_hydat_here, overwrite = TRUE)
 
+  ## rename to consistent name
+  file.rename(
+    list.files(dl_hydat_here, pattern = "\\.sqlite3$", full.names = TRUE),
+    hydat_path
+  )
+
 
   if (file.exists(hydat_path)){
     congrats("HYDAT successfully downloaded")

diff --git a/cran-comments.md b/cran-comments.md
@@ -1,3 +1,25 @@
+tidyhydat 0.5.6
+=========================
+
+There were zero WARNINGS and zero ERRORS.
+
+There was one NOTE: 'Note: found 122 marked UTF-8 strings'. These strings are necessary for testing, as the data source that this package accesses includes data with UTF-8 strings (French-language accents).
+
+## NEWS
+- fixed CRAN document issue
+- fixed bug created by HYDAT database name (#175)
+
+## Test environments
+* win-builder (via `devtools::check_win_devel()` and `devtools::check_win_release()`)
+* local macOS, R 4.2.1 (via R CMD check --as-cran)
+* ubuntu-20.04, r: 'release' (github actions)
+* ubuntu-20.04, r: 'devel' (github actions)
+* macOS,        r: 'release' (github actions)
+* windows,      r: 'release' (github actions)
+* Fedora Linux, R-devel, clang, gfortran - r-hub
+* Debian Linux, R-release, GCC (debian-gcc-release) - r-hub
+* Windows Server 2008 R2 SP1, R-devel, 32/64 bit - r-hub
+
 tidyhydat 0.5.5
 =========================
 

diff --git a/data-raw/HYDAT_internal_data/allstations.csv b/data-raw/HYDAT_internal_data/allstations.csv
diff --git a/data/allstations.rda b/data/allstations.rda
diff --git a/data/hy_data_symbols.rda b/data/hy_data_symbols.rda
diff --git a/data/hy_data_types.rda b/data/hy_data_types.rda
diff --git a/inst/test_db/tinyhydat.sqlite3 b/inst/test_db/tinyhydat.sqlite3
diff --git a/vignettes/tidyhydat_an_introduction.Rmd b/vignettes/tidyhydat_an_introduction.Rmd
@@ -1,7 +1,7 @@
 ---
 title: "tidyhydat: An Introduction"
 author: "Sam Albers"
-date: "2022-03-17"
+date: "2022-08-19"
 output:
   html_vignette:
      keep_md: true
@@ -42,15 +42,15 @@ hy_daily_flows(station_number = "08LA001")
 ```
 
 ```
-##   Queried from version of HYDAT released on 2022-01-17
-##    Observations:                      30,255
-##    Measurement flags:                 6,020
+##   Queried from version of HYDAT released on 2022-07-21
+##    Observations:                      31,351
+##    Measurement flags:                 6,166
 ##    Parameter(s):                      Flow
-##    Date range:                        1914-01-01 to 2018-12-31 
+##    Date range:                        1914-01-01 to 2021-12-31 
 ##    Station(s) returned:               1
 ##    Stations requested but not returned: 
 ##     All stations returned.
-## # A tibble: 30,255 x 5
+## # A tibble: 31,351 × 5
 ##    STATION_NUMBER Date       Parameter Value Symbol
 ##    <chr>          <date>     <chr>     <dbl> <chr> 
 ##  1 08LA001        1914-01-01 Flow        144 <NA>  
@@ -63,7 +63,8 @@ hy_daily_flows(station_number = "08LA001")
 ##  8 08LA001        1914-01-08 Flow        140 <NA>  
 ##  9 08LA001        1914-01-09 Flow        140 <NA>  
 ## 10 08LA001        1914-01-10 Flow        140 <NA>  
-## # ... with 30,245 more rows
+## # … with 31,341 more rows
+## # ℹ Use `print(n = ...)` to see more rows
 ```
 
 Another method is to use `hy_stations()` to generate your vector, which is then given to the `station_number` argument. For example, we could take a subset of only those active stations within Prince Edward Island (province code: PE) and then create a vector for `hy_daily_flows()`:
@@ -79,23 +80,24 @@ PEI_stns
 ```
 
 ```
-## [1] "01CA003" "01CB002" "01CB004" "01CB018" "01CC002" "01CC005" "01CC010" "01CC011" "01CD005"
+## [1] "01CA003" "01CB002" "01CB004" "01CB018" "01CC002" "01CC005" "01CC010" "01CC011"
+## [9] "01CD005"
 ```
 
 ```r
 hy_daily_flows(station_number = PEI_stns)
 ```
 
 ```
-##   Queried from version of HYDAT released on 2022-01-17
-##    Observations:                      113,507
-##    Measurement flags:                 20,357
+##   Queried from version of HYDAT released on 2022-07-21
+##    Observations:                      114,605
+##    Measurement flags:                 20,524
 ##    Parameter(s):                      Flow
 ##    Date range:                        1961-08-01 to 2020-12-31 
 ##    Station(s) returned:               9
 ##    Stations requested but not returned: 
 ##     All stations returned.
-## # A tibble: 113,507 x 5
+## # A tibble: 114,605 × 5
 ##    STATION_NUMBER Date       Parameter Value Symbol
 ##    <chr>          <date>     <chr>     <dbl> <chr> 
 ##  1 01CA003        1961-08-01 Flow         NA <NA>  
@@ -108,7 +110,8 @@ hy_daily_flows(station_number = PEI_stns)
 ##  8 01CB002        1961-08-04 Flow         NA <NA>  
 ##  9 01CA003        1961-08-05 Flow         NA <NA>  
 ## 10 01CB002        1961-08-05 Flow         NA <NA>  
-## # ... with 113,497 more rows
+## # … with 114,595 more rows
+## # ℹ Use `print(n = ...)` to see more rows
 ```
 
 We can also merge our station choice and data extraction into one unified pipe which accomplishes a single goal. For example, if for some reason we wanted all the stations in Canada that had the name "Canada" in them, we could unify that selection and data extraction process into a single pipe:
@@ -120,15 +123,15 @@ search_stn_name("canada") %>%
 ```
 
 ```
-##   Queried from version of HYDAT released on 2022-01-17
-##    Observations:                      84,594
-##    Measurement flags:                 25,617
+##   Queried from version of HYDAT released on 2022-07-21
+##    Observations:                      86,056
+##    Measurement flags:                 26,218
 ##    Parameter(s):                      Flow
-##    Date range:                        1918-08-01 to 2020-12-31 
+##    Date range:                        1918-08-01 to 2021-12-31 
 ##    Station(s) returned:               7
 ##    Stations requested but not returned: 
 ##     All stations returned.
-## # A tibble: 84,594 x 5
+## # A tibble: 86,056 × 5
 ##    STATION_NUMBER Date       Parameter Value Symbol
 ##    <chr>          <date>     <chr>     <dbl> <chr> 
 ##  1 01AK001        1918-08-01 Flow      NA    <NA>  
@@ -141,7 +144,8 @@ search_stn_name("canada") %>%
 ##  8 01AK001        1918-08-08 Flow       1.78 <NA>  
 ##  9 01AK001        1918-08-09 Flow       1.5  <NA>  
 ## 10 01AK001        1918-08-10 Flow       1.78 <NA>  
-## # ... with 84,584 more rows
+## # … with 86,046 more rows
+## # ℹ Use `print(n = ...)` to see more rows
 ```
 
 We saw above that if we were only interested in a subset of dates we could use the `start_date` and `end_date` arguments. A date must be supplied to both of these arguments in the form YYYY-MM-DD. If you were interested in all daily flow data from station number "08LA001" for 1981, you would specify all days in 1981:
@@ -194,7 +198,7 @@ search_stn_name("liard")
 ```
 
 ```
-## # A tibble: 9 x 5
+## # A tibble: 9 × 5
 ##   STATION_NUMBER STATION_NAME                      PROV_TERR_STATE_LOC LATITUDE LONGITUDE
 ##   <chr>          <chr>                             <chr>                  <dbl>     <dbl>
 ## 1 10AA001        LIARD RIVER AT UPPER CROSSING     YT                      60.1     -129.
@@ -214,20 +218,21 @@ search_stn_number("08MF")
 ```
 
 ```
-## # A tibble: 51 x 5
-##    STATION_NUMBER STATION_NAME                           PROV_TERR_STATE_LOC LATITUDE LONGITUDE
-##    <chr>          <chr>                                  <chr>                  <dbl>     <dbl>
-##  1 08MF005        FRASER RIVER AT HOPE                   BC                      49.4     -121.
-##  2 08MF035        FRASER RIVER NEAR AGASSIZ              BC                      49.2     -122.
-##  3 08MF038        FRASER RIVER AT CANNOR                 BC                      49.1     -122.
-##  4 08MF040        FRASER RIVER ABOVE TEXAS CREEK         BC                      50.6     -122.
-##  5 08MF062        COQUIHALLA RIVER BELOW NEEDLE CREEK    BC                      49.5     -121.
-##  6 08MF065        NAHATLATCH RIVER BELOW TACHEWANA CREEK BC                      50.0     -122.
-##  7 08MF068        COQUIHALLA RIVER ABOVE ALEXANDER CREEK BC                      49.4     -121.
-##  8 08MF072        FRASER RIVER AT LAIDLAW                BC                      49.3     -122.
-##  9 08MF073        FRASER RIVER AT HARRISON MILLS         BC                      49.2     -122.
-## 10 08MF001        ANDERSON RIVER NEAR BOSTON BAR         BC                      49.8     -121.
-## # ... with 41 more rows
+## # A tibble: 53 × 5
+##    STATION_NUMBER STATION_NAME                           PROV_TERR_STATE_LOC LATIT…¹ LONGI…²
+##    <chr>          <chr>                                  <chr>                 <dbl>   <dbl>
+##  1 08MF005        FRASER RIVER AT HOPE                   BC                     49.4   -121.
+##  2 08MF035        FRASER RIVER NEAR AGASSIZ              BC                     49.2   -122.
+##  3 08MF038        FRASER RIVER AT CANNOR                 BC                     49.1   -122.
+##  4 08MF040        FRASER RIVER ABOVE TEXAS CREEK         BC                     50.6   -122.
+##  5 08MF062        COQUIHALLA RIVER BELOW NEEDLE CREEK    BC                     49.5   -121.
+##  6 08MF065        NAHATLATCH RIVER BELOW TACHEWANA CREEK BC                     50.0   -122.
+##  7 08MF068        COQUIHALLA RIVER ABOVE ALEXANDER CREEK BC                     49.4   -121.
+##  8 08MF072        FRASER RIVER AT LAIDLAW                BC                     49.3   -122.
+##  9 08MF073        FRASER RIVER AT HARRISON MILLS         BC                     49.2   -122.
+## 10 08MF001        ANDERSON RIVER NEAR BOSTON BAR         BC                     49.8   -121.
+## # … with 43 more rows, and abbreviated variable names ¹LATITUDE, ²LONGITUDE
+## # ℹ Use `print(n = ...)` to see more rows
 ```
 
 ## Using joins 

diff --git a/vignettes/tidyhydat_example_analysis.Rmd b/vignettes/tidyhydat_example_analysis.Rmd
@@ -1,7 +1,7 @@
 ---
 title: "Two examples of using tidyhydat"
 author: "Sam Albers"
-date: "2022-03-17"
+date: "2022-08-19"
 output:
   html_vignette:
      keep_md: true
@@ -60,25 +60,26 @@ hy_stn_data_range()
 ```
 
 ```
-##   Queried from version of HYDAT released on 2022-01-17
-##    Observations:                      12,055
-##    Station(s) returned:               7,923
+##   Queried from version of HYDAT released on 2022-07-21
+##    Observations:                      12,076
+##    Station(s) returned:               7,935
 ##    Stations requested but not returned: 
 ##     All stations returned.
-## # A tibble: 12,055 x 6
+## # A tibble: 12,076 × 6
 ##    STATION_NUMBER DATA_TYPE SED_DATA_TYPE Year_from Year_to RECORD_LENGTH
 ##    <chr>          <chr>     <chr>             <int>   <int>         <int>
 ##  1 01AA002        Q         <NA>               1967    1977            11
 ##  2 01AD001        Q         <NA>               1918    1997            80
-##  3 01AD002        Q         <NA>               1926    2019            94
-##  4 01AD003        H         <NA>               2011    2018             8
-##  5 01AD003        Q         <NA>               1951    2018            68
+##  3 01AD002        Q         <NA>               1926    2020            95
+##  4 01AD003        H         <NA>               2011    2020            10
+##  5 01AD003        Q         <NA>               1951    2020            70
 ##  6 01AD004        H         <NA>               1980    2019            35
 ##  7 01AD004        Q         <NA>               1968    1979            12
 ##  8 01AD005        H         <NA>               1966    1974             9
 ##  9 01AD008        H         <NA>               1972    1974             3
 ## 10 01AD009        H         <NA>               1973    1982            10
-## # ... with 12,045 more rows
+## # … with 12,066 more rows
+## # ℹ Use `print(n = ...)` to see more rows
 ```
 Our objective here is to filter from this data for the station that has the longest record of flow (`DATA_TYPE == "Q"`). You'll also notice this symbol `%>%` which in R is called a [pipe](https://magrittr.tidyverse.org/reference/pipe.html). In code, read it as the word *then*. So for the data_range data we want to grab the data *then* filter it by flow ("Q") in `DATA_TYPE` and then by the maximum value of `RECORD_LENGTH`:
 
@@ -88,12 +89,12 @@ hy_stn_data_range() %>%
 ```
 
 ```
-##   Queried from version of HYDAT released on 2022-01-17
+##   Queried from version of HYDAT released on 2022-07-21
 ##    Observations:                      1
 ##    Station(s) returned:               1
 ##    Stations requested but not returned: 
 ##     All stations returned.
-## # A tibble: 1 x 6
+## # A tibble: 1 × 6
 ##   STATION_NUMBER DATA_TYPE SED_DATA_TYPE Year_from Year_to RECORD_LENGTH
 ##   <chr>          <chr>     <chr>             <int>   <int>         <int>
 ## 1 02HA003        Q         <NA>               1860    2020           161

diff --git a/vignettes/tidyhydat_hydat_db.Rmd b/vignettes/tidyhydat_hydat_db.Rmd
@@ -1,7 +1,7 @@
 ---
 title: "Stepping into the HYDAT Database"
 author: "Dewey Dunnington"
-date: "2022-03-17"
+date: "2022-08-19"
 output: rmarkdown::html_vignette
 vignette: >
   %\VignetteIndexEntry{Stepping into the HYDAT Database}
@@ -38,15 +38,17 @@ To list the tables, use `src_tbls()` from the **dplyr** package.
 
 ```r
 src_tbls(src)
-#>  [1] "AGENCY_LIST"            "ANNUAL_INSTANT_PEAKS"   "ANNUAL_STATISTICS"      "CONCENTRATION_SYMBOLS" 
-#>  [5] "DATA_SYMBOLS"           "DATA_TYPES"             "DATUM_LIST"             "DLY_FLOWS"             
-#>  [9] "DLY_LEVELS"             "MEASUREMENT_CODES"      "OPERATION_CODES"        "PEAK_CODES"            
-#> [13] "PRECISION_CODES"        "REGIONAL_OFFICE_LIST"   "SAMPLE_REMARK_CODES"    "SED_DATA_TYPES"        
-#> [17] "SED_DLY_LOADS"          "SED_DLY_SUSCON"         "SED_SAMPLES"            "SED_SAMPLES_PSD"       
-#> [21] "SED_VERTICAL_LOCATION"  "SED_VERTICAL_SYMBOLS"   "STATIONS"               "STN_DATA_COLLECTION"   
-#> [25] "STN_DATA_RANGE"         "STN_DATUM_CONVERSION"   "STN_DATUM_UNRELATED"    "STN_OPERATION_SCHEDULE"
-#> [29] "STN_REGULATION"         "STN_REMARKS"            "STN_REMARK_CODES"       "STN_STATUS_CODES"      
-#> [33] "VERSION"
+#>  [1] "AGENCY_LIST"            "ANNUAL_INSTANT_PEAKS"   "ANNUAL_STATISTICS"     
+#>  [4] "CONCENTRATION_SYMBOLS"  "DATA_SYMBOLS"           "DATA_TYPES"            
+#>  [7] "DATUM_LIST"             "DLY_FLOWS"              "DLY_LEVELS"            
+#> [10] "MEASUREMENT_CODES"      "OPERATION_CODES"        "PEAK_CODES"            
+#> [13] "PRECISION_CODES"        "REGIONAL_OFFICE_LIST"   "SAMPLE_REMARK_CODES"   
+#> [16] "SED_DATA_TYPES"         "SED_DLY_LOADS"          "SED_DLY_SUSCON"        
+#> [19] "SED_SAMPLES"            "SED_SAMPLES_PSD"        "SED_VERTICAL_LOCATION" 
+#> [22] "SED_VERTICAL_SYMBOLS"   "STATIONS"               "STN_DATA_COLLECTION"   
+#> [25] "STN_DATA_RANGE"         "STN_DATUM_CONVERSION"   "STN_DATUM_UNRELATED"   
+#> [28] "STN_OPERATION_SCHEDULE" "STN_REGULATION"         "STN_REMARKS"           
+#> [31] "STN_REMARK_CODES"       "STN_STATUS_CODES"       "VERSION"
 ```
 
 To inspect any particular table, use the `tbl()` function with the `src` and the table name.
@@ -55,7 +57,7 @@ To inspect any particular table, use the `tbl()` function with the `src` and the
 ```r
 tbl(src, "STN_OPERATION_SCHEDULE")
 #> # Source:   table<STN_OPERATION_SCHEDULE> [?? x 5]
-#> # Database: sqlite 3.37.2 [C:\work\_dev\GitHub_repos\tidyhydat\inst\test_db\tinyhydat.sqlite3]
+#> # Database: sqlite 3.39.2 [/Users/samalbers/_dev/gh_repos/tidyhydat/inst/test_db/tinyhydat.sqlite3]
 #>    STATION_NUMBER DATA_TYPE  YEAR MONTH_FROM MONTH_TO
 #>    <chr>          <chr>     <int> <chr>      <chr>   
 #>  1 01AP003        H          1923 <NA>       <NA>    
@@ -68,7 +70,8 @@ tbl(src, "STN_OPERATION_SCHEDULE")
 #>  8 01AP003        H          1930 <NA>       <NA>    
 #>  9 01AP003        H          1931 <NA>       <NA>    
 #> 10 01AP003        H          1932 <NA>       <NA>    
-#> # ... with more rows
+#> # … with more rows
+#> # ℹ Use `print(n = ...)` to see more rows
 ```
 
 Working with SQL tables in dplyr is much like working with regular data frames, except no data is actually read from the database until necessary. Because some of these tables are large (particularly those containing the actual data), you will want to `filter()` the tables before you `collect()` them (the `collect()` operation loads them into memory as a `data.frame`).
@@ -78,7 +81,7 @@ Working with SQL tables in dplyr is much like working with regular data frames,
 tbl(src, "STN_OPERATION_SCHEDULE") %>%
   filter(STATION_NUMBER == "05AA008") %>%
   collect()
-#> # A tibble: 103 x 5
+#> # A tibble: 103 × 5
 #>    STATION_NUMBER DATA_TYPE  YEAR MONTH_FROM MONTH_TO
 #>    <chr>          <chr>     <int> <chr>      <chr>   
 #>  1 05AA008        H          2012 JAN        DEC     
@@ -91,7 +94,8 @@ tbl(src, "STN_OPERATION_SCHEDULE") %>%
 #>  8 05AA008        H          2019 JAN        DEC     
 #>  9 05AA008        H          2020 JAN        DEC     
 #> 10 05AA008        Q          1910 <NA>       <NA>    
-#> # ... with 93 more rows
+#> # … with 93 more rows
+#> # ℹ Use `print(n = ...)` to see more rows
 ```
 
 When you are finished with the database (i.e., the end of the script), it is good practice to close the connection (you may get a loud red warning if you don't!).

diff --git a/vignettes/vignette-fig-old_rec-1.png b/vignettes/vignette-fig-old_rec-1.png
diff --git a/vignettes/vignette-fig-old_rec_yr-1.png b/vignettes/vignette-fig-old_rec_yr-1.png
diff --git a/vignettes/vignette-fig-pcrtile_plt-1.png b/vignettes/vignette-fig-pcrtile_plt-1.png
diff --git a/vignettes/vignette-fig-tile_plt-1.png b/vignettes/vignette-fig-tile_plt-1.png
diff --git a/vignettes/vignette-fig-unnamed-chunk-8-1.png b/vignettes/vignette-fig-unnamed-chunk-8-1.png