From f246e60c3fc08762aafde73d4db2fe5f6b91a4a5 Mon Sep 17 00:00:00 2001 From: Bill Denney Date: Sat, 3 Aug 2024 12:43:48 -0400 Subject: [PATCH 1/8] Add failing test --- tests/testthat/test-corner-cases.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/testthat/test-corner-cases.R b/tests/testthat/test-corner-cases.R index f0e3fa4..c66a945 100644 --- a/tests/testthat/test-corner-cases.R +++ b/tests/testthat/test-corner-cases.R @@ -73,3 +73,13 @@ test_that("zero length input (issue #20)", { parse_iso_8601(character(0)), structure(numeric(0), class = c("POSIXct", "POSIXt"), tzone = "UTC")) }) + +test_that("multiple date formats do not cause a warning (issue #36)", { + expect_silent( + parse_iso_8601(c("2020-03", "2020")) + ) + expect_equal( + parse_iso_8601(c("2020-03", "2020")), + as.POSIXct(c("2020-03-01", "2020-01-01"), tz = "UTC") + ) +}) From 5c7105bc75b90c09209f195fb1c4e10827e13fea Mon Sep 17 00:00:00 2001 From: Bill Denney Date: Sat, 3 Aug 2024 13:00:28 -0400 Subject: [PATCH 2/8] Fix year-only formats mixed with other formats so that alignment --- R/parsedate-package.r | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/parsedate-package.r b/R/parsedate-package.r index fa48404..82860a1 100644 --- a/R/parsedate-package.r +++ b/R/parsedate-package.r @@ -220,7 +220,7 @@ parse_iso_parts <- function(mm, default_tz) { ## Years fy <- is.na(date) - date[fy] <- ymd(paste(mm$year, "01", "01")) + date[fy] <- ymd(paste(mm$year[fy], "01", "01")) ## ----------------------------------------------------------------- ## Now the time @@ -240,10 +240,12 @@ parse_iso_parts <- function(mm, default_tz) { frac <- as.numeric(sub(",", ".", mm$frac)) tfs <- !is.na(frac) & mm$sec != "" + # only supporting up to millisecond resolution, rounding subsequent digits date[tfs] <- date[tfs] + milliseconds(round(frac[tfs] * 1000)) tfm <- !is.na(frac) & mm$sec == "" & mm$min != "" sec <- trunc(frac[tfm] * 60) + # only supporting up to millisecond resolution, 
rounding subsequent digits mil <- round((frac[tfm] * 60 - sec) * 1000) date[tfm] <- date[tfm] + seconds(sec) + milliseconds(mil) From 6b50f3a06dfbf0331508fae99a451bccb638e62f Mon Sep 17 00:00:00 2001 From: Bill Denney Date: Sat, 3 Aug 2024 13:23:21 -0400 Subject: [PATCH 3/8] Ensure that timezone hours and minutes are accurately assigned --- R/parsedate-package.r | 4 ++-- tests/testthat/test-corner-cases.R | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/R/parsedate-package.r b/R/parsedate-package.r index 82860a1..254c946 100644 --- a/R/parsedate-package.r +++ b/R/parsedate-package.r @@ -261,9 +261,9 @@ parse_iso_parts <- function(mm, default_tz) { ftzpm <- mm$tzpm != "" m <- ifelse(mm$tzpm[ftzpm] == "+", -1, 1) ftzpmh <- ftzpm & mm$tzhour != "" - date[ftzpmh] <- date[ftzpmh] + m * hours(mm$tzhour[ftzpmh]) + date[ftzpmh] <- date[ftzpmh] + m[ftzpmh] * hours(mm$tzhour[ftzpmh]) ftzpmm <- ftzpm & mm$tzmin != "" - date[ftzpmm] <- date[ftzpmm] + m * minutes(mm$tzmin[ftzpmm]) + date[ftzpmm] <- date[ftzpmm] + m[ftzpmm] * minutes(mm$tzmin[ftzpmm]) ftzz <- mm$tz == "Z" date[ftzz] <- as.POSIXct(date[ftzz], "UTC") diff --git a/tests/testthat/test-corner-cases.R b/tests/testthat/test-corner-cases.R index c66a945..883e1fa 100644 --- a/tests/testthat/test-corner-cases.R +++ b/tests/testthat/test-corner-cases.R @@ -82,4 +82,25 @@ test_that("multiple date formats do not cause a warning (issue #36)", { parse_iso_8601(c("2020-03", "2020")), as.POSIXct(c("2020-03-01", "2020-01-01"), tz = "UTC") ) + + # a related set of tests that causes a similar issue to #36, but was not + # reported in that issue. This covers when minute and non-minute timezones + # are provided at the same time. 
+ valid_datetimes <- + c( + "2024-08-03T01:02:03Z", # Zulu timezone + "2024-08-03T01:02:03+00", # Zulu timezone as numeric + "2024-08-03T01:02:03-04", # negative hour timezone without minutes + "2024-08-03T01:02:03+04", # positive hour timezone without minutes + "2024-08-03T01:02:03-04:00", # negative hour timezone with minutes + "2024-08-03T01:02:03+04:15", # positive hour timezone with minutes + "2024-08-03T01:02:03.123+04:15" # positive hour timezone with minutes and fractional seconds + ) + expect_silent( + parse_iso_8601(valid_datetimes) + ) + expect_equal( + parse_iso_8601(valid_datetimes), + as.POSIXct(c("2024-08-03 01:02:03", "2024-08-03 01:02:03", "2024-08-02 21:02:03", "2024-08-03 05:02:03", "2024-08-02 21:02:03", "2024-08-02 20:47:03", "2024-08-02 20:47:03.123"), tz = "UTC") + ) }) From d8dd5437b6f4bf4b2d28aff018327124d58cd2a1 Mon Sep 17 00:00:00 2001 From: Bill Denney Date: Sat, 3 Aug 2024 13:26:05 -0400 Subject: [PATCH 4/8] Reformatting (no functional changes) --- R/parsedate-package.r | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/R/parsedate-package.r b/R/parsedate-package.r index 254c946..e0ae8bc 100644 --- a/R/parsedate-package.r +++ b/R/parsedate-package.r @@ -193,8 +193,7 @@ parse_iso_parts <- function(mm, default_tz) { num <- nrow(mm) - ## ----------------------------------------------------------------- - ## Date first + ## Date first ---- date <- .POSIXct(rep(NA_real_, num), tz = "") @@ -222,8 +221,7 @@ parse_iso_parts <- function(mm, default_tz) { fy <- is.na(date) date[fy] <- ymd(paste(mm$year[fy], "01", "01")) - ## ----------------------------------------------------------------- - ## Now the time + ## Now the time ---- th <- mm$hour != "" date[th] <- date[th] + hours(mm$hour[th]) @@ -234,8 +232,7 @@ parse_iso_parts <- function(mm, default_tz) { ts <- mm$sec != "" date[ts] <- date[ts] + seconds(mm$sec[ts]) - ## ----------------------------------------------------------------- - ## Fractional time + ## 
Fractional time ---- frac <- as.numeric(sub(",", ".", mm$frac)) @@ -255,8 +252,7 @@ parse_iso_parts <- function(mm, default_tz) { mil <- round((((frac[tfh] * 60) - min) * 60 - sec) * 1000) date[tfh] <- date[tfh] + minutes(min) + seconds(sec) + milliseconds(mil) - ## ----------------------------------------------------------------- - ## Time zone + ## Time zone ---- ftzpm <- mm$tzpm != "" m <- ifelse(mm$tzpm[ftzpm] == "+", -1, 1) From b10ab7c40a2cab60baf33ffa5de85751953e9a14 Mon Sep 17 00:00:00 2001 From: Bill Denney Date: Sat, 3 Aug 2024 13:39:34 -0400 Subject: [PATCH 5/8] Notify of updates for mixed date/time formats --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 5ed164b..54d94b3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # parsedate (development version) +* Some issues with different formats (e.g. "2000" and "2000-11") could have + given inaccurate results previously. Those are now resolved (#36) + # parsedate 1.3.1 No user visible changes. From 1a5cf13cbf74f99605932e97874aa13b5fb66739 Mon Sep 17 00:00:00 2001 From: Bill Denney Date: Sat, 3 Aug 2024 13:40:32 -0400 Subject: [PATCH 6/8] Comments to document the `iso_regex` --- R/parsedate-package.r | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/R/parsedate-package.r b/R/parsedate-package.r index e0ae8bc..286fe0c 100644 --- a/R/parsedate-package.r +++ b/R/parsedate-package.r @@ -280,18 +280,31 @@ parse_iso_parts <- function(mm, default_tz) { } iso_regex <- paste0( + # whitespace at the beginning "^\\s*", + # the year "(?<year>[\\+-]?\\d{4}(?!\\d{2}\\b))", + # The dash between year and month "(?:(?<dash>-?)", + # The month "(?:(?<month>0[1-9]|1[0-2])", + # The dash between month and day, and the day "(?:\\g{dash}(?<day>[12]\\d|0[1-9]|3[01]))?", + # or the week "|W(?<week>[0-4]\\d|5[0-3])(?:-?(?<weekday>[1-7]))?", + # or the yearday "|(?<yearday>00[1-9]|0[1-9]\\d|[12]\\d{2}|3", "(?:[0-5]\\d|6[1-6])))", + # the "T" then the hour "(?