Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue with year-only date alignment #43

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# parsedate (development version)

* Some issues with different formats (e.g. "2000" and "2000-11") could have
given inaccurate results previously. Those are now resolved (#36)
* Breaking change: fractional seconds are now included (#44)

# parsedate 1.3.1

No user visible changes.
Expand Down
58 changes: 36 additions & 22 deletions R/parsedate-package.r
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,13 @@ parse_iso_8601 <- function(dates, default_tz = "UTC") {

parse_iso_parts <- function(mm, default_tz) {

# Ensure that all fractions use periods rather than commas and are numeric
mm$frac <- as.numeric(sub(",", ".", mm$frac))
mm$sec <- as.numeric(sub(",", ".", mm$sec))

num <- nrow(mm)

## -----------------------------------------------------------------
## Date first
## Date first ----

date <- .POSIXct(rep(NA_real_, num), tz = "")

Expand All @@ -220,47 +223,45 @@ parse_iso_parts <- function(mm, default_tz) {

## Years
fy <- is.na(date)
date[fy] <- ymd(paste(mm$year, "01", "01"))
date[fy] <- ymd(paste(mm$year[fy], "01", "01"))

## -----------------------------------------------------------------
## Now the time
## Now the time ----

th <- mm$hour != ""
date[th] <- date[th] + hours(mm$hour[th])

tm <- mm$min != ""
date[tm] <- date[tm] + minutes(mm$min[tm])

ts <- mm$sec != ""
ts <- !is.na(mm$sec)
date[ts] <- date[ts] + seconds(mm$sec[ts])

## -----------------------------------------------------------------
## Fractional time

frac <- as.numeric(sub(",", ".", mm$frac))
## Fractional time ----

tfs <- !is.na(frac) & mm$sec != ""
date[tfs] <- date[tfs] + milliseconds(round(frac[tfs] * 1000))
tfs <- !is.na(mm$frac) & !is.na(mm$sec)
# only supporting up to millisecond resolution, rounding subsequent digits
date[tfs] <- date[tfs] + milliseconds(round(mm$frac[tfs] * 1000))

tfm <- !is.na(frac) & mm$sec == "" & mm$min != ""
sec <- trunc(frac[tfm] * 60)
mil <- round((frac[tfm] * 60 - sec) * 1000)
tfm <- !is.na(mm$frac) & is.na(mm$sec) & mm$min != ""
sec <- trunc(mm$frac[tfm] * 60)
# only supporting up to millisecond resolution, rounding subsequent digits
mil <- round((mm$frac[tfm] * 60 - sec) * 1000)
date[tfm] <- date[tfm] + seconds(sec) + milliseconds(mil)

tfh <- !is.na(frac) & mm$sec == "" & mm$min == ""
min <- trunc(frac[tfh] * 60)
sec <- trunc((frac[tfh] * 60 - min) * 60)
mil <- round((((frac[tfh] * 60) - min) * 60 - sec) * 1000)
tfh <- !is.na(mm$frac) & is.na(mm$sec) & mm$min == ""
min <- trunc(mm$frac[tfh] * 60)
sec <- trunc((mm$frac[tfh] * 60 - min) * 60)
mil <- round((((mm$frac[tfh] * 60) - min) * 60 - sec) * 1000)
date[tfh] <- date[tfh] + minutes(min) + seconds(sec) + milliseconds(mil)

## -----------------------------------------------------------------
## Time zone
## Time zone ----

ftzpm <- mm$tzpm != ""
m <- ifelse(mm$tzpm[ftzpm] == "+", -1, 1)
ftzpmh <- ftzpm & mm$tzhour != ""
date[ftzpmh] <- date[ftzpmh] + m * hours(mm$tzhour[ftzpmh])
ftzpmm <- ftzpm & mm$tzmin != ""
m <- ifelse(mm$tzpm[ftzpmm] == "+", -1, 1)
date[ftzpmm] <- date[ftzpmm] + m * minutes(mm$tzmin[ftzpmm])

ftzz <- mm$tz == "Z"
Expand All @@ -282,18 +283,31 @@ parse_iso_parts <- function(mm, default_tz) {
}

iso_regex <- paste0(
# whitespace at the beginning
"^\\s*",
# the year
"(?<year>[\\+-]?\\d{4}(?!\\d{2}\\b))",
# The dash between year and month
"(?:(?<dash>-?)",
# The month
"(?:(?<month>0[1-9]|1[0-2])",
# The dash between month and day, and the day
"(?:\\g{dash}(?<day>[12]\\d|0[1-9]|3[01]))?",
# or the week
"|W(?<week>[0-4]\\d|5[0-3])(?:-?(?<weekday>[1-7]))?",
# or the yearday
"|(?<yearday>00[1-9]|0[1-9]\\d|[12]\\d{2}|3",
"(?:[0-5]\\d|6[1-6])))",
# the "T" then the hour
"(?<time>[T\\s](?:(?:(?<hour>[01]\\d|2[0-3])",
# the colon then the minute, and allow for the hour:minute to be
# specified as "24:00"
"(?:(?<colon>:?)(?<min>[0-5]\\d))?|24\\:?00)",
# the fraction after the minute
"(?<frac>[\\.,]\\d+(?!:))?)?",
"(?:\\g{colon}(?<sec>[0-5]\\d)(?:[\\.,]\\d+)?)?",
# the colon after the minute and the second with its optional fraction
"(?:\\g{colon}(?<sec>[0-5]\\d(?:[\\.,]\\d+)?))?",
# the timezone as "Z" or "+-hh" or "+-hh:mm" or "+-hhmm"
"(?<tz>[zZ]|(?<tzpm>[\\+-])",
"(?<tzhour>[01]\\d|2[0-3]):?(?<tzmin>[0-5]\\d)?)?)?)?$"
)
Expand Down
61 changes: 61 additions & 0 deletions tests/testthat/test-corner-cases.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,64 @@ test_that("zero length input (issue #20)", {
parse_iso_8601(character(0)),
structure(numeric(0), class = c("POSIXct", "POSIXt"), tzone = "UTC"))
})

test_that("multiple date formats do not cause a warning (issue #36)", {
expect_silent(
parse_iso_8601(c("2020-03", "2020"))
)
expect_equal(
parse_iso_8601(c("2020-03", "2020")),
as.POSIXct(c("2020-03-01", "2020-01-01"), tz = "UTC")
)

# a related set of tests that causes a similar issue to #36, but was not
# reported in that issue. This covers when minute and non-minute timezones
# are provided at the same time.
valid_datetimes <-
c(
"2024-08-03T01:02:03Z", # Zulu timezone
"2024-08-03T01:02:03+00", # Zulu timezone as numeric
"2024-08-03T01:02:03-04", # negative hour timezone without minutes
"2024-08-03T01:02:03+04", # positive hour timezone without minutes
"2024-08-03T01:02:03-04:00", # negative hour timezone with minutes
"2024-08-03T01:02:03+04:15", # positive hour timezone with minutes
"2024-08-03T01:02:03.123+04:15" # positive hour timezone with minutes and fractional seconds
)
expect_silent(
parse_iso_8601(valid_datetimes)
)
expect_equal(
parse_iso_8601(valid_datetimes),
as.POSIXct(c("2024-08-03 01:02:03", "2024-08-03 01:02:03", "2024-08-03 05:02:03", "2024-08-02 21:02:03", "2024-08-03 05:02:03", "2024-08-02 20:47:03", "2024-08-02 20:47:03.123"), tz = "UTC")
)
})

test_that("capture fractional seconds (issue #44)", {
expect_equal(
parse_iso_8601("2024-08-03T01:02:03.123+04:15"),
as.POSIXct("2024-08-02 20:47:03.123", tz = "UTC")
)
})

test_that("capture fractional seconds (issue #44)", {
# This test uses unclass() and %% to work around
# https://github.com/r-lib/testthat/issues/1977
expect_equal(
unclass(parse_iso_8601("1970-08-03T01:02:03.123+04:15")) %% 1,
unclass(as.POSIXct("1970-08-02 20:47:03.123", tz = "UTC")) %% 1
)

# Test all fractional date/time parts without explicit time zones
expect_equal(
parse_iso_8601("1970-08-03T01.5"),
as.POSIXct("1970-08-03 01:30", tz = "UTC")
)
expect_equal(
parse_iso_8601("1970-08-03T01:02.5"),
as.POSIXct("1970-08-03 01:02:30", tz = "UTC")
)
expect_equal(
unclass(parse_iso_8601("1970-08-03T01:02:03.5")) %% 1,
unclass(as.POSIXct("1970-08-03 01:02:30.5", tz = "UTC")) %% 1
)
})
86 changes: 44 additions & 42 deletions tests/testthat/test-iso8601.r
Original file line number Diff line number Diff line change
Expand Up @@ -57,52 +57,54 @@ test_that("Examples from moment.js", {
test_that("Pelago examples", {

tests <- read.table(stringsAsFactors = FALSE, header = FALSE,
col.names = c("input", "output", "sec_frac"),
strip.white = TRUE, row.names = NULL, sep = "|",
textConnection("
2009-12T12:34 | 2009-12-01T12:34:00+00:00
2009 | 2009-01-01T00:00:00+00:00
2009-05-19 | 2009-05-19T00:00:00+00:00
20090519 | 2009-05-19T00:00:00+00:00
2009123 | 2009-05-03T00:00:00+00:00
2009-05 | 2009-05-01T00:00:00+00:00
2009-123 | 2009-05-03T00:00:00+00:00
2009-222 | 2009-08-10T00:00:00+00:00
2009-001 | 2009-01-01T00:00:00+00:00
2009-W01-1 | 2008-12-29T00:00:00+00:00
2009-W51-1 | 2009-12-14T00:00:00+00:00
2009-W511 | 2009-12-14T00:00:00+00:00
2009-W33 | 2009-08-10T00:00:00+00:00
2009W511 | 2009-12-14T00:00:00+00:00
2009-05-19 00:00 | 2009-05-19T00:00:00+00:00
2009-05-19 14 | 2009-05-19T14:00:00+00:00
2009-05-19 14:31 | 2009-05-19T14:31:00+00:00
2009-05-19 14:39:22 | 2009-05-19T14:39:22+00:00
2009-05-19T14:39Z | 2009-05-19T14:39:00+00:00
2009-W21-2 | 2009-05-19T00:00:00+00:00
2009-W21-2T01:22 | 2009-05-19T01:22:00+00:00
2009-139 | 2009-05-19T00:00:00+00:00
2009-05-19 14:39:22-06:00 | 2009-05-19T20:39:22+00:00
2009-05-19 14:39:22+0600 | 2009-05-19T08:39:22+00:00
2009-05-19 14:39:22-01 | 2009-05-19T15:39:22+00:00
20090621T0545Z | 2009-06-21T05:45:00+00:00
2007-04-06T00:00 | 2007-04-06T00:00:00+00:00
2007-04-05T24:00 | 2007-04-05T00:00:00+00:00
2010-02-18T16:23:48.5 | 2010-02-18T16:23:48+00:00
2010-02-18T16:23:48,444 | 2010-02-18T16:23:48+00:00
2010-02-18T16:23:48,3-06:00 | 2010-02-18T22:23:48+00:00
2010-02-18T16:23.4 | 2010-02-18T16:23:24+00:00
2010-02-18T16:23,25 | 2010-02-18T16:23:15+00:00
2010-02-18T16:23.33+0600 | 2010-02-18T10:23:19+00:00
2010-02-18T16.23334444 | 2010-02-18T16:14:00+00:00
2010-02-18T16,2283 | 2010-02-18T16:13:41+00:00
2009-05-19 143922.500 | 2009-05-19T14:39:22+00:00
2009-05-19 1439,55 | 2009-05-19T14:39:33+00:00"
2009-12T12:34 | 2009-12-01T12:34:00+00:00 | 0
2009 | 2009-01-01T00:00:00+00:00 | 0
2009-05-19 | 2009-05-19T00:00:00+00:00 | 0
20090519 | 2009-05-19T00:00:00+00:00 | 0
2009123 | 2009-05-03T00:00:00+00:00 | 0
2009-05 | 2009-05-01T00:00:00+00:00 | 0
2009-123 | 2009-05-03T00:00:00+00:00 | 0
2009-222 | 2009-08-10T00:00:00+00:00 | 0
2009-001 | 2009-01-01T00:00:00+00:00 | 0
2009-W01-1 | 2008-12-29T00:00:00+00:00 | 0
2009-W51-1 | 2009-12-14T00:00:00+00:00 | 0
2009-W511 | 2009-12-14T00:00:00+00:00 | 0
2009-W33 | 2009-08-10T00:00:00+00:00 | 0
2009W511 | 2009-12-14T00:00:00+00:00 | 0
2009-05-19 00:00 | 2009-05-19T00:00:00+00:00 | 0
2009-05-19 14 | 2009-05-19T14:00:00+00:00 | 0
2009-05-19 14:31 | 2009-05-19T14:31:00+00:00 | 0
2009-05-19 14:39:22 | 2009-05-19T14:39:22+00:00 | 0
2009-05-19T14:39Z | 2009-05-19T14:39:00+00:00 | 0
2009-W21-2 | 2009-05-19T00:00:00+00:00 | 0
2009-W21-2T01:22 | 2009-05-19T01:22:00+00:00 | 0
2009-139 | 2009-05-19T00:00:00+00:00 | 0
2009-05-19 14:39:22-06:00 | 2009-05-19T20:39:22+00:00 | 0
2009-05-19 14:39:22+0600 | 2009-05-19T08:39:22+00:00 | 0
2009-05-19 14:39:22-01 | 2009-05-19T15:39:22+00:00 | 0
20090621T0545Z | 2009-06-21T05:45:00+00:00 | 0
2007-04-06T00:00 | 2007-04-06T00:00:00+00:00 | 0
2007-04-05T24:00 | 2007-04-05T00:00:00+00:00 | 0
2010-02-18T16:23:48.5 | 2010-02-18T16:23:48+00:00 | 0.5
2010-02-18T16:23:48,444 | 2010-02-18T16:23:48+00:00 | 0.444
2010-02-18T16:23:48,3-06:00 | 2010-02-18T22:23:48+00:00 | 0.3
2010-02-18T16:23.4 | 2010-02-18T16:23:24+00:00 | 0
2010-02-18T16:23,25 | 2010-02-18T16:23:15+00:00 | 0
2010-02-18T16:23.33+0600 | 2010-02-18T10:23:19+00:00 | 0.8
2010-02-18T16.23334444 | 2010-02-18T16:14:00+00:00 | 0.04
2010-02-18T16,2283 | 2010-02-18T16:13:41+00:00 | 0.88
2009-05-19 143922.500 | 2009-05-19T14:39:22+00:00 | 0.5
2009-05-19 1439,55 | 2009-05-19T14:39:33+00:00 | 0"
))

apply(tests, 1, function(x) {
d <- format_iso_8601(parse_iso_8601(x[1]))
expect_equal(d, unname(x[2]))
})
for (current_row in seq_len(nrow(tests))) {
d <- parse_iso_8601(tests$input[current_row])
expect_equal(format_iso_8601(d), tests$output[current_row])
expect_equal(as.numeric(d) %% 1, tests$sec_frac[current_row] %% 1, tolerance = 0.0001)
}

})

Expand Down