Skip to content

Commit

Permalink
Start identifying dateutil and other timezones based on tzname()
Browse files Browse the repository at this point in the history
  • Loading branch information
niccokunzmann committed Nov 12, 2024
1 parent ff49220 commit 5a3a54e
Show file tree
Hide file tree
Showing 6 changed files with 227 additions and 42 deletions.
3 changes: 2 additions & 1 deletion src/icalendar/cal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1391,7 +1391,8 @@ def get_used_tzids(self) -> set[str]:
Even if you use UTC, this will not show up.
"""
result = set()
for _, value in self.property_items(sorted=False):
for name, value in self.property_items(sorted=False):
print(name, value)
if hasattr(value, "params"):
result.add(value.params.get("TZID"))
return result - {None}
Expand Down
48 changes: 24 additions & 24 deletions src/icalendar/prop.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,28 +35,29 @@
These types are mainly used for parsing and file generation. But you can set
them directly.
"""
from datetime import date
from datetime import datetime
from datetime import time
from datetime import timedelta
from datetime import tzinfo
from icalendar.caselessdict import CaselessDict
from icalendar.parser import Parameters
from icalendar.parser import escape_char
from icalendar.parser import unescape_char
from icalendar.parser_tools import (
DEFAULT_ENCODING, SEQUENCE_TYPES, to_unicode, from_unicode, ICAL_TYPE
)
from __future__ import annotations

import base64
import binascii
from .timezone import tzp
from collections import defaultdict
import re
import time as _time

from datetime import date, datetime, time, timedelta, tzinfo
from enum import Enum
from typing import Optional, Union
from enum import Enum, auto

from zoneinfo import ZoneInfo, available_timezones

from icalendar.caselessdict import CaselessDict
from icalendar.parser import Parameters, escape_char, unescape_char
from icalendar.parser_tools import (
DEFAULT_ENCODING,
ICAL_TYPE,
SEQUENCE_TYPES,
from_unicode,
to_unicode,
)

from .timezone import tzp

DURATION_REGEX = re.compile(r'([-+]?)P(?:(\d+)W)?(?:(\d+)D)?'
r'(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?$')
Expand All @@ -66,13 +67,12 @@


def tzid_from_tzinfo(tzinfo: tzinfo) -> Optional[str]:
"""Retrieve the timezone id from the tzinfo object."""
tzid = None
if hasattr(tzinfo, 'zone'):
tzid = tzinfo.zone # pytz implementation
elif hasattr(tzinfo, 'key'):
tzid = tzinfo.key # ZoneInfo implementation
return tzid
"""Retrieve the timezone id from the tzinfo object.
Some timezones are equivalent.
Thus, we might return one ID that is equivalent to others.
"""
return (tzids_from_tzinfo(tzinfo) + (None,))[0]


def tzid_from_dt(dt: datetime) -> Optional[str]:
Expand Down Expand Up @@ -1140,4 +1140,4 @@ def from_ical(self, name, value):
"vCategory", "vDDDLists", "vDDDTypes", "vDate", "vDatetime",
"vDuration", "vFloat", "vFrequency", "vGeo", "vInline", "vInt",
"vMonth", "vPeriod", "vRecur", "vSkip", "vText", "vTime",
"vUTCOffset", "vUri", "vWeekday", "tzid_from_tzinfo"]
"vUTCOffset", "vUri", "vWeekday", "tzid_from_tzinfo", "tzids_from_tzinfo"]
2 changes: 1 addition & 1 deletion src/icalendar/tests/calendars/america_new_york.ics
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ END:VTIMEZONE
BEGIN:VEVENT
UID:noend123
DTSTART;TZID=custom_America/New_York;VALUE=DATE-TIME:20140829T080000
DTSTART;TZID=custom_America/New_York;VALUE=DATE-TIME:20140829T100000
DTEND;TZID=custom_America/New_York;VALUE=DATE-TIME:20140829T100000
SUMMARY:an event with a custom tz name
END:VEVENT
END:VCALENDAR
16 changes: 9 additions & 7 deletions src/icalendar/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,30 +247,32 @@ def other_tzp(request, tzp):
The purpose here is to cross test: pytz <-> zoneinfo.
tzp as parameter makes sure we test the cross product.
"""
tzp = TZP(request.param)
return tzp
return TZP(request.param)


@pytest.fixture
def pytz_only(tzp):
def pytz_only(tzp, tzp_name) -> str:
"""Skip tests that are not running under pytz."""
assert tzp.uses_pytz()

return tzp_name

@pytest.fixture
def zoneinfo_only(tzp, request, tzp_name):
def zoneinfo_only(tzp, request, tzp_name) -> str:
"""Skip tests that are not running under zoneinfo."""
assert tzp.uses_zoneinfo()
return tzp_name

@pytest.fixture
def no_pytz(tzp_name):
def no_pytz(tzp_name) -> str:
"""Do not run tests with pytz."""
assert tzp_name != "pytz"
return tzp_name

@pytest.fixture
def no_zoneinfo(tzp_name):
def no_zoneinfo(tzp_name) -> str:
"""Do not run tests with zoneinfo."""
assert tzp_name != "zoneinfo"
return tzp_name

def pytest_generate_tests(metafunc):
"""Parametrize without skipping:
Expand Down
46 changes: 37 additions & 9 deletions src/icalendar/tests/test_issue_722_generate_vtimezone.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@

from datetime import date, datetime, timedelta
from re import findall
from zoneinfo import available_timezones

import pytest
from dateutil.tz import gettz

from icalendar import Calendar, Component, Event, Timezone
from icalendar.prop import tzid_from_tzinfo
from icalendar.prop import _identify_tzinfo, tzid_from_tzinfo, tzids_from_tzinfo

tzids = pytest.mark.parametrize("tzid", [
"Europe/Berlin",
Expand Down Expand Up @@ -298,7 +300,6 @@ def test_unknown_tzid(calendars):

def test_custom_timezone_is_found_and_used(calendars):
"""Check the custom timezone component is not missing."""
pytest.skip("todo")
cal = calendars.america_new_york
assert "custom_America/New_York" in cal.get_used_tzids()
assert "custom_America/New_York" not in cal.get_missing_tzids()
Expand All @@ -314,22 +315,39 @@ def test_utc_is_not_missing(calendars):
assert "UTC" not in cal.get_missing_tzids()
assert "UTC" not in cal.get_used_tzids()

def test_dateutil_timezone_is_matched_with_tzname():
def test_dateutil_timezone_is_not_found_with_tzname(calendars, no_pytz):
"""dateutil is an example of a timezone that has no tzid.
In this test we make sure that the timezone is said to be missing.
"""
pytest.skip("todo")
cal : Calendar = calendars.america_new_york
cal.subcomponents.remove(cal.timezones[0])
assert cal.get_missing_tzids() == {"custom_America/New_York"}
assert "dateutil" in repr(cal.events[0].start.tzinfo.__class__)


@pytest.mark.parametrize("component", ["STANDARD", "DAYLIGHT"])
def test_dateutil_timezone_is_matched_with_tzname(component):
@pytest.mark.parametrize("tzname", ["America/New_York", "Europe/Berlin"])
# @pytest.mark.parametrize("component", ["STANDARD", "DAYLIGHT"])
def test_dateutil_timezone_is_matched_with_tzname(tzname):
"""dateutil is an example of a timezone that has no tzid.
In this test we make sure that the timezone is matched by its
tzname() in the timezone in the STANDARD and DAYLIGHT components.
"""
pytest.skip("todo")
cal = Calendar()
event = Event()
event.start = datetime(2024, 11, 12, tzinfo=gettz(tzname))
cal.add_component(event)
assert cal.get_missing_tzids() == {tzname}
cal.add_missing_timezones()
assert cal.get_missing_tzids() == set()


def test_dateutil_timezone_is_also_added(calendars):
"""We find and add a dateutil timezone.
This is important as we use those in the zoneinfo implementation.
"""

@pytest.mark.parametrize(
"calendar",
Expand Down Expand Up @@ -378,3 +396,13 @@ def test_dates_before_and_after_are_considered():
We should also consider a huge default range.
"""
pytest.skip("todo")


@pytest.mark.parametrize("tzid", available_timezones())
def test_we_can_identify_dateutil_timezones(tzid):
"""dateutil and others were badly supported.
But if we know their shortcodes, we should be able to identify them.
"""
tz = gettz(tzid)
assert tzid in tzids_from_tzinfo(tz), f"{tzid} -> {_identify_tzinfo(tz)}"
154 changes: 154 additions & 0 deletions src/icalendar/timezone/equivalent_timezone_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""This module helps identifying the timezone ids and where they differ.
The algorithm: We use the tzname and the utcoffset for each hour from
1970 - 2030.
We make a big map.
If they are equivalent, they are equivalent within the time that is mostly used.
You can regenerate the information from this module.
"""
from __future__ import annotations

from collections import defaultdict
from datetime import datetime, timedelta, tzinfo
from itertools import chain
from pprint import pprint
from zoneinfo import ZoneInfo, available_timezones


def main(start=datetime(1970, 1, 1), end=datetime(2030, 1, 1)):
    """Print which timezone ids are equivalent and which dates tell them apart.

    Every timezone from ``available_timezones()`` is sampled once per hour
    from ``start`` to ``end`` (both inclusive). Timezones that report the
    same ``utcoffset()`` and ``tzname()`` for every sampled hour end up in
    the same group and are printed together.

    Afterwards, the samples that occur in exactly one group are counted per
    date, and the dates are printed with that count, highest count first.

    Args:
        start: First datetime to sample.
        end: Last datetime to sample (inclusive).

    Returns:
        None. All results are printed to stdout.

    Pressing Control+C while the timezones are sampled stops the sampling
    and continues with the groups computed so far.
    """
    # One sample per hour, including both ends of the range.
    dts = []
    dt = start
    while dt <= end:
        dts.append(dt)
        dt += timedelta(hours=1)

    def check(dt, tz: tzinfo):
        """Return the (datetime, utcoffset, tzname) sample of tz at dt."""
        return (dt, tz.utcoffset(dt), tz.tzname(dt))

    def checks(tz: tzinfo) -> tuple:
        """Return the samples of tz for all dates; used as the group key."""
        return tuple(check(dt, tz) for dt in dts)

    def generate_count(ids_list) -> dict:
        """-> dt_id -> count

        Count in how many entries of ids_list each sample (dt_id) occurs.
        """
        counts = defaultdict(int)
        for group_checks in ids_list:
            for dt_id in group_checks:
                counts[dt_id] += 1
        return counts

    equivalent = defaultdict(list)  # checks -> tzids
    tzids = sorted(available_timezones())
    print("Press Control+C for partial computation.")
    tzs = [ZoneInfo(tzid) for tzid in tzids]
    try:
        for i, (tzid, tz) in enumerate(zip(tzids, tzs)):
            equivalent[checks(tz)].append(tzid)
            print(f"{i}/{len(tzids)}")
    except KeyboardInterrupt:
        # Deliberate: keep going with the partial result.
        pass

    print("The following ids are equivalent")
    # Use a dedicated loop variable so the list of all tzids is not clobbered.
    for group in equivalent.values():
        print(group)

    # we find identifying ids
    print("Calculating most identifying date ids")
    # Iterating the mapping yields its keys, i.e. one tuple of samples per group.
    count = generate_count(equivalent)
    # Samples that belong to exactly one group identify that group.
    ones = {dt_id for dt_id, occurrences in count.items() if occurrences == 1}
    print("Calculating best dates to check")
    date_count = defaultdict(int)  # date -> number of identifying samples
    for dt_id in ones:
        date_count[dt_id[0]] += 1
    best_dates = sorted(date_count, key=date_count.__getitem__, reverse=True)
    print("dates:")
    for best_date in best_dates:
        print(best_date, date_count[best_date])

    # TODO: the remaining idea from the draft is not implemented yet:
    # always split on the sample whose count is closest to half of the
    # remaining groups to build a binary decision tree
    # (check, YES, NO) whose leaves are lists of tzids.

# def tzids_from_tzinfo(tzinfo: tzinfo) -> tuple[str]:
# """Retrieve the timezone ids from the tzinfo object.

# Some of them might be equivalent, some of them are not.
# """
# if hasattr(tzinfo, 'zone'):
# return (tzinfo.zone,) # pytz implementation
# if hasattr(tzinfo, 'key'):
# return (tzinfo.key,) # ZoneInfo implementation
# if not _tzname_to_tzid:
# for tzid in :
# _tzname_to_tzid[_identify_tzinfo(ZoneInfo(tzid))] += (tzid,)
# return _tzname_to_tzid.get(_identify_tzinfo(tzinfo), ())



if __name__ == "__main__":
main()

0 comments on commit 5a3a54e

Please sign in to comment.