diff --git a/.gitignore b/.gitignore index 49528a2c2..da35552b6 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,4 @@ build/* /venv .vscode/ +CONFIGFILE diff --git a/synapseclient/core/utils.py b/synapseclient/core/utils.py index b6a6a379a..b6e26b074 100644 --- a/synapseclient/core/utils.py +++ b/synapseclient/core/utils.py @@ -18,12 +18,13 @@ import sys import tempfile import threading +import typing import urllib.parse as urllib_parse import uuid import warnings -UNIX_EPOCH = datetime.datetime(1970, 1, 1, 0, 0) +UNIX_EPOCH = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) ISO_FORMAT = "%Y-%m-%dT%H:%M:%S.000Z" ISO_FORMAT_MICROS = "%Y-%m-%dT%H:%M:%S.%fZ" GB = 2**30 @@ -325,6 +326,25 @@ def is_synapse_id_str(obj): return None +def datetime_or_none(datetime_str: str) -> typing.Union[datetime.datetime, None]: + """Attempts to convert a string to a datetime object. Returns None if it fails. + + Some of the expected formats of datetime_str are: + - 2023-12-04T07:00:00Z + - 2001-01-01 15:00:00+07:00 + - 2001-01-01 15:00:00-07:00 + - 2023-12-04 07:00:00+00:00 + - 2019-01-01 + + :param datetime_str: The string to convert to a datetime object + :return: The datetime object or None if the conversion fails + """ + try: + return datetime.datetime.fromisoformat(datetime_str.replace("Z", "+00:00")) + except Exception: + return None + + def is_date(dt): """Objects of class datetime.date and datetime.datetime will be recognized as dates""" return isinstance(dt, datetime.date) or isinstance(dt, datetime.datetime) @@ -333,9 +353,18 @@ def is_date(dt): def to_list(value): """Convert the value (an iterable or a scalar value) to a list.""" if isinstance(value, collections.abc.Iterable) and not isinstance(value, str): - return list(value) + values = [] + for val in value: + possible_datetime = None + if isinstance(val, str): + possible_datetime = datetime_or_none(val) + values.append(val if possible_datetime is None else possible_datetime) + return values else: 
- return [value] + possible_datetime = None + if isinstance(value, str): + possible_datetime = datetime_or_none(value) + return [value if possible_datetime is None else possible_datetime] def _to_iterable(value): @@ -395,26 +424,48 @@ def make_bogus_binary_file(n=1 * KB, filepath=None, printprogress=False): return normalize_path(filepath) -def to_unix_epoch_time(dt): +def to_unix_epoch_time(dt: typing.Union[datetime.date, datetime.datetime, str]) -> int: """ Convert either `datetime.date or datetime.datetime objects `_ to UNIX time. """ - + if type(dt) == str: + dt = datetime.datetime.fromisoformat(dt.replace("Z", "+00:00")) if type(dt) == datetime.date: - return (dt - UNIX_EPOCH.date()).total_seconds() * 1000 - return int((dt - UNIX_EPOCH).total_seconds() * 1000) - - -def to_unix_epoch_time_secs(dt): + current_timezone = datetime.datetime.now().astimezone().tzinfo + datetime_utc = datetime.datetime.combine(dt, datetime.time(0, 0, 0, 0)).replace( + tzinfo=current_timezone + ) + else: + # If the datetime is not timezone aware, assume it is in the local timezone. + # This is required in order for windows to work with the `astimezone` method. + if dt.tzinfo is None: + current_timezone = datetime.datetime.now().astimezone().tzinfo + dt = dt.replace(tzinfo=current_timezone) + datetime_utc = dt.astimezone(datetime.timezone.utc) + return int((datetime_utc - UNIX_EPOCH).total_seconds() * 1000) + + +def to_unix_epoch_time_secs( + dt: typing.Union[datetime.date, datetime.datetime] +) -> float: """ Convert either `datetime.date or datetime.datetime objects `_ to UNIX time. """ - if type(dt) == datetime.date: - return (dt - UNIX_EPOCH.date()).total_seconds() - return (dt - UNIX_EPOCH).total_seconds() + current_timezone = datetime.datetime.now().astimezone().tzinfo + datetime_utc = datetime.datetime.combine(dt, datetime.time(0, 0, 0, 0)).replace( + tzinfo=current_timezone + ) + else: + # If the datetime is not timezone aware, assume it is in the local timezone. 
+ # This is required in order for windows to work with the `astimezone` method. + if dt.tzinfo is None: + current_timezone = datetime.datetime.now().astimezone().tzinfo + dt = dt.replace(tzinfo=current_timezone) + datetime_utc = dt.astimezone(datetime.timezone.utc) + return (datetime_utc - UNIX_EPOCH).total_seconds() def from_unix_epoch_time_secs(secs): @@ -426,12 +477,23 @@ def from_unix_epoch_time_secs(secs): # so, here's a hack that enables ancient events, such as Chris's birthday to be # converted from milliseconds since the UNIX epoch to higher level Datetime objects. Ha! if platform.system() == "Windows" and secs < 0: - mirror_date = datetime.datetime.utcfromtimestamp(abs(secs)) - return UNIX_EPOCH - (mirror_date - UNIX_EPOCH) - return datetime.datetime.utcfromtimestamp(secs) + mirror_date = datetime.datetime.fromtimestamp( + abs(secs), tz=datetime.timezone.utc + ) + + result = (UNIX_EPOCH - (mirror_date - UNIX_EPOCH)).replace( + tzinfo=datetime.timezone.utc + ) + + return result + datetime_instance = datetime.datetime.fromtimestamp( + secs, tz=datetime.timezone.utc + ) + + return datetime_instance -def from_unix_epoch_time(ms): +def from_unix_epoch_time(ms) -> datetime.datetime: """Returns a Datetime object given milliseconds since midnight Jan 1, 1970.""" if isinstance(ms, str): diff --git a/synapseutils/sync.py b/synapseutils/sync.py index a56c5cd9b..a4c88b161 100644 --- a/synapseutils/sync.py +++ b/synapseutils/sync.py @@ -11,7 +11,7 @@ from .monitor import notifyMe from synapseclient.entity import is_container from synapseclient.core import config -from synapseclient.core.utils import id_of, is_url, is_synapse_id_str +from synapseclient.core.utils import id_of, is_url, is_synapse_id_str, datetime_or_none from synapseclient import File, table from synapseclient.core.pool_provider import SingleThreadExecutor from synapseclient.core import utils @@ -1059,12 +1059,13 @@ def syncToSynapse( **Example manifest file** - 
=============== ======== ======= ======= =========================== ============================ - path parent annot1 annot2 used executed - =============== ======== ======= ======= =========================== ============================ - /path/file1.txt syn1243 "bar" 3.1415 "syn124;/path/file2.txt" "https://github.org/foo/bar" - /path/file2.txt syn12433 "baz" 2.71 "" "https://github.org/foo/baz" - =============== ======== ======= ======= =========================== ============================ + =============== ======== ======= ======= ========================= =========================== ============================ + path parent annot1 annot2 collection_date used executed + =============== ======== ======= ======= ========================= =========================== ============================ + /path/file1.txt syn1243 "bar" 3.1415 2023-12-04 07:00:00+00:00 "syn124;/path/file2.txt" "https://github.org/foo/bar" + /path/file2.txt syn12433 "baz" 2.71 2001-01-01 15:00:00+07:00 "" "https://github.org/foo/baz" + /path/file3.txt syn12455 "zzz" 3.52 2023-12-04T07:00:00Z "" "https://github.org/foo/zzz" + =============== ======== ======= ======= ========================= =========================== ============================ """ df = readManifestFile(syn, manifestFile) @@ -1115,7 +1116,18 @@ def _manifest_upload(syn, df): # if a item in the manifest upload is an empty string we do not want to upload that # as an empty string annotation - file.annotations = {k: v for k, v in annotations.items() if v != ""} + file_annotations = {} + + for annotation_key, annotation_value in annotations.items(): + if annotation_value is None or annotation_value == "": + continue + possible_datetime = None + if isinstance(annotation_value, str): + possible_datetime = datetime_or_none(annotation_value) + file_annotations[annotation_key] = ( + annotation_value if possible_datetime is None else possible_datetime + ) + file.annotations = file_annotations item = _SyncUploadItem( file, diff 
--git a/tests/integration/conftest.py b/tests/integration/conftest.py index c34739a49..3c56ae2a1 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,7 +1,7 @@ import logging import platform import uuid -import os +import os, time import sys import shutil import tempfile @@ -151,3 +151,10 @@ def setup_otel(): ) else: trace.set_tracer_provider(TracerProvider(sampler=ALWAYS_OFF)) + + +@pytest.fixture(autouse=True) +def set_timezone(): + os.environ["TZ"] = "UTC" + if platform.system() != "Windows": + time.tzset() diff --git a/tests/integration/synapseclient/integration_test_Entity.py b/tests/integration/synapseclient/integration_test_Entity.py index 1ace6cff6..6f5be48a7 100644 --- a/tests/integration/synapseclient/integration_test_Entity.py +++ b/tests/integration/synapseclient/integration_test_Entity.py @@ -1,3 +1,4 @@ +import datetime import uuid import filecmp import os @@ -82,7 +83,7 @@ def test_Entity(syn: Synapse, project: Project, schedule_for_cleanup): ), ("description= %s" % a_file.description) assert a_file["foo"][0] == "An arbitrary value", "foo= %s" % a_file["foo"][0] assert a_file["bar"] == [33, 44, 55] - assert a_file["bday"][0] == Datetime(2013, 3, 15) + assert a_file["bday"][0] == Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc) assert a_file.contentType == "text/flapdoodle", ( "contentType= %s" % a_file.contentType ) @@ -107,7 +108,7 @@ def test_Entity(syn: Synapse, project: Project, schedule_for_cleanup): a_file = syn.store(a_file, forceVersion=False) assert a_file["foo"][0] == "Another arbitrary chunk of text data" assert a_file["bar"] == [33, 44, 55] - assert a_file["bday"][0] == Datetime(2013, 3, 15) + assert a_file["bday"][0] == Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc) assert a_file.new_key[0] == "A newly created value" assert a_file.path == path assert a_file.versionNumber == 1, "unexpected version number: " + str( @@ -134,7 +135,7 @@ def test_Entity(syn: Synapse, project: Project, 
schedule_for_cleanup): link = syn.get(link, followLink=True) assert link["foo"][0] == "Another arbitrary chunk of text data" assert link["bar"] == [33, 44, 55] - assert link["bday"][0] == Datetime(2013, 3, 15) + assert link["bday"][0] == Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc) assert link.new_key[0] == "A newly created value" assert utils.equal_paths(link.path, path) assert link.versionNumber == 1, "unexpected version number: " + str( diff --git a/tests/integration/synapseclient/test_tables.py b/tests/integration/synapseclient/test_tables.py index f8ae4d877..7dbb646a0 100644 --- a/tests/integration/synapseclient/test_tables.py +++ b/tests/integration/synapseclient/test_tables.py @@ -5,7 +5,7 @@ import tempfile import time import uuid -from datetime import datetime +from datetime import datetime, timezone from pandas.testing import assert_frame_equal import pytest @@ -564,7 +564,9 @@ def test_synapse_integer_columns_with_missing_values_from_dataframe( @tracer.start_as_current_span("test_tables::test_store_table_datetime") def test_store_table_datetime(syn, project): - current_datetime = datetime.fromtimestamp(round(time.time(), 3)) + current_datetime = datetime.fromtimestamp(round(time.time(), 3)).replace( + tzinfo=timezone.utc + ) schema = syn.store( Schema("testTable", [Column(name="testerino", columnType="DATE")], project) ) diff --git a/tests/integration/synapseutils/test_synapseutils_sync.py b/tests/integration/synapseutils/test_synapseutils_sync.py index cd769d20d..757498459 100644 --- a/tests/integration/synapseutils/test_synapseutils_sync.py +++ b/tests/integration/synapseutils/test_synapseutils_sync.py @@ -4,6 +4,7 @@ import tempfile from func_timeout import FunctionTimedOut, func_set_timeout import pandas as pd +import numpy as np import pytest @@ -33,12 +34,15 @@ def __init__(self): self.f2 = utils.make_bogus_data_file(n=10) self.f3 = "https://www.synapse.org" - self.header = "path parent used executed activityName synapseStore foo\n" - 
self.row1 = '%s %s %s "%s;https://www.example.com" provName bar\n' % ( - self.f1, - self.project.id, - self.f2, - self.f3, + self.header = "path parent used executed activityName synapseStore foo date_1 datetime_1 datetime_2 datetime_3\n" + self.row1 = ( + '%s %s %s "%s;https://www.example.com" provName bar 2020-01-01 2023-12-04T07:00:00Z 2023-12-05 23:37:02.995000+00:00 2023-12-05 07:00:00+00:00\n' + % ( + self.f1, + self.project.id, + self.f2, + self.f3, + ) ) self.row2 = ( '%s %s "syn12" " syn123 ;https://www.example.com" provName2 bar\n' @@ -157,9 +161,30 @@ def test_syncToSynapse(test_state): assert ( orig_anots.shape[1] == new_anots.shape[1] ) # Verify that we have the same number of cols - assert new_anots.equals( - orig_anots.loc[:, new_anots.columns] - ), "Annotations different" + + assert new_anots.loc[:]["foo"].equals(orig_anots.loc[:]["foo"]) + # The dates in the manifest can accept a variety of formats, however we are always writing + # them back in the same expected format. Verify they're converted correctly. 
+ assert new_anots.loc[:]["date_1"].tolist() == [ + "2020-01-01 00:00:00+00:00", + np.nan, + np.nan, + ] + assert new_anots.loc[:]["datetime_1"].tolist() == [ + "2023-12-04 07:00:00+00:00", + np.nan, + np.nan, + ] + assert new_anots.loc[:]["datetime_2"].tolist() == [ + "2023-12-05 23:37:02.995000+00:00", + np.nan, + np.nan, + ] + assert new_anots.loc[:]["datetime_3"].tolist() == [ + "2023-12-05 07:00:00+00:00", + np.nan, + np.nan, + ] # Validate that provenance is correct for provenanceType in ["executed", "used"]: diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 6808b458c..54c8d80ec 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,8 +1,10 @@ import logging +import platform import urllib.request from unittest import mock import pytest +import os, time from synapseclient import Synapse from synapseclient.core.logging_setup import SILENT_LOGGER_NAME @@ -42,6 +44,13 @@ def test_confirm_connections_blocked(): assert _BLOCKED_CONNECTION_MESSAGE == str(cm_ex.value) +@pytest.fixture(autouse=True) +def set_timezone(): + os.environ["TZ"] = "UTC" + if platform.system() != "Windows": + time.tzset() + + @pytest.fixture(scope="session") def syn(): """ diff --git a/tests/unit/synapseclient/unit_test_annotations.py b/tests/unit/synapseclient/unit_test_annotations.py index 7e8b60685..b5a6e4be3 100644 --- a/tests/unit/synapseclient/unit_test_annotations.py +++ b/tests/unit/synapseclient/unit_test_annotations.py @@ -2,6 +2,7 @@ ############################################################ from datetime import datetime as Datetime +import datetime from math import pi import time import uuid @@ -155,10 +156,10 @@ def test_round_trip_annotations(): "zoo": [123.1, 456.2, 789.3], "species": ["Moose"], "birthdays": [ - Datetime(1969, 4, 28), - Datetime(1973, 12, 8), - Datetime(2008, 1, 3), - Datetime(2013, 3, 15), + Datetime(1969, 4, 28, tzinfo=datetime.timezone.utc), + Datetime(1973, 12, 8, tzinfo=datetime.timezone.utc), + Datetime(2008, 1, 3, 
tzinfo=datetime.timezone.utc), + Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc), ], "facts": [ True, @@ -206,7 +207,7 @@ def test_idempotent_annotations(): def test_submission_status_annotations_round_trip(): - april_28_1969 = Datetime(1969, 4, 28) + april_28_1969 = Datetime(1969, 4, 28, tzinfo=datetime.timezone.utc) a = Annotations( "syn123", "7bdb83e9-a50a-46e4-987a-4962559f090f",