Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYNPY-1358] Correction of timestamp in annotations from manifest file #1020

Merged
merged 6 commits into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ build/*
/venv

.vscode/
CONFIGFILE
96 changes: 79 additions & 17 deletions synapseclient/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
import sys
import tempfile
import threading
import typing
import urllib.parse as urllib_parse
import uuid
import warnings


UNIX_EPOCH = datetime.datetime(1970, 1, 1, 0, 0)
UNIX_EPOCH = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)
ISO_FORMAT = "%Y-%m-%dT%H:%M:%S.000Z"
ISO_FORMAT_MICROS = "%Y-%m-%dT%H:%M:%S.%fZ"
GB = 2**30
Expand Down Expand Up @@ -325,6 +326,25 @@ def is_synapse_id_str(obj):
return None


def datetime_or_none(datetime_str: str) -> typing.Union[datetime.datetime, None]:
    """Attempt to convert a string to a datetime object. Returns None if it fails.

    Some of the expected formats of datetime_str are:
    - 2023-12-04T07:00:00Z
    - 2001-01-01 15:00:00+07:00
    - 2001-01-01 15:00:00-07:00
    - 2023-12-04 07:00:00+00:00
    - 2019-01-01

    Strings without a UTC offset (for example a bare date) produce a naive
    datetime; strings with an offset produce a timezone aware datetime.

    :param datetime_str: The string to convert to a datetime object
    :return: The datetime object or None if the conversion fails
    """
    # Anything that is not a string cannot be an ISO formatted timestamp.
    if not isinstance(datetime_str, str):
        return None
    try:
        # Spell the "Z" (Zulu/UTC) designator as an explicit offset so that
        # interpreters whose `fromisoformat` does not understand "Z" still parse it.
        return datetime.datetime.fromisoformat(datetime_str.replace("Z", "+00:00"))
    except (TypeError, ValueError):
        # Not an ISO formatted date/datetime; the caller keeps the original value.
        return None


def is_date(dt):
    """Objects of class datetime.date and datetime.datetime will be recognized as dates

    :param dt: The object to inspect
    :return: True if dt is a datetime.date or datetime.datetime instance, False otherwise
    """
    # datetime.datetime is a subclass of datetime.date; both are listed to keep
    # the intent explicit.
    return isinstance(dt, (datetime.date, datetime.datetime))
Expand All @@ -333,9 +353,18 @@ def is_date(dt):
def to_list(value):
    """Convert the value (an iterable or a scalar value) to a list.

    Any string element that `datetime_or_none` is able to parse is replaced by
    the resulting datetime object; every other element is kept unchanged.

    :param value: An iterable of values, or a single scalar value
    :return: A list holding the (possibly converted) values
    """
    # A string is iterable, but it is treated as a single scalar value here.
    if isinstance(value, collections.abc.Iterable) and not isinstance(value, str):
        items = value
    else:
        items = [value]

    converted = []
    for item in items:
        # Parse each element individually. Handing the whole container to
        # `datetime_or_none` always yields None, so nothing would be converted.
        parsed = datetime_or_none(item) if isinstance(item, str) else None
        converted.append(item if parsed is None else parsed)
    return converted


def _to_iterable(value):
Expand Down Expand Up @@ -395,26 +424,48 @@ def make_bogus_binary_file(n=1 * KB, filepath=None, printprogress=False):
return normalize_path(filepath)


def to_unix_epoch_time(dt: typing.Union[datetime.date, datetime.datetime, str]) -> int:
    """
    Convert a `datetime.date`, a `datetime.datetime`, or an ISO formatted string
    to UNIX time expressed in milliseconds.

    Values without timezone information (plain dates and naive datetimes) are
    assumed to be in the local timezone.

    :param dt: The date, datetime, or ISO formatted string to convert
    :return: The number of whole milliseconds between UNIX_EPOCH and dt
    """
    if isinstance(dt, str):
        # Spell a "Z" designator as an explicit UTC offset before parsing.
        dt = datetime.datetime.fromisoformat(dt.replace("Z", "+00:00"))

    # datetime.datetime is a subclass of datetime.date, so it is tested first.
    if isinstance(dt, datetime.datetime):
        # If the datetime is not timezone aware, assume it is in the local timezone.
        # This is required in order for windows to work with the `astimezone` method.
        if dt.tzinfo is None:
            current_timezone = datetime.datetime.now().astimezone().tzinfo
            dt = dt.replace(tzinfo=current_timezone)
        datetime_utc = dt.astimezone(datetime.timezone.utc)
    else:
        # A plain date is taken to mean midnight in the local timezone.
        current_timezone = datetime.datetime.now().astimezone().tzinfo
        datetime_utc = datetime.datetime.combine(dt, datetime.time(0, 0, 0, 0)).replace(
            tzinfo=current_timezone
        )
    return int((datetime_utc - UNIX_EPOCH).total_seconds() * 1000)


def to_unix_epoch_time_secs(
    dt: typing.Union[datetime.date, datetime.datetime]
) -> float:
    """
    Convert a `datetime.date` or a `datetime.datetime` to UNIX time expressed
    in seconds.

    Values without timezone information (plain dates and naive datetimes) are
    assumed to be in the local timezone.

    :param dt: The date or datetime to convert
    :return: The number of seconds (possibly fractional) between UNIX_EPOCH and dt
    """
    # datetime.datetime is a subclass of datetime.date, so it is tested first.
    if isinstance(dt, datetime.datetime):
        # If the datetime is not timezone aware, assume it is in the local timezone.
        # This is required in order for windows to work with the `astimezone` method.
        if dt.tzinfo is None:
            current_timezone = datetime.datetime.now().astimezone().tzinfo
            dt = dt.replace(tzinfo=current_timezone)
        datetime_utc = dt.astimezone(datetime.timezone.utc)
    else:
        # A plain date is taken to mean midnight in the local timezone.
        current_timezone = datetime.datetime.now().astimezone().tzinfo
        datetime_utc = datetime.datetime.combine(dt, datetime.time(0, 0, 0, 0)).replace(
            tzinfo=current_timezone
        )
    return (datetime_utc - UNIX_EPOCH).total_seconds()


def from_unix_epoch_time_secs(secs):
Expand All @@ -426,12 +477,23 @@ def from_unix_epoch_time_secs(secs):
# so, here's a hack that enables ancient events, such as Chris's birthday to be
# converted from milliseconds since the UNIX epoch to higher level Datetime objects. Ha!
if platform.system() == "Windows" and secs < 0:
mirror_date = datetime.datetime.utcfromtimestamp(abs(secs))
return UNIX_EPOCH - (mirror_date - UNIX_EPOCH)
return datetime.datetime.utcfromtimestamp(secs)
mirror_date = datetime.datetime.utcfromtimestamp(abs(secs)).replace(
tzinfo=datetime.timezone.utc
)

result = (UNIX_EPOCH - (mirror_date - UNIX_EPOCH)).replace(
tzinfo=datetime.timezone.utc
)

return result
datetime_instance = datetime.datetime.utcfromtimestamp(secs).replace(
tzinfo=datetime.timezone.utc
)

return datetime_instance


def from_unix_epoch_time(ms):
def from_unix_epoch_time(ms) -> datetime.datetime:
"""Returns a Datetime object given milliseconds since midnight Jan 1, 1970."""

if isinstance(ms, str):
Expand Down
28 changes: 20 additions & 8 deletions synapseutils/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .monitor import notifyMe
from synapseclient.entity import is_container
from synapseclient.core import config
from synapseclient.core.utils import id_of, is_url, is_synapse_id_str
from synapseclient.core.utils import id_of, is_url, is_synapse_id_str, datetime_or_none
from synapseclient import File, table
from synapseclient.core.pool_provider import SingleThreadExecutor
from synapseclient.core import utils
Expand Down Expand Up @@ -1059,12 +1059,13 @@ def syncToSynapse(

**Example manifest file**

=============== ======== ======= ======= =========================== ============================
path parent annot1 annot2 used executed
=============== ======== ======= ======= =========================== ============================
/path/file1.txt syn1243 "bar" 3.1415 "syn124;/path/file2.txt" "https://github.org/foo/bar"
/path/file2.txt syn12433 "baz" 2.71 "" "https://github.org/foo/baz"
=============== ======== ======= ======= =========================== ============================
=============== ======== ======= ======= ========================= =========================== ============================
path parent annot1 annot2 collection_date used executed
=============== ======== ======= ======= ========================= =========================== ============================
/path/file1.txt syn1243 "bar" 3.1415 2023-12-04 07:00:00+00:00 "syn124;/path/file2.txt" "https://github.org/foo/bar"
/path/file2.txt syn12433 "baz" 2.71 2001-01-01 15:00:00+07:00 "" "https://github.org/foo/baz"
/path/file3.txt syn12455 "zzz" 3.52 2023-12-04T07:00:00Z "" "https://github.org/foo/zzz"
=============== ======== ======= ======= ========================= =========================== ============================

"""
df = readManifestFile(syn, manifestFile)
Expand Down Expand Up @@ -1115,7 +1116,18 @@ def _manifest_upload(syn, df):

# if a item in the manifest upload is an empty string we do not want to upload that
# as an empty string annotation
file.annotations = {k: v for k, v in annotations.items() if v != ""}
file_annotations = {}

for annotation_key, annotation_value in annotations.items():
if annotation_value is None or annotation_value == "":
continue
possible_datetime = None
if isinstance(annotation_value, str):
possible_datetime = datetime_or_none(annotation_value)
file_annotations[annotation_key] = (
annotation_value if possible_datetime is None else possible_datetime
)
file.annotations = file_annotations

item = _SyncUploadItem(
file,
Expand Down
9 changes: 8 additions & 1 deletion tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import platform
import uuid
import os
import os, time
import sys
import shutil
import tempfile
Expand Down Expand Up @@ -151,3 +151,10 @@ def setup_otel():
)
else:
trace.set_tracer_provider(TracerProvider(sampler=ALWAYS_OFF))


@pytest.fixture(autouse=True)
def set_timezone():
    """Set the TZ environment variable to UTC before every test.

    NOTE(review): presumably this keeps conversions of naive datetimes, which
    depend on the local timezone, deterministic across machines - confirm.
    """
    os.environ["TZ"] = "UTC"
    # Windows is skipped here (`time.tzset` is documented as Unix only); on
    # other platforms tzset() re-reads TZ so the change takes effect.
    if platform.system() != "Windows":
        time.tzset()
7 changes: 4 additions & 3 deletions tests/integration/synapseclient/integration_test_Entity.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import uuid
import filecmp
import os
Expand Down Expand Up @@ -82,7 +83,7 @@ def test_Entity(syn: Synapse, project: Project, schedule_for_cleanup):
), ("description= %s" % a_file.description)
assert a_file["foo"][0] == "An arbitrary value", "foo= %s" % a_file["foo"][0]
assert a_file["bar"] == [33, 44, 55]
assert a_file["bday"][0] == Datetime(2013, 3, 15)
assert a_file["bday"][0] == Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc)
assert a_file.contentType == "text/flapdoodle", (
"contentType= %s" % a_file.contentType
)
Expand All @@ -107,7 +108,7 @@ def test_Entity(syn: Synapse, project: Project, schedule_for_cleanup):
a_file = syn.store(a_file, forceVersion=False)
assert a_file["foo"][0] == "Another arbitrary chunk of text data"
assert a_file["bar"] == [33, 44, 55]
assert a_file["bday"][0] == Datetime(2013, 3, 15)
assert a_file["bday"][0] == Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc)
assert a_file.new_key[0] == "A newly created value"
assert a_file.path == path
assert a_file.versionNumber == 1, "unexpected version number: " + str(
Expand All @@ -134,7 +135,7 @@ def test_Entity(syn: Synapse, project: Project, schedule_for_cleanup):
link = syn.get(link, followLink=True)
assert link["foo"][0] == "Another arbitrary chunk of text data"
assert link["bar"] == [33, 44, 55]
assert link["bday"][0] == Datetime(2013, 3, 15)
assert link["bday"][0] == Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc)
assert link.new_key[0] == "A newly created value"
assert utils.equal_paths(link.path, path)
assert link.versionNumber == 1, "unexpected version number: " + str(
Expand Down
6 changes: 4 additions & 2 deletions tests/integration/synapseclient/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import tempfile
import time
import uuid
from datetime import datetime
from datetime import datetime, timezone

from pandas.testing import assert_frame_equal
import pytest
Expand Down Expand Up @@ -564,7 +564,9 @@ def test_synapse_integer_columns_with_missing_values_from_dataframe(

@tracer.start_as_current_span("test_tables::test_store_table_datetime")
def test_store_table_datetime(syn, project):
current_datetime = datetime.fromtimestamp(round(time.time(), 3))
current_datetime = datetime.fromtimestamp(round(time.time(), 3)).replace(
tzinfo=timezone.utc
)
schema = syn.store(
Schema("testTable", [Column(name="testerino", columnType="DATE")], project)
)
Expand Down
43 changes: 34 additions & 9 deletions tests/integration/synapseutils/test_synapseutils_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import tempfile
from func_timeout import FunctionTimedOut, func_set_timeout
import pandas as pd
import numpy as np

import pytest

Expand Down Expand Up @@ -33,12 +34,15 @@ def __init__(self):
self.f2 = utils.make_bogus_data_file(n=10)
self.f3 = "https://www.synapse.org"

self.header = "path parent used executed activityName synapseStore foo\n"
self.row1 = '%s %s %s "%s;https://www.example.com" provName bar\n' % (
self.f1,
self.project.id,
self.f2,
self.f3,
self.header = "path parent used executed activityName synapseStore foo date_1 datetime_1 datetime_2 datetime_3\n"
self.row1 = (
'%s %s %s "%s;https://www.example.com" provName bar 2020-01-01 2023-12-04T07:00:00Z 2023-12-05 23:37:02.995000+00:00 2023-12-05 07:00:00+00:00\n'
% (
self.f1,
self.project.id,
self.f2,
self.f3,
)
)
self.row2 = (
'%s %s "syn12" " syn123 ;https://www.example.com" provName2 bar\n'
Expand Down Expand Up @@ -157,9 +161,30 @@ def test_syncToSynapse(test_state):
assert (
orig_anots.shape[1] == new_anots.shape[1]
) # Verify that we have the same number of cols
assert new_anots.equals(
orig_anots.loc[:, new_anots.columns]
), "Annotations different"

assert new_anots.loc[:]["foo"].equals(orig_anots.loc[:]["foo"])
# The dates in the manifest can accept a variety of formats, however we are always writing
# them back in the same expected format. Verify they're converted correctly.
assert new_anots.loc[:]["date_1"].tolist() == [
"2020-01-01 00:00:00+00:00",
np.nan,
np.nan,
]
assert new_anots.loc[:]["datetime_1"].tolist() == [
"2023-12-04 07:00:00+00:00",
np.nan,
np.nan,
]
assert new_anots.loc[:]["datetime_2"].tolist() == [
"2023-12-05 23:37:02.995000+00:00",
np.nan,
np.nan,
]
assert new_anots.loc[:]["datetime_3"].tolist() == [
"2023-12-05 07:00:00+00:00",
np.nan,
np.nan,
]

# Validate that provenance is correct
for provenanceType in ["executed", "used"]:
Expand Down
9 changes: 9 additions & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import logging
import platform
import urllib.request

from unittest import mock
import pytest
import os, time

from synapseclient import Synapse
from synapseclient.core.logging_setup import SILENT_LOGGER_NAME
Expand Down Expand Up @@ -42,6 +44,13 @@ def test_confirm_connections_blocked():
assert _BLOCKED_CONNECTION_MESSAGE == str(cm_ex.value)


@pytest.fixture(autouse=True)
def set_timezone():
    """Set the TZ environment variable to UTC before every test.

    NOTE(review): presumably this keeps conversions of naive datetimes, which
    depend on the local timezone, deterministic across machines - confirm.
    """
    os.environ["TZ"] = "UTC"
    # Windows is skipped here (`time.tzset` is documented as Unix only); on
    # other platforms tzset() re-reads TZ so the change takes effect.
    if platform.system() != "Windows":
        time.tzset()


@pytest.fixture(scope="session")
def syn():
"""
Expand Down
11 changes: 6 additions & 5 deletions tests/unit/synapseclient/unit_test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
############################################################

from datetime import datetime as Datetime
import datetime
from math import pi
import time
import uuid
Expand Down Expand Up @@ -155,10 +156,10 @@ def test_round_trip_annotations():
"zoo": [123.1, 456.2, 789.3],
"species": ["Moose"],
"birthdays": [
Datetime(1969, 4, 28),
Datetime(1973, 12, 8),
Datetime(2008, 1, 3),
Datetime(2013, 3, 15),
Datetime(1969, 4, 28, tzinfo=datetime.timezone.utc),
Datetime(1973, 12, 8, tzinfo=datetime.timezone.utc),
Datetime(2008, 1, 3, tzinfo=datetime.timezone.utc),
Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc),
],
"facts": [
True,
Expand Down Expand Up @@ -206,7 +207,7 @@ def test_idempotent_annotations():


def test_submission_status_annotations_round_trip():
april_28_1969 = Datetime(1969, 4, 28)
april_28_1969 = Datetime(1969, 4, 28, tzinfo=datetime.timezone.utc)
a = Annotations(
"syn123",
"7bdb83e9-a50a-46e4-987a-4962559f090f",
Expand Down
Loading