From 8f6b91da60b552cd9e5e1b59d3352f86f4790c3f Mon Sep 17 00:00:00 2001 From: "Maarten A. Breddels" Date: Tue, 17 Nov 2020 12:59:04 +0100 Subject: [PATCH 1/3] perf: speedup date parsing using ciso8601 --- .github/workflows/main.yml | 5 ++++- jupyter_client/jsonutil.py | 9 +++++++++ jupyter_client/tests/test_session.py | 18 ++++++++++++++++++ setup.py | 2 +- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e5bbd1444..18c0e2bf6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -37,6 +37,9 @@ jobs: pip install --upgrade --upgrade-strategy eager --pre -e .[test] pytest-cov codecov 'coverage<5' pip freeze - name: Run the tests - run: py.test --cov jupyter_client jupyter_client + run: | + py.test --cov jupyter_client jupyter_client + pip install ciso8601 + py.test --cov jupyter_client jupyter_client - name: Code coverage run: codecov diff --git a/jupyter_client/jsonutil.py b/jupyter_client/jsonutil.py index d3a472fee..f5ea9f20f 100644 --- a/jupyter_client/jsonutil.py +++ b/jupyter_client/jsonutil.py @@ -6,6 +6,10 @@ from datetime import datetime import re import warnings +try: + import ciso8601 +except ImportError: + ciso8601 = None from dateutil.parser import parse as _dateutil_parse from dateutil.tz import tzlocal @@ -50,6 +54,11 @@ def parse_date(s): """ if s is None: return s + if ciso8601 is not None: + try: + return _ensure_tzinfo(ciso8601.parse_datetime(s)) + except ValueError: + return s m = ISO8601_PAT.match(s) if m: dt = _dateutil_parse(s) diff --git a/jupyter_client/tests/test_session.py b/jupyter_client/tests/test_session.py index d0ec66261..ff2859877 100644 --- a/jupyter_client/tests/test_session.py +++ b/jupyter_client/tests/test_session.py @@ -344,3 +344,21 @@ def test_clone(self): s._add_digest(digest) assert digest in s.digest_history assert digest not in s2.digest_history + + +@pytest.mark.usefixtures('no_copy_threshold', 'benchmark') +class TestPerformance(SessionTestCase): + @pytest.fixture(autouse=True) + def _request_benchmark(self, benchmark): + self.benchmark = benchmark + + def test_deserialize_performance(self): + def run(data): + self.session.digest_history = [] + self.session.deserialize(self.session.feed_identities(data)[1]) + content = dict(t=ss.utcnow()) + metadata = dict(t=ss.utcnow()) + p = self.session.msg('msg') + msg = self.session.msg('msg', content=content, metadata=metadata, parent=p['header']) + data = self.session.serialize(msg) + self.benchmark(run, data) diff --git a/setup.py b/setup.py index 2da642d5c..912e8113c 100644 --- a/setup.py +++ b/setup.py @@ -77,7 +77,7 @@ def run(self): ], python_requires = '>=3.5', extras_require = { - 'test': ['ipykernel', 'ipython', 'mock', 'pytest', 'pytest-asyncio', 'async_generator', 'pytest-timeout'], + 'test': ['ipykernel', 'ipython', 'mock', 'pytest', 'pytest-asyncio', 'async_generator', 'pytest-timeout', 'pytest-benchmark'], 'doc': open('docs/requirements.txt').read().splitlines(), }, cmdclass = { From 898aa89946d0eba113fe17ac473c331639bdc644 Mon Sep 17 00:00:00 2001 From: "Maarten A. Breddels" Date: Tue, 17 Nov 2020 13:22:28 +0100 Subject: [PATCH 2/3] rely on the regex for check --- jupyter_client/jsonutil.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/jupyter_client/jsonutil.py b/jupyter_client/jsonutil.py index f5ea9f20f..d63f06e1b 100644 --- a/jupyter_client/jsonutil.py +++ b/jupyter_client/jsonutil.py @@ -54,14 +54,12 @@ def parse_date(s): """ if s is None: return s - if ciso8601 is not None: - try: - return _ensure_tzinfo(ciso8601.parse_datetime(s)) - except ValueError: - return s m = ISO8601_PAT.match(s) if m: - dt = _dateutil_parse(s) + if ciso8601 is not None: + dt = ciso8601.parse_datetime(s) + else: + dt = _dateutil_parse(s) return _ensure_tzinfo(dt) return s From d164f41b8e3ee0f1f717f20fa7d34ef4d8535b55 Mon Sep 17 00:00:00 2001 From: "Maarten A. Breddels" Date: Tue, 17 Nov 2020 16:52:50 +0100 Subject: [PATCH 3/3] assume where the date fields are --- jupyter_client/session.py | 10 +++++++--- jupyter_client/tests/test_performance.py | 23 +++++++++++++++++++++++ jupyter_client/tests/test_session.py | 17 ----------------- 3 files changed, 30 insertions(+), 20 deletions(-) create mode 100644 jupyter_client/tests/test_performance.py diff --git a/jupyter_client/session.py b/jupyter_client/session.py index 437dc6173..bd2af25f4 100644 --- a/jupyter_client/session.py +++ b/jupyter_client/session.py @@ -38,7 +38,7 @@ from traitlets.config.configurable import Configurable, LoggingConfigurable from ipython_genutils.importstring import import_item -from jupyter_client.jsonutil import extract_dates, squash_dates, date_default +from jupyter_client.jsonutil import extract_dates, squash_dates, date_default, parse_date from ipython_genutils.py3compat import str_to_bytes, str_to_unicode from traitlets import ( CBytes, Unicode, Bool, Any, Instance, Set, DottedObjectName, CUnicode, @@ -928,10 +928,14 @@ def deserialize(self, msg_list, content=True, copy=True): if not len(msg_list) >= minlen: raise TypeError("malformed message, must have at least %i elements"%minlen) header = self.unpack(msg_list[1]) - message['header'] = extract_dates(header) + + message['header'] = header + message['header']['date'] = parse_date(message['header']['date']) message['msg_id'] = header['msg_id'] message['msg_type'] = header['msg_type'] - message['parent_header'] = extract_dates(self.unpack(msg_list[2])) + message['parent_header'] = self.unpack(msg_list[2]) + if 'date' in message['parent_header']: + message['parent_header']['date'] = parse_date(message['parent_header']['date']) message['metadata'] = self.unpack(msg_list[3]) if content: message['content'] = self.unpack(msg_list[4]) diff --git a/jupyter_client/tests/test_performance.py b/jupyter_client/tests/test_performance.py new file mode 100644 index 000000000..9c19f88f4 --- /dev/null +++ b/jupyter_client/tests/test_performance.py @@ -0,0 +1,23 @@ +import pytest + +from jupyter_client import session as ss + +from .test_session import SessionTestCase + +@pytest.mark.usefixtures('benchmark') +class TestPerformance(SessionTestCase): + @pytest.fixture(autouse=True) + def _request_benchmark(self, benchmark): + self.benchmark = benchmark + + def test_deserialize_performance(self): + def run(data): + self.session.digest_history = [] + self.session.deserialize(self.session.feed_identities(data)[1]) + content = dict(t=ss.utcnow()) + metadata = dict(t=ss.utcnow()) + self.session.auth = None + p = self.session.msg('msg') + msg = self.session.msg('msg', content=content, metadata=metadata, parent=p['header']) + data = self.session.serialize(msg) + self.benchmark(run, data) diff --git a/jupyter_client/tests/test_session.py b/jupyter_client/tests/test_session.py index ff2859877..66dacc948 100644 --- a/jupyter_client/tests/test_session.py +++ b/jupyter_client/tests/test_session.py @@ -345,20 +345,3 @@ def test_clone(self): assert digest in s.digest_history assert digest not in s2.digest_history - -@pytest.mark.usefixtures('no_copy_threshold', 'benchmark') -class TestPerformance(SessionTestCase): - @pytest.fixture(autouse=True) - def _request_benchmark(self, benchmark): - self.benchmark = benchmark - - def test_deserialize_performance(self): - def run(data): - self.session.digest_history = [] - self.session.deserialize(self.session.feed_identities(data)[1]) - content = dict(t=ss.utcnow()) - metadata = dict(t=ss.utcnow()) - p = self.session.msg('msg') - msg = self.session.msg('msg', content=content, metadata=metadata, parent=p['header']) - data = self.session.serialize(msg) - self.benchmark(run, data)