Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: speedup date parsing using ciso8601 #590

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ jobs:
pip install --upgrade --upgrade-strategy eager --pre -e .[test] pytest-cov codecov 'coverage<5'
pip freeze
- name: Run the tests
run: py.test --cov jupyter_client jupyter_client
run: |
py.test --cov jupyter_client jupyter_client
pip install ciso8601
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be added to the existing pip install, one step above, instead of being part of the test stage?

py.test --cov jupyter_client jupyter_client
- name: Code coverage
run: codecov
9 changes: 8 additions & 1 deletion jupyter_client/jsonutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
from datetime import datetime
import re
import warnings
try:
import ciso8601
except ImportError:
ciso8601 = None

from dateutil.parser import parse as _dateutil_parse
from dateutil.tz import tzlocal
Expand Down Expand Up @@ -52,7 +56,10 @@ def parse_date(s):
return s
m = ISO8601_PAT.match(s)
if m:
dt = _dateutil_parse(s)
if ciso8601 is not None:
dt = ciso8601.parse_datetime(s)
else:
dt = _dateutil_parse(s)
return _ensure_tzinfo(dt)
return s

Expand Down
10 changes: 7 additions & 3 deletions jupyter_client/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

from traitlets.config.configurable import Configurable, LoggingConfigurable
from ipython_genutils.importstring import import_item
from jupyter_client.jsonutil import extract_dates, squash_dates, date_default
from jupyter_client.jsonutil import extract_dates, squash_dates, date_default, parse_date
from ipython_genutils.py3compat import str_to_bytes, str_to_unicode
from traitlets import (
CBytes, Unicode, Bool, Any, Instance, Set, DottedObjectName, CUnicode,
Expand Down Expand Up @@ -928,10 +928,14 @@ def deserialize(self, msg_list, content=True, copy=True):
if not len(msg_list) >= minlen:
raise TypeError("malformed message, must have at least %i elements"%minlen)
header = self.unpack(msg_list[1])
message['header'] = extract_dates(header)

message['header'] = header
message['header']['date'] = parse_date(message['header']['date'])
message['msg_id'] = header['msg_id']
message['msg_type'] = header['msg_type']
message['parent_header'] = extract_dates(self.unpack(msg_list[2]))
message['parent_header'] = self.unpack(msg_list[2])
if 'date' in message['parent_header']:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will break current stable ipyparallel, which currently relies on the behavior of parsing any date-like string, but I can deal with that. I never should have made it find and parse any valid date objects!

I suspect that parsing just the date will break exactly as many things as disabling parsing entirely, though. Which is to say: ipyparallel and probably nothing else.

message['parent_header']['date'] = parse_date(message['parent_header']['date'])
message['metadata'] = self.unpack(msg_list[3])
if content:
message['content'] = self.unpack(msg_list[4])
Expand Down
23 changes: 23 additions & 0 deletions jupyter_client/tests/test_performance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pytest

from jupyter_client import session as ss

from .test_session import SessionTestCase

@pytest.mark.usefixtures('benchmark')
class TestPerformance(SessionTestCase):
    """Benchmark of ``Session.deserialize`` driven by the ``benchmark`` fixture."""

    @pytest.fixture(autouse=True)
    def _request_benchmark(self, benchmark):
        # Keep the fixture on the instance so test methods can reach it
        # through ``self`` instead of a function argument.
        self.benchmark = benchmark

    def test_deserialize_performance(self):
        """Time deserializing one serialized message that carries a parent header."""
        session = self.session

        def deserialize_once(wire_frames):
            # The same frames are replayed on every benchmark round, so the
            # digest history is emptied first.
            # NOTE(review): presumably this avoids duplicate-message
            # rejection -- confirm against Session.deserialize.
            session.digest_history = []
            remaining = session.feed_identities(wire_frames)[1]
            session.deserialize(remaining)

        # Both payloads carry a timestamp so the message contains date values.
        content = {'t': ss.utcnow()}
        metadata = {'t': ss.utcnow()}
        session.auth = None
        parent = session.msg('msg')
        child = session.msg(
            'msg',
            content=content,
            metadata=metadata,
            parent=parent['header'],
        )
        wire_frames = session.serialize(child)
        self.benchmark(deserialize_once, wire_frames)
1 change: 1 addition & 0 deletions jupyter_client/tests/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,3 +344,4 @@ def test_clone(self):
s._add_digest(digest)
assert digest in s.digest_history
assert digest not in s2.digest_history

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def run(self):
],
python_requires = '>=3.5',
extras_require = {
'test': ['ipykernel', 'ipython', 'mock', 'pytest', 'pytest-asyncio', 'async_generator', 'pytest-timeout'],
'test': ['ipykernel', 'ipython', 'mock', 'pytest', 'pytest-asyncio', 'async_generator', 'pytest-timeout', 'pytest-benchmark'],
'doc': open('docs/requirements.txt').read().splitlines(),
},
cmdclass = {
Expand Down