From 5487421d94aea5007e2c509fcc83c9f4a4352979 Mon Sep 17 00:00:00 2001 From: Etienne Stalmans Date: Tue, 1 Oct 2024 12:30:55 +0200 Subject: [PATCH 1/4] chore: adding a logger masking filter Use a logging.Filter to redact JWT tokens that may be in log messages. --- realtime/_async/channel.py | 4 ++-- realtime/_async/client.py | 3 ++- realtime/_async/push.py | 3 ++- realtime/logging_util.py | 19 +++++++++++++++++++ 4 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 realtime/logging_util.py diff --git a/realtime/_async/channel.py b/realtime/_async/channel.py index b39ddbd..07c3c7e 100644 --- a/realtime/_async/channel.py +++ b/realtime/_async/channel.py @@ -15,7 +15,7 @@ RealtimePresenceState, RealtimeSubscribeStates, ) - +from ..logging_util import TokenMaskingFilter from ..transformers import http_endpoint_url from .presence import ( AsyncRealtimePresence, @@ -29,7 +29,7 @@ from .client import AsyncRealtimeClient logger = logging.getLogger(__name__) - +logger.addFilter(TokenMaskingFilter()) class AsyncRealtimeChannel: """ diff --git a/realtime/_async/client.py b/realtime/_async/client.py index fda8487..3d07349 100644 --- a/realtime/_async/client.py +++ b/realtime/_async/client.py @@ -8,6 +8,7 @@ import websockets from ..exceptions import NotConnectedError +from ..logging_util import TokenMaskingFilter from ..message import Message from ..transformers import http_endpoint_url from ..types import ( @@ -21,7 +22,7 @@ from .channel import AsyncRealtimeChannel, RealtimeChannelOptions logger = logging.getLogger(__name__) - +logger.addFilter(TokenMaskingFilter()) def ensure_connection(func: Callback): @wraps(func) diff --git a/realtime/_async/push.py b/realtime/_async/push.py index 06c62fb..f1950e1 100644 --- a/realtime/_async/push.py +++ b/realtime/_async/push.py @@ -2,13 +2,14 @@ import logging from typing import TYPE_CHECKING, Any, Dict, List, Optional +from ..logging_util import TokenMaskingFilter from ..types import DEFAULT_TIMEOUT, Callback, _Hook if 
TYPE_CHECKING: from .channel import AsyncRealtimeChannel logger = logging.getLogger(__name__) - +logger.addFilter(TokenMaskingFilter()) class AsyncPush: def __init__( diff --git a/realtime/logging_util.py b/realtime/logging_util.py new file mode 100644 index 0000000..405990b --- /dev/null +++ b/realtime/logging_util.py @@ -0,0 +1,19 @@ +import logging +import re + +redact = r"(eyJh\w*\.)(\w*)\." + +class TokenMaskingFilter(logging.Filter): + """Mask access_tokens in logs""" + + def filter(self, record): + record.msg = self.sanitize_line(record.msg) + return True + + @staticmethod + def sanitize_line(line): + def gred(g): + """Redact the payload of the JWT, keeping the header and signature""" + return f"{g.group(1)}REDACTED." if len(g.groups()) > 1 else g + + return re.sub(redact, gred, line) \ No newline at end of file From d4a083e9be93fbed25dce48b13e7a02d6c442d41 Mon Sep 17 00:00:00 2001 From: Etienne Stalmans Date: Tue, 1 Oct 2024 12:58:12 +0200 Subject: [PATCH 2/4] fix formatting --- realtime/_async/channel.py | 2 ++ realtime/_async/client.py | 1 + realtime/_async/push.py | 1 + realtime/logging_util.py | 3 ++- 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/realtime/_async/channel.py b/realtime/_async/channel.py index 07c3c7e..b4b3069 100644 --- a/realtime/_async/channel.py +++ b/realtime/_async/channel.py @@ -15,6 +15,7 @@ RealtimePresenceState, RealtimeSubscribeStates, ) + from ..logging_util import TokenMaskingFilter from ..transformers import http_endpoint_url from .presence import ( @@ -31,6 +32,7 @@ logger = logging.getLogger(__name__) logger.addFilter(TokenMaskingFilter()) + class AsyncRealtimeChannel: """ `Channel` is an abstraction for a topic listener for an existing socket connection. 
diff --git a/realtime/_async/client.py b/realtime/_async/client.py index 3d07349..478997a 100644 --- a/realtime/_async/client.py +++ b/realtime/_async/client.py @@ -24,6 +24,7 @@ logger = logging.getLogger(__name__) logger.addFilter(TokenMaskingFilter()) + def ensure_connection(func: Callback): @wraps(func) def wrapper(*args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs) -> T_Retval: diff --git a/realtime/_async/push.py b/realtime/_async/push.py index f1950e1..fe17127 100644 --- a/realtime/_async/push.py +++ b/realtime/_async/push.py @@ -11,6 +11,7 @@ logger = logging.getLogger(__name__) logger.addFilter(TokenMaskingFilter()) + class AsyncPush: def __init__( self, diff --git a/realtime/logging_util.py b/realtime/logging_util.py index 405990b..7ed8f45 100644 --- a/realtime/logging_util.py +++ b/realtime/logging_util.py @@ -3,6 +3,7 @@ redact = r"(eyJh\w*\.)(\w*)\." + class TokenMaskingFilter(logging.Filter): """Mask access_tokens in logs""" @@ -16,4 +17,4 @@ def gred(g): """Redact the payload of the JWT, keeping the header and signature""" return f"{g.group(1)}REDACTED." if len(g.groups()) > 1 else g - return re.sub(redact, gred, line) \ No newline at end of file + return re.sub(redact, gred, line) From f38f35bda79ee0d981098c7d547908526e3a7764 Mon Sep 17 00:00:00 2001 From: Etienne Stalmans Date: Wed, 2 Oct 2024 10:46:18 +0200 Subject: [PATCH 3/4] updated regex for finding JWT --- realtime/logging_util.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/realtime/logging_util.py b/realtime/logging_util.py index 7ed8f45..5f50087 100644 --- a/realtime/logging_util.py +++ b/realtime/logging_util.py @@ -1,7 +1,11 @@ import logging import re -redact = r"(eyJh\w*\.)(\w*)\." +# redaction regex for detecting JWT tokens +#
header.payload.signature +# character set [a-zA-Z0-9_-] +# \w covers [a-zA-Z0-9_] +redact = r"(eyJh[-_\w]*\.)([-_\w]*)\." class TokenMaskingFilter(logging.Filter): From 053221bf99e5dd47e0c8dd0cb4d21b84801acb5b Mon Sep 17 00:00:00 2001 From: Etienne Stalmans Date: Wed, 2 Oct 2024 13:05:31 +0200 Subject: [PATCH 4/4] extend filtering to cover tuple and dict arguments for logging --- realtime/_async/client.py | 2 +- realtime/logging_util.py | 27 +++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/realtime/_async/client.py b/realtime/_async/client.py index 478997a..88bbc76 100644 --- a/realtime/_async/client.py +++ b/realtime/_async/client.py @@ -125,7 +125,7 @@ async def connect(self) -> None: while retries < self.max_retries: try: - self.ws_connection = await websockets.connect(self.url) + self.ws_connection = await websockets.connect(self.url, logger=logger) if self.ws_connection.open: logger.info("Connection was successful") return await self._on_connect() diff --git a/realtime/logging_util.py b/realtime/logging_util.py index 5f50087..cf04c30 100644 --- a/realtime/logging_util.py +++ b/realtime/logging_util.py @@ -1,3 +1,4 @@ +import copy import logging import re @@ -8,17 +9,35 @@ redact = r"(eyJh[-_\w]*\.)([-_\w]*)\." +def gred(g): + """Redact the payload of the JWT, keeping the header and signature""" + return f"{g.group(1)}REDACTED." if len(g.groups()) > 1 else g + + class TokenMaskingFilter(logging.Filter): """Mask access_tokens in logs""" def filter(self, record): record.msg = self.sanitize_line(record.msg) + record.args = self.sanitize_args(record.args) return True @staticmethod - def sanitize_line(line): - def gred(g): - """Redact the payload of the JWT, keeping the header and signature""" - return f"{g.group(1)}REDACTED."
if len(g.groups()) > 1 else g + def sanitize_args(d): + if isinstance(d, dict): + d = d.copy() # shallow copy so the caller's mapping is never modified + for k, v in d.items(): + d[k] = re.sub(redact, gred, v) if isinstance(v, str) else v # staticmethod: there is no self here; non-str values pass through untouched + elif isinstance(d, tuple): + # a fresh list is enough: str is immutable and list slots are only rebound, so the + # caller's tuple is never altered; deepcopy could raise on uncopyable args (locks, sockets) + y = list(d) + for x, value in enumerate(y): + if isinstance(value, str): + y[x] = re.sub(redact, gred, value) + return tuple(y) # convert the list back to a tuple + return d + @staticmethod + def sanitize_line(line): return re.sub(redact, gred, line)