diff --git a/sopel/irc/utils.py b/sopel/irc/utils.py index 12ba09461..a812f9687 100644 --- a/sopel/irc/utils.py +++ b/sopel/irc/utils.py @@ -17,27 +17,32 @@ from sopel.lifecycle import deprecated -def safe(string): - """Remove newlines from a string. +def safe(string: str) -> str: + """Remove disallowed bytes from a string, and ensure Unicode. - :param str string: input text to process - :return: the string without newlines - :rtype: str + :param string: input text to process + :return: the string as Unicode without characters prohibited in IRC messages :raises TypeError: when ``string`` is ``None`` - This function removes newlines from a string and always returns a unicode - string (``str``), but doesn't strip or alter it in any other way:: + This function removes newlines and null bytes from a string. It will always + return a Unicode ``str``, even if given non-Unicode input, but doesn't strip + or alter the string in any other way:: - >>> safe('some text\\r\\n') + >>> safe('some \\x00text\\r\\n') 'some text' - This is useful to ensure a string can be used in a IRC message. + This is useful to ensure a string can be used in an IRC message. Parameters + can **never** contain NUL, CR, or LF octets, per :rfc:`2812#section-2.3.1`. .. versionchanged:: 7.1 This function now raises a :exc:`TypeError` instead of an unpredictable behaviour when given ``None``. + .. versionchanged:: 8.0.1 + + Also remove NUL (``\\x00``) in addition to CR/LF.
+ """ if string is None: raise TypeError('safe function requires a string, not NoneType') @@ -45,6 +50,7 @@ def safe(string): string = string.decode("utf8") string = string.replace('\n', '') string = string.replace('\r', '') + string = string.replace('\x00', '') return string diff --git a/test/irc/test_irc_utils.py b/test/irc/test_irc_utils.py index 94eab6298..47f2e3d9a 100644 --- a/test/irc/test_irc_utils.py +++ b/test/irc/test_irc_utils.py @@ -1,22 +1,28 @@ """Tests for core ``sopel.irc.utils``""" from __future__ import annotations +from itertools import permutations + import pytest from sopel.irc import utils -def test_safe(): +@pytest.mark.parametrize('s1, s2, s3', permutations(('\n', '\r', '\x00'))) +def test_safe(s1, s2, s3): text = 'some text' - assert utils.safe(text + '\r\n') == text - assert utils.safe(text + '\n') == text - assert utils.safe(text + '\r') == text - assert utils.safe('\r\n' + text) == text - assert utils.safe('\n' + text) == text - assert utils.safe('\r' + text) == text - assert utils.safe('some \r\ntext') == text - assert utils.safe('some \ntext') == text - assert utils.safe('some \rtext') == text + seq = ''.join((s1, s2, s3)) + + assert utils.safe(text + seq) == text + assert utils.safe(seq + text) == text + assert utils.safe('some ' + seq + 'text') == text + assert utils.safe( + s1 + + 'some ' + + s2 + + 'text' + + s3 + ) == text def test_safe_empty(): @@ -24,20 +30,23 @@ def test_safe_empty(): assert utils.safe(text) == text -def test_safe_null(): +def test_safe_none(): with pytest.raises(TypeError): utils.safe(None) -def test_safe_bytes(): +@pytest.mark.parametrize('b1, b2, b3', permutations((b'\n', b'\r', b'\x00'))) +def test_safe_bytes(b1, b2, b3): text = b'some text' - assert utils.safe(text) == text.decode('utf-8') - assert utils.safe(text + b'\r\n') == text.decode('utf-8') - assert utils.safe(text + b'\n') == text.decode('utf-8') - assert utils.safe(text + b'\r') == text.decode('utf-8') - assert utils.safe(b'\r\n' + text) == 
text.decode('utf-8') - assert utils.safe(b'\n' + text) == text.decode('utf-8') - assert utils.safe(b'\r' + text) == text.decode('utf-8') - assert utils.safe(b'some \r\ntext') == text.decode('utf-8') - assert utils.safe(b'some \ntext') == text.decode('utf-8') - assert utils.safe(b'some \rtext') == text.decode('utf-8') + seq = b''.join((b1, b2, b3)) + + assert utils.safe(text + seq) == text.decode('utf-8') + assert utils.safe(seq + text) == text.decode('utf-8') + assert utils.safe(b'some ' + seq + b'text') == text.decode('utf-8') + assert utils.safe( + b1 + + b'some ' + + b2 + + b'text' + + b3 + ) == text.decode('utf-8')