From 5cc69c2d086d8ae047048d7fb2b085e17b5e3f9b Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Wed, 6 Sep 2023 14:20:49 +0100 Subject: [PATCH] Replace deprecated Py_UNICODE APIs to support Python 3.12 Fixes #18 This is the minimum to let it build on Python 3.12, but I am not sure if it's the most efficient way. --- .github/workflows/wheels.yml | 5 +- src/openstep_plist/_test.pyx | 51 ++++++------- src/openstep_plist/parser.pxd | 12 +-- src/openstep_plist/parser.pyx | 124 +++++++++++++++++------------- src/openstep_plist/util.pxd | 5 +- src/openstep_plist/util.pyx | 5 +- src/openstep_plist/writer.pxd | 2 +- src/openstep_plist/writer.pyx | 138 +++++++++++++++++++--------------- tests/test_writer.py | 4 +- 9 files changed, 184 insertions(+), 162 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index e1d1fd9..cb33817 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -22,11 +22,8 @@ jobs: # Skip # # * Python 3.6 on all platforms, - # * Python 3.12 on all platforms, and # * PyPy on Windows. - # - # TODO: Activate Python 3.12 when issue 18 is resolved. - CIBW_SKIP: cp36-* cp312-* pp*-win_amd64 + CIBW_SKIP: cp36-* pp*-win_amd64 steps: - uses: actions/checkout@v2 with: diff --git a/src/openstep_plist/_test.pyx b/src/openstep_plist/_test.pyx index 16cfef5..f07ea6d 100644 --- a/src/openstep_plist/_test.pyx +++ b/src/openstep_plist/_test.pyx @@ -10,13 +10,13 @@ from .parser cimport ( parse_plist_string as _parse_plist_string, ) from .util cimport ( - PY_NARROW_UNICODE, tounicode, is_valid_unquoted_string_char as _is_valid_unquoted_string_char, ) from .writer cimport string_needs_quotes as _string_needs_quotes +from cpython.mem cimport PyMem_Free from cpython.unicode cimport ( - PyUnicode_FromUnicode, PyUnicode_AS_UNICODE, PyUnicode_GET_SIZE, + PyUnicode_AsUCS4Copy, PyUnicode_GET_LENGTH, ) @@ -24,65 +24,64 @@ cdef class ParseContext: cdef unicode s cdef ParseInfo pi + cdef Py_UCS4 *buf cdef object dict_type - @classmethod - def fromstring( - ParseContext cls, - string, - Py_ssize_t offset=0, - dict_type=dict, - bint use_numbers=False + def __cinit__( + self, + string, + Py_ssize_t offset=0, + dict_type=dict, + bint use_numbers=False ): - cdef ParseContext self = ParseContext.__new__(cls) self.s = tounicode(string) - cdef Py_ssize_t length = PyUnicode_GET_SIZE(self.s) - cdef Py_UNICODE* buf = PyUnicode_AS_UNICODE(self.s) + cdef Py_ssize_t length = PyUnicode_GET_LENGTH(self.s) + self.buf = PyUnicode_AsUCS4Copy(self.s) + if not self.buf: + raise MemoryError() self.dict_type = dict_type self.pi = ParseInfo( - begin=buf, - curr=buf + offset, - end=buf + length, + begin=self.buf, + curr=self.buf + offset, + end=self.buf + length, dict_type=dict_type, use_numbers=use_numbers, ) - return self + def __dealloc__(self): + PyMem_Free(self.buf) -def is_narrow_unicode(): - return PY_NARROW_UNICODE - -def is_valid_unquoted_string_char(Py_UNICODE c): +def is_valid_unquoted_string_char(Py_UCS4 c): return _is_valid_unquoted_string_char(c) def line_number_strings(s, offset=0): - cdef ParseContext ctx = ParseContext.fromstring(s, offset) + cdef ParseContext ctx = ParseContext(s, offset) return _line_number_strings(&ctx.pi) def advance_to_non_space(s, offset=0): - cdef ParseContext ctx = ParseContext.fromstring(s, offset) + cdef ParseContext ctx = ParseContext(s, offset) eof = not _advance_to_non_space(&ctx.pi) return None if eof else s[ctx.pi.curr - ctx.pi.begin] def get_slashed_char(s, offset=0): - cdef ParseContext ctx = ParseContext.fromstring(s, offset) + cdef ParseContext ctx = ParseContext(s, offset) return _get_slashed_char(&ctx.pi) def parse_unquoted_plist_string(s): - cdef ParseContext ctx = ParseContext.fromstring(s) + cdef ParseContext ctx = ParseContext(s) return _parse_unquoted_plist_string(&ctx.pi) def parse_plist_string(s, required=True): - cdef ParseContext ctx = ParseContext.fromstring(s) + cdef ParseContext ctx = ParseContext(s) return _parse_plist_string(&ctx.pi, required=required) def string_needs_quotes(s): - cdef ParseContext ctx = ParseContext.fromstring(s) + cdef ParseContext ctx = ParseContext(s) return _string_needs_quotes(ctx.pi.begin, len(s)) diff --git a/src/openstep_plist/parser.pxd b/src/openstep_plist/parser.pxd index ec173d0..5b5b6de 100644 --- a/src/openstep_plist/parser.pxd +++ b/src/openstep_plist/parser.pxd @@ -5,9 +5,9 @@ from libcpp.vector cimport vector ctypedef struct ParseInfo: - const Py_UNICODE *begin - const Py_UNICODE *curr - const Py_UNICODE *end + const Py_UCS4 *begin + const Py_UCS4 *curr + const Py_UCS4 *end void *dict_type bint use_numbers @@ -22,10 +22,10 @@ cdef uint32_t line_number_strings(ParseInfo *pi) cdef bint advance_to_non_space(ParseInfo *pi) -cdef Py_UNICODE get_slashed_char(ParseInfo *pi) +cdef Py_UCS4 get_slashed_char(ParseInfo *pi) -cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote) +cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UCS4 quote) cdef enum UnquotedType: @@ -34,7 +34,7 @@ cdef enum UnquotedType: UNQUOTED_FLOAT = 2 -cdef UnquotedType get_unquoted_string_type(const Py_UNICODE *buf, Py_ssize_t length) +cdef UnquotedType get_unquoted_string_type(const Py_UCS4 *buf, Py_ssize_t length) cdef object parse_unquoted_plist_string(ParseInfo *pi, bint ensure_string=*) diff --git a/src/openstep_plist/parser.pyx b/src/openstep_plist/parser.pyx index 0770006..f02d7d2 100644 --- a/src/openstep_plist/parser.pyx +++ b/src/openstep_plist/parser.pyx @@ -2,8 +2,10 @@ #distutils: define_macros=CYTHON_TRACE_NOGIL=1 from cpython.bytes cimport PyBytes_FromStringAndSize +from cpython.mem cimport PyMem_Free from cpython.unicode cimport ( - PyUnicode_FromUnicode, PyUnicode_AS_UNICODE, PyUnicode_GET_SIZE, + PyUnicode_4BYTE_KIND, PyUnicode_FromKindAndData, PyUnicode_AsUCS4Copy, + PyUnicode_GET_LENGTH, ) from libc.stdint cimport uint8_t, uint16_t, uint32_t from libcpp.algorithm cimport copy @@ -18,7 +20,6 @@ from .util cimport ( is_valid_unquoted_string_char, isdigit, isxdigit, - PY_NARROW_UNICODE, is_high_surrogate, is_low_surrogate, unicode_scalar_from_surrogates, @@ -27,7 +28,7 @@ from .util cimport ( cdef uint32_t line_number_strings(ParseInfo *pi): # warning: doesn't have a good idea of Unicode line separators - cdef const Py_UNICODE *p = pi.begin + cdef const Py_UCS4 *p = pi.begin cdef uint32_t count = 1 while p < pi.curr: if p[0] == c'\r': @@ -44,7 +45,7 @@ cdef bint advance_to_non_space(ParseInfo *pi): """Returns true if the advance found something that's not whitespace before the end of the buffer, false otherwise. """ - cdef Py_UNICODE ch2, ch3 + cdef Py_UCS4 ch2, ch3 while pi.curr < pi.end: ch2 = pi.curr[0] pi.curr += 1 @@ -109,8 +110,8 @@ cdef unsigned short* NEXT_STEP_DECODING_TABLE = [ @cython.boundscheck(False) @cython.wraparound(False) -cdef Py_UNICODE get_slashed_char(ParseInfo *pi): - cdef Py_UNICODE result +cdef Py_UCS4 get_slashed_char(ParseInfo *pi): + cdef Py_UCS4 result cdef uint8_t num cdef unsigned int codepoint, num_digits cdef unsigned long unum @@ -180,12 +181,12 @@ cdef Py_UNICODE get_slashed_char(ParseInfo *pi): return ch -cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote): - cdef vector[Py_UNICODE] string - cdef const Py_UNICODE *start_mark = pi.curr - cdef const Py_UNICODE *mark = pi.curr - cdef const Py_UNICODE *tmp - cdef Py_UNICODE ch, ch2 +cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UCS4 quote): + cdef vector[Py_UCS4] string + cdef const Py_UCS4 *start_mark = pi.curr + cdef const Py_UCS4 *mark = pi.curr + cdef const Py_UCS4 *tmp + cdef Py_UCS4 ch, ch2 while pi.curr < pi.end: ch = pi.curr[0] if ch == quote: @@ -201,7 +202,7 @@ cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote): # If we are on a "narrow" build, then the two code units already # represent a single codepoint internally. if ( - not PY_NARROW_UNICODE and is_high_surrogate(ch) + is_high_surrogate(ch) and pi.curr < pi.end and pi.curr[0] == c"\\" ): tmp = pi.curr @@ -228,7 +229,11 @@ cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote): # Advance past the quote character before returning pi.curr += 1 - return PyUnicode_FromUnicode(string.const_data(), string.size()) + return PyUnicode_FromKindAndData( + PyUnicode_4BYTE_KIND, + string.const_data(), + string.size() + ) def string_to_number(unicode s not None, bint required=True): @@ -236,17 +241,22 @@ def string_to_number(unicode s not None, bint required=True): Raises ValueError if the string is not a number. """ cdef: - Py_UNICODE c - Py_UNICODE* buf - Py_ssize_t length = PyUnicode_GET_SIZE(s) + Py_UCS4 c + Py_UCS4* buf + Py_ssize_t length = PyUnicode_GET_LENGTH(s) if length: - buf = PyUnicode_AS_UNICODE(s) - kind = get_unquoted_string_type(buf, length) - if kind == UNQUOTED_FLOAT: - return float(s) - elif kind == UNQUOTED_INTEGER: - return int(s) + buf = PyUnicode_AsUCS4Copy(s) + if not buf: + raise MemoryError() + try: + kind = get_unquoted_string_type(buf, length) + if kind == UNQUOTED_FLOAT: + return float(s) + elif kind == UNQUOTED_INTEGER: + return int(s) + finally: + PyMem_Free(buf) if required: raise ValueError(f"Could not convert string to float or int: {s!r}") @@ -255,9 +265,9 @@ def string_to_number(unicode s not None, bint required=True): cdef UnquotedType get_unquoted_string_type( - const Py_UNICODE *buf, Py_ssize_t length + const Py_UCS4 *buf, Py_ssize_t length ): - """Check if Py_UNICODE array starts with a digit, or '-' followed + """Check if Py_UCS4 array starts with a digit, or '-' followed by a digit, and if it contains a decimal point '.'. Return 0 if string cannot contain a number, 1 if it contains an integer, and 2 if it contains a float. @@ -268,8 +278,8 @@ cdef UnquotedType get_unquoted_string_type( bint maybe_number = True bint is_float = False int i = 0 - # deref here is safe since Py_UNICODE* are NULL-terminated - Py_UNICODE ch = buf[i] + # deref here is safe since Py_UCS4* are NULL-terminated + Py_UCS4 ch = buf[i] if ch == c'-': if length > 1: @@ -301,8 +311,8 @@ cdef UnquotedType get_unquoted_string_type( cdef object parse_unquoted_plist_string(ParseInfo *pi, bint ensure_string=False): cdef: - const Py_UNICODE *mark = pi.curr - Py_UNICODE ch + const Py_UCS4 *mark = pi.curr + Py_UCS4 ch Py_ssize_t length, i unicode s UnquotedType kind @@ -315,7 +325,7 @@ cdef object parse_unquoted_plist_string(ParseInfo *pi, bint ensure_string=False) break if pi.curr != mark: length = pi.curr - mark - s = PyUnicode_FromUnicode(mark, length) + s = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, mark, length) if not ensure_string and pi.use_numbers: kind = get_unquoted_string_type(mark, length) @@ -330,7 +340,7 @@ cdef object parse_unquoted_plist_string(ParseInfo *pi, bint ensure_string=False) cdef unicode parse_plist_string(ParseInfo *pi, bint required=True): - cdef Py_UNICODE ch + cdef Py_UCS4 ch if not advance_to_non_space(pi): if required: raise ParseError("Unexpected EOF while parsing string") @@ -437,7 +447,7 @@ cdef inline unsigned char from_hex_digit(unsigned char ch): cdef int get_data_bytes(ParseInfo *pi, vector[unsigned char]& result) except -1: cdef unsigned char first, second cdef int num_bytes_read = 0 - cdef Py_UNICODE ch1, ch2 + cdef Py_UCS4 ch1, ch2 while pi.curr < pi.end: ch1 = pi.curr[0] if ch1 == c'>': @@ -495,7 +505,7 @@ cdef bytes parse_plist_data(ParseInfo *pi): cdef object parse_plist_object(ParseInfo *pi, bint required=True): - cdef Py_UNICODE ch + cdef Py_UCS4 ch if not advance_to_non_space(pi): if required: raise ParseError("Unexpected EOF while parsing plist") @@ -523,8 +533,12 @@ cdef object parse_plist_object(ParseInfo *pi, bint required=True): def loads(string, dict_type=dict, bint use_numbers=False): cdef unicode s = tounicode(string) - cdef Py_ssize_t length = PyUnicode_GET_SIZE(s) - cdef Py_UNICODE* buf = PyUnicode_AS_UNICODE(s) + cdef Py_ssize_t length = PyUnicode_GET_LENGTH(s) + cdef const Py_UCS4 *begin + cdef object result = None + cdef Py_UCS4* buf = PyUnicode_AsUCS4Copy(s) + if not buf: + raise MemoryError() cdef ParseInfo pi = ParseInfo( begin=buf, @@ -534,25 +548,27 @@ def loads(string, dict_type=dict, bint use_numbers=False): use_numbers=use_numbers, ) - cdef const Py_UNICODE *begin = pi.curr - cdef object result = None - if not advance_to_non_space(&pi): - # a file consisting of only whitespace or empty is defined as an - # empty dictionary - result = {} - else: - result = parse_plist_object(&pi, required=True) - if result: - if advance_to_non_space(&pi): - if not isinstance(result, unicode): - raise ParseError( - "Junk after plist at line %d" % line_number_strings(&pi) - ) - else: - # keep parsing for a 'strings resource' file: it looks like - # a dictionary without the opening/closing curly braces - pi.curr = begin - result = parse_plist_dict_content(&pi) + try: + begin = pi.curr + if not advance_to_non_space(&pi): + # a file consisting of only whitespace or empty is defined as an + # empty dictionary + result = {} + else: + result = parse_plist_object(&pi, required=True) + if result: + if advance_to_non_space(&pi): + if not isinstance(result, unicode): + raise ParseError( + "Junk after plist at line %d" % line_number_strings(&pi) + ) + else: + # keep parsing for a 'strings resource' file: it looks like + # a dictionary without the opening/closing curly braces + pi.curr = begin + result = parse_plist_dict_content(&pi) + finally: + PyMem_Free(buf) return result diff --git a/src/openstep_plist/util.pxd b/src/openstep_plist/util.pxd index da99ac1..ac5437a 100644 --- a/src/openstep_plist/util.pxd +++ b/src/openstep_plist/util.pxd @@ -15,10 +15,7 @@ cdef unicode tounicode(s, encoding=*, errors=*) cdef tostr(s, encoding=*, errors=*) -cdef bint is_valid_unquoted_string_char(Py_UNICODE x) - - -cdef bint PY_NARROW_UNICODE +cdef bint is_valid_unquoted_string_char(Py_UCS4 x) cdef bint is_high_surrogate(uint32_t ch) diff --git a/src/openstep_plist/util.pyx b/src/openstep_plist/util.pyx index dd96ceb..24a67b1 100644 --- a/src/openstep_plist/util.pyx +++ b/src/openstep_plist/util.pyx @@ -26,7 +26,7 @@ cdef inline object tostr(s, encoding="ascii", errors="strict"): raise TypeError(f"Could not convert to str: {s!r}") -cdef inline bint is_valid_unquoted_string_char(Py_UNICODE x): +cdef inline bint is_valid_unquoted_string_char(Py_UCS4 x): return ( (x >= c'a' and x <= c'z') or (x >= c'A' and x <= c'Z') or @@ -40,9 +40,6 @@ cdef inline bint is_valid_unquoted_string_char(Py_UNICODE x): ) -cdef bint PY_NARROW_UNICODE = sizeof(Py_UNICODE) != 4 - - cdef inline bint is_high_surrogate(uint32_t ch): return ch >= 0xD800 and ch <= 0xDBFF diff --git a/src/openstep_plist/writer.pxd b/src/openstep_plist/writer.pxd index 5f782bd..ff26078 100644 --- a/src/openstep_plist/writer.pxd +++ b/src/openstep_plist/writer.pxd @@ -1,4 +1,4 @@ #cython: language_level=3 -cdef bint string_needs_quotes(const Py_UNICODE *a, Py_ssize_t length) +cdef bint string_needs_quotes(const Py_UCS4 *a, Py_ssize_t length) diff --git a/src/openstep_plist/writer.pyx b/src/openstep_plist/writer.pyx index 33e3e22..bc5f071 100644 --- a/src/openstep_plist/writer.pyx +++ b/src/openstep_plist/writer.pyx @@ -3,14 +3,15 @@ from collections import OrderedDict from cpython.unicode cimport ( - PyUnicode_FromUnicode, - PyUnicode_AS_UNICODE, - PyUnicode_AS_DATA, - PyUnicode_GET_SIZE, PyUnicode_AsUTF8String, + PyUnicode_4BYTE_KIND, + PyUnicode_FromKindAndData, + PyUnicode_AsUCS4Copy, + PyUnicode_GET_LENGTH, ) from cpython.bytes cimport PyBytes_GET_SIZE from cpython.object cimport Py_SIZE +from cpython.mem cimport PyMem_Free from libcpp.vector cimport vector from libc.stdint cimport uint16_t cimport cython @@ -19,20 +20,19 @@ from .util cimport ( tounicode, isdigit, isprint, - PY_NARROW_UNICODE, high_surrogate_from_unicode_scalar, low_surrogate_from_unicode_scalar, ) -cdef Py_UNICODE *HEX_MAP = [ +cdef Py_UCS4 *HEX_MAP = [ c'0', c'1', c'2', c'3', c'4', c'5', c'6', c'7', c'8', c'9', c'A', c'B', c'C', c'D', c'E', c'F', ] -cdef Py_UNICODE *ARRAY_SEP_NO_INDENT = [c',', c' '] -cdef Py_UNICODE *DICT_KEY_VALUE_SEP = [c' ', c'=', c' '] -cdef Py_UNICODE *DICT_ITEM_SEP_NO_INDENT = [c';', c' '] +cdef Py_UCS4 *ARRAY_SEP_NO_INDENT = [c',', c' '] +cdef Py_UCS4 *DICT_KEY_VALUE_SEP = [c' ', c'=', c' '] +cdef Py_UCS4 *DICT_ITEM_SEP_NO_INDENT = [c';', c' '] # this table includes A-Z, a-z, 0-9, '.', '_' and '$' @@ -56,14 +56,14 @@ cdef bint *VALID_UNQUOTED_CHARS = [ ] -cdef bint string_needs_quotes(const Py_UNICODE *a, Py_ssize_t length): +cdef bint string_needs_quotes(const Py_UCS4 *a, Py_ssize_t length): # empty string is always quoted if length == 0: return True cdef: Py_ssize_t i - Py_UNICODE ch + Py_UCS4 ch bint is_number = True bint seen_period = False @@ -91,8 +91,8 @@ cdef bint string_needs_quotes(const Py_UNICODE *a, Py_ssize_t length): return is_number -cdef inline void escape_unicode(uint16_t ch, Py_UNICODE *dest): - # caller must ensure 'dest' has rooms for 6 more Py_UNICODE +cdef inline void escape_unicode(uint16_t ch, Py_UCS4 *dest): + # caller must ensure 'dest' has rooms for 6 more Py_UCS4 dest[0] = c'\\' dest[1] = c'U' dest[5] = (ch & 15) + 55 if (ch & 15) > 9 else (ch & 15) + 48 @@ -107,7 +107,7 @@ cdef inline void escape_unicode(uint16_t ch, Py_UNICODE *dest): @cython.final cdef class Writer: - cdef vector[Py_UNICODE] *dest + cdef vector[Py_UCS4] *dest cdef bint unicode_escape cdef int float_precision cdef unicode indent @@ -121,7 +121,7 @@ cdef class Writer: indent=None, bint single_line_tuples=False, ): - self.dest = new vector[Py_UNICODE]() + self.dest = new vector[Py_UCS4]() self.unicode_escape = unicode_escape self.float_precision = float_precision @@ -158,15 +158,17 @@ cdef class Writer: return self.write_object(obj) cdef inline Py_ssize_t extend_buffer( - self, const Py_UNICODE *s, Py_ssize_t length - ) except +: + self, const Py_UCS4 *s, Py_ssize_t length + ) except -1: self.dest.reserve(self.dest.size() + length) self.dest.insert(self.dest.end(), s, s + length) return length cdef inline unicode _getvalue(self): - return PyUnicode_FromUnicode( - self.dest.const_data(), self.dest.size() + return PyUnicode_FromKindAndData( + PyUnicode_4BYTE_KIND, + self.dest.const_data(), + self.dest.size() ) cdef Py_ssize_t write_object(self, object obj) except -1: @@ -197,15 +199,15 @@ cdef class Writer: ) cdef Py_ssize_t write_quoted_string( - self, const Py_UNICODE *s, Py_ssize_t length + self, const Py_UCS4 *s, Py_ssize_t length ) except -1: cdef: - vector[Py_UNICODE] *dest = self.dest + vector[Py_UCS4] *dest = self.dest bint unicode_escape = self.unicode_escape - const Py_UNICODE *curr = s - const Py_UNICODE *end = &s[length] - Py_UNICODE *ptr + const Py_UCS4 *curr = s + const Py_UCS4 *end = &s[length] + Py_UCS4 *ptr unsigned long ch Py_ssize_t base_length = dest.size() Py_ssize_t new_length = 0 @@ -226,7 +228,7 @@ cdef class Writer: else: new_length += 4 elif unicode_escape: - if ch > 0xFFFF and not PY_NARROW_UNICODE: + if ch > 0xFFFF: new_length += 12 else: new_length += 6 @@ -235,7 +237,7 @@ cdef class Writer: curr += 1 dest.resize(base_length + new_length + 2) - ptr = dest.data() + base_length + ptr = dest.data() + base_length ptr[0] = '"' ptr += 1 @@ -276,7 +278,7 @@ cdef class Writer: ptr[0] = (ch & 7) + c'0' ptr += 3 elif unicode_escape: - if ch > 0xFFFF and not PY_NARROW_UNICODE: + if ch > 0xFFFF: escape_unicode(high_surrogate_from_unicode_scalar(ch), ptr) ptr += 6 escape_unicode(low_surrogate_from_unicode_scalar(ch), ptr) @@ -296,46 +298,60 @@ cdef class Writer: cdef inline Py_ssize_t write_unquoted_string(self, unicode string) except -1: cdef: - const Py_UNICODE *s = PyUnicode_AS_UNICODE(string) - Py_ssize_t length = PyUnicode_GET_SIZE(string) - - return self.extend_buffer(s, length) + Py_ssize_t length = PyUnicode_GET_LENGTH(string) + Py_UCS4 *s = PyUnicode_AsUCS4Copy(string) + if not s: + raise MemoryError() + try: + return self.extend_buffer(s, length) + finally: + PyMem_Free(s) cdef Py_ssize_t write_string(self, unicode string) except -1: cdef: - const Py_UNICODE *s = PyUnicode_AS_UNICODE(string) - Py_ssize_t length = PyUnicode_GET_SIZE(string) - - if string_needs_quotes(s, length): - return self.write_quoted_string(s, length) - else: - return self.extend_buffer(s, length) + Py_ssize_t length = PyUnicode_GET_LENGTH(string) + Py_UCS4 *s = PyUnicode_AsUCS4Copy(string) + if not s: + raise MemoryError() + try: + if string_needs_quotes(s, length): + return self.write_quoted_string(s, length) + else: + return self.extend_buffer(s, length) + finally: + PyMem_Free(s) cdef Py_ssize_t write_short_float_repr(self, object py_float) except -1: cdef: unicode string = f"{py_float:.{self.float_precision}f}" - const Py_UNICODE *s = PyUnicode_AS_UNICODE(string) - Py_ssize_t length = PyUnicode_GET_SIZE(string) - Py_UNICODE ch - - # read digits backwards, skipping all the '0's until either a - # non-'0' or '.' is found - while length > 0: - ch = s[length-1] - if ch == c'.': - length -= 1 # skip the trailing dot - break - elif ch != c'0': - break - length -= 1 - - return self.extend_buffer(s, length) + Py_ssize_t length = PyUnicode_GET_LENGTH(string) + Py_UCS4 ch + Py_UCS4 *s = PyUnicode_AsUCS4Copy(string) + + if not s: + raise MemoryError() + + try: + # read digits backwards, skipping all the '0's until either a + # non-'0' or '.' is found + while length > 0: + ch = s[length-1] + if ch == c'.': + length -= 1 # skip the trailing dot + break + elif ch != c'0': + break + length -= 1 + + return self.extend_buffer(s, length) + finally: + PyMem_Free(s) cdef Py_ssize_t write_data(self, bytes data) except -1: cdef: - vector[Py_UNICODE] *dest = self.dest + vector[Py_UCS4] *dest = self.dest const unsigned char *src = data - Py_UNICODE *ptr + Py_UCS4 *ptr Py_ssize_t length = PyBytes_GET_SIZE(data) Py_ssize_t extra_length, i, j @@ -346,7 +362,7 @@ cdef class Writer: j = dest.size() dest.resize(j + extra_length) - ptr = dest.data() + ptr = dest.data() ptr[j] = c'<' j += 1 @@ -375,7 +391,7 @@ cdef class Writer: Py_ssize_t last Py_ssize_t count Py_ssize_t i - vector[Py_UNICODE] *dest = self.dest + vector[Py_UCS4] *dest = self.dest unicode indent, newline_indent = "" if length == 0: @@ -420,7 +436,7 @@ cdef class Writer: Py_ssize_t last Py_ssize_t count Py_ssize_t i - vector[Py_UNICODE] *dest = self.dest + vector[Py_UCS4] *dest = self.dest unicode indent, newline_indent = "" if length == 0: @@ -464,7 +480,7 @@ cdef class Writer: cdef: unicode indent unicode newline_indent = "" - vector[Py_UNICODE] *dest = self.dest + vector[Py_UCS4] *dest = self.dest Py_ssize_t last, count, i if not d: @@ -519,7 +535,7 @@ cdef class Writer: cdef: unicode indent unicode newline_indent = "" - vector[Py_UNICODE] *dest = self.dest + vector[Py_UCS4] *dest = self.dest Py_ssize_t last, count, i if not d: diff --git a/tests/test_writer.py b/tests/test_writer.py index 77deabc..b00a919 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2,7 +2,7 @@ from __future__ import absolute_import, unicode_literals import openstep_plist from openstep_plist.writer import Writer -from openstep_plist._test import is_narrow_unicode, string_needs_quotes +from openstep_plist._test import string_needs_quotes from io import StringIO, BytesIO from collections import OrderedDict import string @@ -64,7 +64,7 @@ def test_quoted_string_no_unicode_escape(self): assert w.getvalue() == '"\u0410"' w = Writer(unicode_escape=False) - assert w.write("\U0001F4A9") == (4 if is_narrow_unicode() else 3) + assert w.write("\U0001F4A9") == 3 assert w.getvalue() == '"\U0001F4A9"' @pytest.mark.parametrize(