From 5cc69c2d086d8ae047048d7fb2b085e17b5e3f9b Mon Sep 17 00:00:00 2001
From: Cosimo Lupo <clupo@google.com>
Date: Wed, 6 Sep 2023 14:20:49 +0100
Subject: [PATCH] Replace deprecated Py_UNICODE APIs to support Python 3.12

Fixes #18

This is the minimum change needed to make it build on Python 3.12, but I am not sure it is the most efficient approach.
---
 .github/workflows/wheels.yml  |   5 +-
 src/openstep_plist/_test.pyx  |  51 ++++++-------
 src/openstep_plist/parser.pxd |  12 +--
 src/openstep_plist/parser.pyx | 124 +++++++++++++++++-------------
 src/openstep_plist/util.pxd   |   5 +-
 src/openstep_plist/util.pyx   |   5 +-
 src/openstep_plist/writer.pxd |   2 +-
 src/openstep_plist/writer.pyx | 138 +++++++++++++++++++---------------
 tests/test_writer.py          |   4 +-
 9 files changed, 184 insertions(+), 162 deletions(-)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index e1d1fd9..cb33817 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -22,11 +22,8 @@ jobs:
       # Skip
       #
       # * Python 3.6 on all platforms,
-      # * Python 3.12 on all platforms, and
       # * PyPy on Windows.
-      #
-      # TODO: Activate Python 3.12 when issue 18 is resolved.
-      CIBW_SKIP: cp36-* cp312-* pp*-win_amd64
+      CIBW_SKIP: cp36-* pp*-win_amd64
     steps:
       - uses: actions/checkout@v2
         with:
diff --git a/src/openstep_plist/_test.pyx b/src/openstep_plist/_test.pyx
index 16cfef5..f07ea6d 100644
--- a/src/openstep_plist/_test.pyx
+++ b/src/openstep_plist/_test.pyx
@@ -10,13 +10,13 @@ from .parser cimport (
     parse_plist_string as _parse_plist_string,
 )
 from .util cimport (
-    PY_NARROW_UNICODE,
     tounicode,
     is_valid_unquoted_string_char as _is_valid_unquoted_string_char,
 )
 from .writer cimport string_needs_quotes as _string_needs_quotes
+from cpython.mem cimport PyMem_Free
 from cpython.unicode cimport (
-    PyUnicode_FromUnicode, PyUnicode_AS_UNICODE, PyUnicode_GET_SIZE,
+    PyUnicode_AsUCS4Copy, PyUnicode_GET_LENGTH,
 )
 
 
@@ -24,65 +24,64 @@ cdef class ParseContext:
 
     cdef unicode s
     cdef ParseInfo pi
+    cdef Py_UCS4 *buf
     cdef object dict_type
 
-    @classmethod
-    def fromstring(
-            ParseContext cls,
-            string,
-            Py_ssize_t offset=0,
-            dict_type=dict,
-            bint use_numbers=False
+    def __cinit__(
+        self,
+        string,
+        Py_ssize_t offset=0,
+        dict_type=dict,
+        bint use_numbers=False
     ):
-        cdef ParseContext self = ParseContext.__new__(cls)
         self.s = tounicode(string)
-        cdef Py_ssize_t length = PyUnicode_GET_SIZE(self.s)
-        cdef Py_UNICODE* buf = PyUnicode_AS_UNICODE(self.s)
+        cdef Py_ssize_t length = PyUnicode_GET_LENGTH(self.s)
+        self.buf = PyUnicode_AsUCS4Copy(self.s)
+        if not self.buf:
+            raise MemoryError()
         self.dict_type = dict_type
         self.pi = ParseInfo(
-            begin=buf,
-            curr=buf + offset,
-            end=buf + length,
+            begin=self.buf,
+            curr=self.buf + offset,
+            end=self.buf + length,
             dict_type=<void*>dict_type,
             use_numbers=use_numbers,
         )
-        return self
 
+    def __dealloc__(self):
+        PyMem_Free(self.buf)
 
-def is_narrow_unicode():
-    return PY_NARROW_UNICODE
 
-
-def is_valid_unquoted_string_char(Py_UNICODE c):
+def is_valid_unquoted_string_char(Py_UCS4 c):
     return _is_valid_unquoted_string_char(c)
 
 
 def line_number_strings(s, offset=0):
-    cdef ParseContext ctx = ParseContext.fromstring(s, offset)
+    cdef ParseContext ctx = ParseContext(s, offset)
     return _line_number_strings(&ctx.pi)
 
 
 def advance_to_non_space(s, offset=0):
-    cdef ParseContext ctx = ParseContext.fromstring(s, offset)
+    cdef ParseContext ctx = ParseContext(s, offset)
     eof = not _advance_to_non_space(&ctx.pi)
     return None if eof else s[ctx.pi.curr - ctx.pi.begin]
 
 
 def get_slashed_char(s, offset=0):
-    cdef ParseContext ctx = ParseContext.fromstring(s, offset)
+    cdef ParseContext ctx = ParseContext(s, offset)
     return _get_slashed_char(&ctx.pi)
 
 
 def parse_unquoted_plist_string(s):
-    cdef ParseContext ctx = ParseContext.fromstring(s)
+    cdef ParseContext ctx = ParseContext(s)
     return _parse_unquoted_plist_string(&ctx.pi)
 
 
 def parse_plist_string(s, required=True):
-    cdef ParseContext ctx = ParseContext.fromstring(s)
+    cdef ParseContext ctx = ParseContext(s)
     return _parse_plist_string(&ctx.pi, required=required)
 
 
 def string_needs_quotes(s):
-    cdef ParseContext ctx = ParseContext.fromstring(s)
+    cdef ParseContext ctx = ParseContext(s)
     return _string_needs_quotes(ctx.pi.begin, len(s))
diff --git a/src/openstep_plist/parser.pxd b/src/openstep_plist/parser.pxd
index ec173d0..5b5b6de 100644
--- a/src/openstep_plist/parser.pxd
+++ b/src/openstep_plist/parser.pxd
@@ -5,9 +5,9 @@ from libcpp.vector cimport vector
 
 
 ctypedef struct ParseInfo:
-    const Py_UNICODE *begin
-    const Py_UNICODE *curr
-    const Py_UNICODE *end
+    const Py_UCS4 *begin
+    const Py_UCS4 *curr
+    const Py_UCS4 *end
     void *dict_type
     bint use_numbers
 
@@ -22,10 +22,10 @@ cdef uint32_t line_number_strings(ParseInfo *pi)
 cdef bint advance_to_non_space(ParseInfo *pi)
 
 
-cdef Py_UNICODE get_slashed_char(ParseInfo *pi)
+cdef Py_UCS4 get_slashed_char(ParseInfo *pi)
 
 
-cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote)
+cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UCS4 quote)
 
 
 cdef enum UnquotedType:
@@ -34,7 +34,7 @@ cdef enum UnquotedType:
     UNQUOTED_FLOAT = 2
 
 
-cdef UnquotedType get_unquoted_string_type(const Py_UNICODE *buf, Py_ssize_t length)
+cdef UnquotedType get_unquoted_string_type(const Py_UCS4 *buf, Py_ssize_t length)
 
 
 cdef object parse_unquoted_plist_string(ParseInfo *pi, bint ensure_string=*)
diff --git a/src/openstep_plist/parser.pyx b/src/openstep_plist/parser.pyx
index 0770006..f02d7d2 100644
--- a/src/openstep_plist/parser.pyx
+++ b/src/openstep_plist/parser.pyx
@@ -2,8 +2,10 @@
 #distutils: define_macros=CYTHON_TRACE_NOGIL=1
 
 from cpython.bytes cimport PyBytes_FromStringAndSize
+from cpython.mem cimport PyMem_Free
 from cpython.unicode cimport (
-    PyUnicode_FromUnicode, PyUnicode_AS_UNICODE, PyUnicode_GET_SIZE,
+    PyUnicode_4BYTE_KIND, PyUnicode_FromKindAndData, PyUnicode_AsUCS4Copy,
+    PyUnicode_GET_LENGTH,
 )
 from libc.stdint cimport uint8_t, uint16_t, uint32_t
 from libcpp.algorithm cimport copy
@@ -18,7 +20,6 @@ from .util cimport (
     is_valid_unquoted_string_char,
     isdigit,
     isxdigit,
-    PY_NARROW_UNICODE,
     is_high_surrogate,
     is_low_surrogate,
     unicode_scalar_from_surrogates,
@@ -27,7 +28,7 @@ from .util cimport (
 
 cdef uint32_t line_number_strings(ParseInfo *pi):
     # warning: doesn't have a good idea of Unicode line separators
-    cdef const Py_UNICODE *p = pi.begin
+    cdef const Py_UCS4 *p = pi.begin
     cdef uint32_t count = 1
     while p < pi.curr:
         if p[0] == c'\r':
@@ -44,7 +45,7 @@ cdef bint advance_to_non_space(ParseInfo *pi):
     """Returns true if the advance found something that's not whitespace
     before the end of the buffer, false otherwise.
     """
-    cdef Py_UNICODE ch2, ch3
+    cdef Py_UCS4 ch2, ch3
     while pi.curr < pi.end:
         ch2 = pi.curr[0]
         pi.curr += 1
@@ -109,8 +110,8 @@ cdef unsigned short* NEXT_STEP_DECODING_TABLE = [
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef Py_UNICODE get_slashed_char(ParseInfo *pi):
-    cdef Py_UNICODE result
+cdef Py_UCS4 get_slashed_char(ParseInfo *pi):
+    cdef Py_UCS4 result
     cdef uint8_t num
     cdef unsigned int codepoint, num_digits
     cdef unsigned long unum
@@ -180,12 +181,12 @@ cdef Py_UNICODE get_slashed_char(ParseInfo *pi):
     return ch
 
 
-cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote):
-    cdef vector[Py_UNICODE] string
-    cdef const Py_UNICODE *start_mark = pi.curr
-    cdef const Py_UNICODE *mark = pi.curr
-    cdef const Py_UNICODE *tmp
-    cdef Py_UNICODE ch, ch2
+cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UCS4 quote):
+    cdef vector[Py_UCS4] string
+    cdef const Py_UCS4 *start_mark = pi.curr
+    cdef const Py_UCS4 *mark = pi.curr
+    cdef const Py_UCS4 *tmp
+    cdef Py_UCS4 ch, ch2
     while pi.curr < pi.end:
         ch = pi.curr[0]
         if ch == quote:
@@ -201,7 +202,7 @@ cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote):
             # If we are on a "narrow" build, then the two code units already
             # represent a single codepoint internally.
             if (
-                not PY_NARROW_UNICODE and is_high_surrogate(ch)
+                is_high_surrogate(ch)
                 and pi.curr < pi.end and pi.curr[0] == c"\\"
             ):
                 tmp = pi.curr
@@ -228,7 +229,11 @@ cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote):
     # Advance past the quote character before returning
     pi.curr += 1
 
-    return PyUnicode_FromUnicode(<const Py_UNICODE*>string.const_data(), string.size())
+    return PyUnicode_FromKindAndData(
+        PyUnicode_4BYTE_KIND,
+        string.const_data(),
+        string.size()
+    )
 
 
 def string_to_number(unicode s not None, bint required=True):
@@ -236,17 +241,22 @@ def string_to_number(unicode s not None, bint required=True):
     Raises ValueError if the string is not a number.
     """
     cdef:
-        Py_UNICODE c
-        Py_UNICODE* buf
-        Py_ssize_t length = PyUnicode_GET_SIZE(s)
+        Py_UCS4 c
+        Py_UCS4* buf
+        Py_ssize_t length = PyUnicode_GET_LENGTH(s)
 
     if length:
-        buf = PyUnicode_AS_UNICODE(s)
-        kind = get_unquoted_string_type(buf, length)
-        if kind == UNQUOTED_FLOAT:
-            return float(s)
-        elif kind == UNQUOTED_INTEGER:
-            return int(s)
+        buf = PyUnicode_AsUCS4Copy(s)
+        if not buf:
+            raise MemoryError()
+        try:
+            kind = get_unquoted_string_type(buf, length)
+            if kind == UNQUOTED_FLOAT:
+                return float(s)
+            elif kind == UNQUOTED_INTEGER:
+                return int(s)
+        finally:
+            PyMem_Free(buf)
 
     if required:
         raise ValueError(f"Could not convert string to float or int: {s!r}")
@@ -255,9 +265,9 @@ def string_to_number(unicode s not None, bint required=True):
 
 
 cdef UnquotedType get_unquoted_string_type(
-    const Py_UNICODE *buf, Py_ssize_t length
+    const Py_UCS4 *buf, Py_ssize_t length
 ):
-    """Check if Py_UNICODE array starts with a digit, or '-' followed
+    """Check if Py_UCS4 array starts with a digit, or '-' followed
     by a digit, and if it contains a decimal point '.'.
     Return 0 if string cannot contain a number, 1 if it contains an
     integer, and 2 if it contains a float.
@@ -268,8 +278,8 @@ cdef UnquotedType get_unquoted_string_type(
         bint maybe_number = True
         bint is_float = False
         int i = 0
-        # deref here is safe since Py_UNICODE* are NULL-terminated
-        Py_UNICODE ch = buf[i]
+        # deref here is safe since PyUnicode_AsUCS4Copy buffers are NULL-terminated
+        Py_UCS4 ch = buf[i]
 
     if ch == c'-':
         if length > 1:
@@ -301,8 +311,8 @@ cdef UnquotedType get_unquoted_string_type(
 
 cdef object parse_unquoted_plist_string(ParseInfo *pi, bint ensure_string=False):
     cdef:
-        const Py_UNICODE *mark = pi.curr
-        Py_UNICODE ch
+        const Py_UCS4 *mark = pi.curr
+        Py_UCS4 ch
         Py_ssize_t length, i
         unicode s
         UnquotedType kind
@@ -315,7 +325,7 @@ cdef object parse_unquoted_plist_string(ParseInfo *pi, bint ensure_string=False)
             break
     if pi.curr != mark:
         length = pi.curr - mark
-        s = PyUnicode_FromUnicode(mark, length)
+        s = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, <const void *>mark, length)
 
         if not ensure_string and pi.use_numbers:
             kind = get_unquoted_string_type(mark, length)
@@ -330,7 +340,7 @@ cdef object parse_unquoted_plist_string(ParseInfo *pi, bint ensure_string=False)
 
 
 cdef unicode parse_plist_string(ParseInfo *pi, bint required=True):
-    cdef Py_UNICODE ch
+    cdef Py_UCS4 ch
     if not advance_to_non_space(pi):
         if required:
             raise ParseError("Unexpected EOF while parsing string")
@@ -437,7 +447,7 @@ cdef inline unsigned char from_hex_digit(unsigned char ch):
 cdef int get_data_bytes(ParseInfo *pi, vector[unsigned char]& result) except -1:
     cdef unsigned char first, second
     cdef int num_bytes_read = 0
-    cdef Py_UNICODE ch1, ch2
+    cdef Py_UCS4 ch1, ch2
     while pi.curr < pi.end:
         ch1 = pi.curr[0]
         if ch1 == c'>':
@@ -495,7 +505,7 @@ cdef bytes parse_plist_data(ParseInfo *pi):
 
 
 cdef object parse_plist_object(ParseInfo *pi, bint required=True):
-    cdef Py_UNICODE ch
+    cdef Py_UCS4 ch
     if not advance_to_non_space(pi):
         if required:
             raise ParseError("Unexpected EOF while parsing plist")
@@ -523,8 +533,12 @@ cdef object parse_plist_object(ParseInfo *pi, bint required=True):
 
 def loads(string, dict_type=dict, bint use_numbers=False):
     cdef unicode s = tounicode(string)
-    cdef Py_ssize_t length = PyUnicode_GET_SIZE(s)
-    cdef Py_UNICODE* buf = PyUnicode_AS_UNICODE(s)
+    cdef Py_ssize_t length = PyUnicode_GET_LENGTH(s)
+    cdef const Py_UCS4 *begin
+    cdef object result = None
+    cdef Py_UCS4* buf = PyUnicode_AsUCS4Copy(s)
+    if not buf:
+        raise MemoryError()
 
     cdef ParseInfo pi = ParseInfo(
         begin=buf,
@@ -534,25 +548,27 @@ def loads(string, dict_type=dict, bint use_numbers=False):
         use_numbers=use_numbers,
     )
 
-    cdef const Py_UNICODE *begin = pi.curr
-    cdef object result = None
-    if not advance_to_non_space(&pi):
-        # a file consisting of only whitespace or empty is defined as an
-        # empty dictionary
-        result = {}
-    else:
-        result = parse_plist_object(&pi, required=True)
-        if result:
-            if advance_to_non_space(&pi):
-                if not isinstance(result, unicode):
-                    raise ParseError(
-                        "Junk after plist at line %d" % line_number_strings(&pi)
-                    )
-                else:
-                    # keep parsing for a 'strings resource' file: it looks like
-                    # a dictionary without the opening/closing curly braces
-                    pi.curr = begin
-                    result = parse_plist_dict_content(&pi)
+    try:
+        begin = pi.curr
+        if not advance_to_non_space(&pi):
+            # a file consisting of only whitespace or empty is defined as an
+            # empty dictionary
+            result = {}
+        else:
+            result = parse_plist_object(&pi, required=True)
+            if result:
+                if advance_to_non_space(&pi):
+                    if not isinstance(result, unicode):
+                        raise ParseError(
+                            "Junk after plist at line %d" % line_number_strings(&pi)
+                        )
+                    else:
+                        # keep parsing for a 'strings resource' file: it looks like
+                        # a dictionary without the opening/closing curly braces
+                        pi.curr = begin
+                        result = parse_plist_dict_content(&pi)
+    finally:
+        PyMem_Free(buf)
 
     return result
 
diff --git a/src/openstep_plist/util.pxd b/src/openstep_plist/util.pxd
index da99ac1..ac5437a 100644
--- a/src/openstep_plist/util.pxd
+++ b/src/openstep_plist/util.pxd
@@ -15,10 +15,7 @@ cdef unicode tounicode(s, encoding=*, errors=*)
 cdef tostr(s, encoding=*, errors=*)
 
 
-cdef bint is_valid_unquoted_string_char(Py_UNICODE x)
-
-
-cdef bint PY_NARROW_UNICODE
+cdef bint is_valid_unquoted_string_char(Py_UCS4 x)
 
 
 cdef bint is_high_surrogate(uint32_t ch)
diff --git a/src/openstep_plist/util.pyx b/src/openstep_plist/util.pyx
index dd96ceb..24a67b1 100644
--- a/src/openstep_plist/util.pyx
+++ b/src/openstep_plist/util.pyx
@@ -26,7 +26,7 @@ cdef inline object tostr(s, encoding="ascii", errors="strict"):
         raise TypeError(f"Could not convert to str: {s!r}")
 
 
-cdef inline bint is_valid_unquoted_string_char(Py_UNICODE x):
+cdef inline bint is_valid_unquoted_string_char(Py_UCS4 x):
     return (
         (x >= c'a' and x <= c'z') or
         (x >= c'A' and x <= c'Z') or
@@ -40,9 +40,6 @@ cdef inline bint is_valid_unquoted_string_char(Py_UNICODE x):
     )
 
 
-cdef bint PY_NARROW_UNICODE = sizeof(Py_UNICODE) != 4
-
-
 cdef inline bint is_high_surrogate(uint32_t ch):
     return ch >= 0xD800 and ch <= 0xDBFF
 
diff --git a/src/openstep_plist/writer.pxd b/src/openstep_plist/writer.pxd
index 5f782bd..ff26078 100644
--- a/src/openstep_plist/writer.pxd
+++ b/src/openstep_plist/writer.pxd
@@ -1,4 +1,4 @@
 #cython: language_level=3
 
 
-cdef bint string_needs_quotes(const Py_UNICODE *a, Py_ssize_t length)
+cdef bint string_needs_quotes(const Py_UCS4 *a, Py_ssize_t length)
diff --git a/src/openstep_plist/writer.pyx b/src/openstep_plist/writer.pyx
index 33e3e22..bc5f071 100644
--- a/src/openstep_plist/writer.pyx
+++ b/src/openstep_plist/writer.pyx
@@ -3,14 +3,15 @@
 
 from collections import OrderedDict
 from cpython.unicode cimport (
-    PyUnicode_FromUnicode,
-    PyUnicode_AS_UNICODE,
-    PyUnicode_AS_DATA,
-    PyUnicode_GET_SIZE,
     PyUnicode_AsUTF8String,
+    PyUnicode_4BYTE_KIND,
+    PyUnicode_FromKindAndData,
+    PyUnicode_AsUCS4Copy,
+    PyUnicode_GET_LENGTH,
 )
 from cpython.bytes cimport PyBytes_GET_SIZE
 from cpython.object cimport Py_SIZE
+from cpython.mem cimport PyMem_Free
 from libcpp.vector cimport vector
 from libc.stdint cimport uint16_t
 cimport cython
@@ -19,20 +20,19 @@ from .util cimport (
     tounicode,
     isdigit,
     isprint,
-    PY_NARROW_UNICODE,
     high_surrogate_from_unicode_scalar,
     low_surrogate_from_unicode_scalar,
 )
 
 
-cdef Py_UNICODE *HEX_MAP = [
+cdef Py_UCS4 *HEX_MAP = [
     c'0', c'1', c'2', c'3', c'4', c'5', c'6', c'7',
     c'8', c'9', c'A', c'B', c'C', c'D', c'E', c'F',
 ]
 
-cdef Py_UNICODE *ARRAY_SEP_NO_INDENT = [c',', c' ']
-cdef Py_UNICODE *DICT_KEY_VALUE_SEP = [c' ', c'=', c' ']
-cdef Py_UNICODE *DICT_ITEM_SEP_NO_INDENT = [c';', c' ']
+cdef Py_UCS4 *ARRAY_SEP_NO_INDENT = [c',', c' ']
+cdef Py_UCS4 *DICT_KEY_VALUE_SEP = [c' ', c'=', c' ']
+cdef Py_UCS4 *DICT_ITEM_SEP_NO_INDENT = [c';', c' ']
 
 
 # this table includes A-Z, a-z, 0-9, '.', '_' and '$'
@@ -56,14 +56,14 @@ cdef bint *VALID_UNQUOTED_CHARS = [
 ]
 
 
-cdef bint string_needs_quotes(const Py_UNICODE *a, Py_ssize_t length):
+cdef bint string_needs_quotes(const Py_UCS4 *a, Py_ssize_t length):
     # empty string is always quoted
     if length == 0:
         return True
 
     cdef:
         Py_ssize_t i
-        Py_UNICODE ch
+        Py_UCS4 ch
         bint is_number = True
         bint seen_period = False
 
@@ -91,8 +91,8 @@ cdef bint string_needs_quotes(const Py_UNICODE *a, Py_ssize_t length):
     return is_number
 
 
-cdef inline void escape_unicode(uint16_t ch, Py_UNICODE *dest):
-    # caller must ensure 'dest' has rooms for 6 more Py_UNICODE
+cdef inline void escape_unicode(uint16_t ch, Py_UCS4 *dest):
+    # caller must ensure 'dest' has room for 6 more Py_UCS4
     dest[0] = c'\\'
     dest[1] = c'U'
     dest[5] = (ch & 15) + 55 if (ch & 15) > 9 else (ch & 15) + 48
@@ -107,7 +107,7 @@ cdef inline void escape_unicode(uint16_t ch, Py_UNICODE *dest):
 @cython.final
 cdef class Writer:
 
-    cdef vector[Py_UNICODE] *dest
+    cdef vector[Py_UCS4] *dest
     cdef bint unicode_escape
     cdef int float_precision
     cdef unicode indent
@@ -121,7 +121,7 @@ cdef class Writer:
         indent=None,
         bint single_line_tuples=False,
     ):
-        self.dest = new vector[Py_UNICODE]()
+        self.dest = new vector[Py_UCS4]()
         self.unicode_escape = unicode_escape
         self.float_precision = float_precision
 
@@ -158,15 +158,17 @@ cdef class Writer:
         return self.write_object(obj)
 
     cdef inline Py_ssize_t extend_buffer(
-        self, const Py_UNICODE *s, Py_ssize_t length
-    ) except +:
+        self, const Py_UCS4 *s, Py_ssize_t length
+    ) except -1:
         self.dest.reserve(self.dest.size() + length)
         self.dest.insert(self.dest.end(), s, s + length)
         return length
 
     cdef inline unicode _getvalue(self):
-        return PyUnicode_FromUnicode(
-            <const Py_UNICODE*>self.dest.const_data(), self.dest.size()
+        return PyUnicode_FromKindAndData(
+            PyUnicode_4BYTE_KIND,
+            self.dest.const_data(),
+            self.dest.size()
         )
 
     cdef Py_ssize_t write_object(self, object obj) except -1:
@@ -197,15 +199,15 @@ cdef class Writer:
             )
 
     cdef Py_ssize_t write_quoted_string(
-        self, const Py_UNICODE *s, Py_ssize_t length
+        self, const Py_UCS4 *s, Py_ssize_t length
     ) except -1:
 
         cdef:
-            vector[Py_UNICODE] *dest = self.dest
+            vector[Py_UCS4] *dest = self.dest
             bint unicode_escape = self.unicode_escape
-            const Py_UNICODE *curr = s
-            const Py_UNICODE *end = &s[length]
-            Py_UNICODE *ptr
+            const Py_UCS4 *curr = s
+            const Py_UCS4 *end = &s[length]
+            Py_UCS4 *ptr
             unsigned long ch
             Py_ssize_t base_length = dest.size()
             Py_ssize_t new_length = 0
@@ -226,7 +228,7 @@ cdef class Writer:
                     else:
                         new_length += 4
                 elif unicode_escape:
-                    if ch > 0xFFFF and not PY_NARROW_UNICODE:
+                    if ch > 0xFFFF:
                         new_length += 12
                     else:
                         new_length += 6
@@ -235,7 +237,7 @@ cdef class Writer:
             curr += 1
 
         dest.resize(base_length + new_length + 2)
-        ptr = <Py_UNICODE*>dest.data() + base_length
+        ptr = <Py_UCS4*>dest.data() + base_length
         ptr[0] = '"'
         ptr += 1
 
@@ -276,7 +278,7 @@ cdef class Writer:
                         ptr[0] = (ch & 7) + c'0'
                         ptr += 3
                 elif unicode_escape:
-                    if ch > 0xFFFF and not PY_NARROW_UNICODE:
+                    if ch > 0xFFFF:
                         escape_unicode(high_surrogate_from_unicode_scalar(ch), ptr)
                         ptr += 6
                         escape_unicode(low_surrogate_from_unicode_scalar(ch), ptr)
@@ -296,46 +298,60 @@ cdef class Writer:
 
     cdef inline Py_ssize_t write_unquoted_string(self, unicode string) except -1:
         cdef:
-            const Py_UNICODE *s = PyUnicode_AS_UNICODE(string)
-            Py_ssize_t length = PyUnicode_GET_SIZE(string)
-
-        return self.extend_buffer(s, length)
+            Py_ssize_t length = PyUnicode_GET_LENGTH(string)
+            Py_UCS4 *s = PyUnicode_AsUCS4Copy(string)
+        if not s:
+            raise MemoryError()
+        try:
+            return self.extend_buffer(s, length)
+        finally:
+            PyMem_Free(s)
 
     cdef Py_ssize_t write_string(self, unicode string) except -1:
         cdef:
-            const Py_UNICODE *s = PyUnicode_AS_UNICODE(string)
-            Py_ssize_t length = PyUnicode_GET_SIZE(string)
-
-        if string_needs_quotes(s, length):
-            return self.write_quoted_string(s, length)
-        else:
-            return self.extend_buffer(s, length)
+            Py_ssize_t length = PyUnicode_GET_LENGTH(string)
+            Py_UCS4 *s = PyUnicode_AsUCS4Copy(string)
+        if not s:
+            raise MemoryError()
+        try:
+            if string_needs_quotes(s, length):
+                return self.write_quoted_string(s, length)
+            else:
+                return self.extend_buffer(s, length)
+        finally:
+            PyMem_Free(s)
 
     cdef Py_ssize_t write_short_float_repr(self, object py_float) except -1:
         cdef:
             unicode string = f"{py_float:.{self.float_precision}f}"
-            const Py_UNICODE *s = PyUnicode_AS_UNICODE(string)
-            Py_ssize_t length = PyUnicode_GET_SIZE(string)
-            Py_UNICODE ch
-
-        # read digits backwards, skipping all the '0's until either a
-        # non-'0' or '.' is found
-        while length > 0:
-            ch = s[length-1]
-            if ch == c'.':
-                length -= 1  # skip the trailing dot
-                break
-            elif ch != c'0':
-                break
-            length -= 1
-
-        return self.extend_buffer(s, length)
+            Py_ssize_t length = PyUnicode_GET_LENGTH(string)
+            Py_UCS4 ch
+            Py_UCS4 *s = PyUnicode_AsUCS4Copy(string)
+
+        if not s:
+            raise MemoryError()
+
+        try:
+            # read digits backwards, skipping all the '0's until either a
+            # non-'0' or '.' is found
+            while length > 0:
+                ch = s[length-1]
+                if ch == c'.':
+                    length -= 1  # skip the trailing dot
+                    break
+                elif ch != c'0':
+                    break
+                length -= 1
+
+            return self.extend_buffer(s, length)
+        finally:
+            PyMem_Free(s)
 
     cdef Py_ssize_t write_data(self, bytes data) except -1:
         cdef:
-            vector[Py_UNICODE] *dest = self.dest
+            vector[Py_UCS4] *dest = self.dest
             const unsigned char *src = data
-            Py_UNICODE *ptr
+            Py_UCS4 *ptr
             Py_ssize_t length = PyBytes_GET_SIZE(data)
             Py_ssize_t extra_length, i, j
 
@@ -346,7 +362,7 @@ cdef class Writer:
 
         j = dest.size()
         dest.resize(j + extra_length)
-        ptr = <Py_UNICODE*>dest.data()
+        ptr = <Py_UCS4*>dest.data()
 
         ptr[j] = c'<'
         j += 1
@@ -375,7 +391,7 @@ cdef class Writer:
             Py_ssize_t last
             Py_ssize_t count
             Py_ssize_t i
-            vector[Py_UNICODE] *dest = self.dest
+            vector[Py_UCS4] *dest = self.dest
             unicode indent, newline_indent = ""
 
         if length == 0:
@@ -420,7 +436,7 @@ cdef class Writer:
             Py_ssize_t last
             Py_ssize_t count
             Py_ssize_t i
-            vector[Py_UNICODE] *dest = self.dest
+            vector[Py_UCS4] *dest = self.dest
             unicode indent, newline_indent = ""
 
         if length == 0:
@@ -464,7 +480,7 @@ cdef class Writer:
         cdef:
             unicode indent
             unicode newline_indent = ""
-            vector[Py_UNICODE] *dest = self.dest
+            vector[Py_UCS4] *dest = self.dest
             Py_ssize_t last, count, i
 
         if not d:
@@ -519,7 +535,7 @@ cdef class Writer:
         cdef:
             unicode indent
             unicode newline_indent = ""
-            vector[Py_UNICODE] *dest = self.dest
+            vector[Py_UCS4] *dest = self.dest
             Py_ssize_t last, count, i
 
         if not d:
diff --git a/tests/test_writer.py b/tests/test_writer.py
index 77deabc..b00a919 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -2,7 +2,7 @@
 from __future__ import absolute_import, unicode_literals
 import openstep_plist
 from openstep_plist.writer import Writer
-from openstep_plist._test import is_narrow_unicode, string_needs_quotes
+from openstep_plist._test import string_needs_quotes
 from io import StringIO, BytesIO
 from collections import OrderedDict
 import string
@@ -64,7 +64,7 @@ def test_quoted_string_no_unicode_escape(self):
         assert w.getvalue() == '"\u0410"'
 
         w = Writer(unicode_escape=False)
-        assert w.write("\U0001F4A9") == (4 if is_narrow_unicode() else 3)
+        assert w.write("\U0001F4A9") == 3
         assert w.getvalue() == '"\U0001F4A9"'
 
     @pytest.mark.parametrize(