99Py_DEPRECATED (3.13 ) typedef wchar_t PY_UNICODE_TYPE ;
1010Py_DEPRECATED (3.13 ) typedef wchar_t Py_UNICODE ;
1111
12+
1213/* --- Internal Unicode Operations ---------------------------------------- */
1314
1415// Static inline functions to work with surrogates
@@ -43,6 +44,7 @@ static inline Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) {
4344 return (0xDC00 + (ch & 0x3FF ));
4445}
4546
47+
4648/* --- Unicode Type ------------------------------------------------------- */
4749
4850/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
@@ -375,6 +377,7 @@ static inline Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *op)
/* Function-like wrapper around the PyUnicode_MAX_CHAR_VALUE() function:
   it casts the argument with _PyObject_CAST() before the call.

   The parameter list must follow the macro name with no space in between;
   otherwise the preprocessor treats this as an object-like macro and the
   "(op)" text becomes part of the expansion. */
#define PyUnicode_MAX_CHAR_VALUE(op) \
    PyUnicode_MAX_CHAR_VALUE(_PyObject_CAST(op))
377379
380+
378381/* === Public API ========================================================= */
379382
380383/* With PEP 393, this is the recommended way to allocate a new unicode object.
@@ -440,6 +443,123 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
440443 const void * buffer ,
441444 Py_ssize_t size );
442445
446+
447+ /* --- _PyUnicodeWriter API ----------------------------------------------- */
448+
449+ typedef struct {
450+ PyObject * buffer ;
451+ void * data ;
452+ int kind ;
453+ Py_UCS4 maxchar ;
454+ Py_ssize_t size ;
455+ Py_ssize_t pos ;
456+
457+ /* minimum number of allocated characters (default: 0) */
458+ Py_ssize_t min_length ;
459+
460+ /* minimum character (default: 127, ASCII) */
461+ Py_UCS4 min_char ;
462+
463+ /* If non-zero, overallocate the buffer (default: 0). */
464+ unsigned char overallocate ;
465+
466+ /* If readonly is 1, buffer is a shared string (cannot be modified)
467+ and size is set to 0. */
468+ unsigned char readonly ;
469+ } _PyUnicodeWriter ;
470+
471+ // Initialize a Unicode writer.
472+ //
473+ // By default, the minimum buffer size is 0 character and overallocation is
474+ // disabled. Set min_length, min_char and overallocate attributes to control
475+ // the allocation of the buffer.
476+ PyAPI_FUNC (void )
477+ _PyUnicodeWriter_Init (_PyUnicodeWriter * writer );
478+
/* Prepare the buffer to write LENGTH characters with the specified maximum
   character MAXCHAR.

   Fast path: evaluates to 0 without any function call when the writer
   already accepts MAXCHAR and has at least LENGTH free slots
   (size - pos), or when LENGTH is 0. In every other case the work is
   delegated to _PyUnicodeWriter_PrepareInternal().

   Return 0 on success, raise an exception and return -1 on error.

   NOTE: this is a macro; WRITER, LENGTH and MAXCHAR can each be evaluated
   more than once, so do not pass expressions with side effects. The
   parameter list must directly follow the macro name (no space), or the
   preprocessor would define an object-like macro instead. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
    (((MAXCHAR) <= (WRITER)->maxchar \
      && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
     ? 0 \
     : (((LENGTH) == 0) \
        ? 0 \
        : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
490+
491+ /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
492+ instead. */
493+ PyAPI_FUNC (int )
494+ _PyUnicodeWriter_PrepareInternal (_PyUnicodeWriter * writer ,
495+ Py_ssize_t length , Py_UCS4 maxchar );
496+
/* Prepare the buffer to have at least the kind KIND.
   For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
   support characters in range U+0000-U+FFFF.

   Fast path: evaluates to 0 without any function call when KIND does not
   exceed the writer's current kind; otherwise the work is delegated to
   _PyUnicodeWriter_PrepareKindInternal().

   Return 0 on success, raise an exception and return -1 on error.

   NOTE: this is a macro; WRITER and KIND can each be evaluated more than
   once, so do not pass expressions with side effects. The parameter list
   must directly follow the macro name (no space), or the preprocessor
   would define an object-like macro instead. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
    ((KIND) <= (WRITER)->kind \
     ? 0 \
     : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
506+
507+ /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
508+ macro instead. */
509+ PyAPI_FUNC (int )
510+ _PyUnicodeWriter_PrepareKindInternal (_PyUnicodeWriter * writer ,
511+ int kind );
512+
513+ /* Append a Unicode character.
514+ Return 0 on success, raise an exception and return -1 on error. */
515+ PyAPI_FUNC (int )
516+ _PyUnicodeWriter_WriteChar (_PyUnicodeWriter * writer ,
517+ Py_UCS4 ch
518+ );
519+
520+ /* Append a Unicode string.
521+ Return 0 on success, raise an exception and return -1 on error. */
522+ PyAPI_FUNC (int )
523+ _PyUnicodeWriter_WriteStr (_PyUnicodeWriter * writer ,
524+ PyObject * str /* Unicode string */
525+ );
526+
527+ /* Append a substring of a Unicode string.
528+ Return 0 on success, raise an exception and return -1 on error. */
529+ PyAPI_FUNC (int )
530+ _PyUnicodeWriter_WriteSubstring (_PyUnicodeWriter * writer ,
531+ PyObject * str , /* Unicode string */
532+ Py_ssize_t start ,
533+ Py_ssize_t end
534+ );
535+
536+ /* Append an ASCII-encoded byte string.
537+ Return 0 on success, raise an exception and return -1 on error. */
538+ PyAPI_FUNC (int )
539+ _PyUnicodeWriter_WriteASCIIString (_PyUnicodeWriter * writer ,
540+ const char * str , /* ASCII-encoded byte string */
541+ Py_ssize_t len /* number of bytes, or -1 if unknown */
542+ );
543+
544+ /* Append a latin1-encoded byte string.
545+ Return 0 on success, raise an exception and return -1 on error. */
546+ PyAPI_FUNC (int )
547+ _PyUnicodeWriter_WriteLatin1String (_PyUnicodeWriter * writer ,
548+ const char * str , /* latin1-encoded byte string */
549+ Py_ssize_t len /* length in bytes */
550+ );
551+
552+ /* Get the value of the writer as a Unicode string. Clear the
553+ buffer of the writer. Raise an exception and return NULL
554+ on error. */
555+ PyAPI_FUNC (PyObject * )
556+ _PyUnicodeWriter_Finish (_PyUnicodeWriter * writer );
557+
558+ /* Deallocate memory of a writer (clear its internal buffer). */
559+ PyAPI_FUNC (void )
560+ _PyUnicodeWriter_Dealloc (_PyUnicodeWriter * writer );
561+
562+
443563/* --- Manage the default encoding ---------------------------------------- */
444564
445565/* Returns a pointer to the default encoding (UTF-8) of the
@@ -457,6 +577,10 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
457577
458578PyAPI_FUNC (const char * ) PyUnicode_AsUTF8 (PyObject * unicode );
459579
// Old spelling retained so existing callers keep compiling; it expands
// to PyUnicode_AsUTF8.
#define _PyUnicode_AsString PyUnicode_AsUTF8
582+
583+
460584/* === Characters Type APIs =============================================== */
461585
462586/* These should not be used directly. Use the Py_UNICODE_IS* and
@@ -570,3 +694,10 @@ static inline int Py_UNICODE_ISALNUM(Py_UCS4 ch) {
570694 || Py_UNICODE_ISDIGIT (ch )
571695 || Py_UNICODE_ISNUMERIC (ch ));
572696}
697+
698+
699+ /* === Misc functions ===================================================== */
700+
701+ // Return an interned Unicode object for an Identifier; may fail if there is no
702+ // memory.
703+ PyAPI_FUNC (PyObject * ) _PyUnicode_FromId (_Py_Identifier * );
0 commit comments