Skip to content

Commit b79f969

Browse files
committed
add unicodedata.grapheme_cluster_break()
1 parent f523255 commit b79f969

File tree

6 files changed

+2921
-2724
lines changed

6 files changed

+2921
-2724
lines changed

Modules/clinic/unicodedata.c.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,34 @@ unicodedata_UCD_east_asian_width(PyObject *self, PyObject *arg)
255255
return return_value;
256256
}
257257

258+
PyDoc_STRVAR(unicodedata_UCD_grapheme_cluster_break__doc__,
259+
"grapheme_cluster_break($self, chr, /)\n"
260+
"--\n"
261+
"\n"
262+
"Returns the east asian width assigned to the character chr as string.");
263+
264+
/* PyMethodDef table entry exposing grapheme_cluster_break as a
   single-argument (METH_O) method together with its docstring. */
#define UNICODEDATA_UCD_GRAPHEME_CLUSTER_BREAK_METHODDEF        \
    {                                                           \
        "grapheme_cluster_break",                               \
        (PyCFunction)unicodedata_UCD_grapheme_cluster_break,    \
        METH_O,                                                 \
        unicodedata_UCD_grapheme_cluster_break__doc__           \
    },
267+
268+
/* Forward declaration of the implementation called by the wrapper below. */
static PyObject *
unicodedata_UCD_grapheme_cluster_break_impl(PyObject *self, int chr);

/* METH_O entry point: parses the single argument with PyArg_Parse using
   the "C" format into an int and hands it to the _impl function.
   Returns NULL if the argument cannot be parsed. */
static PyObject *
unicodedata_UCD_grapheme_cluster_break(PyObject *self, PyObject *arg)
{
    int chr;

    if (!PyArg_Parse(arg, "C:grapheme_cluster_break", &chr)) {
        return NULL;
    }
    return unicodedata_UCD_grapheme_cluster_break_impl(self, chr);
}
285+
258286
PyDoc_STRVAR(unicodedata_UCD_decomposition__doc__,
259287
"decomposition($self, chr, /)\n"
260288
"--\n"

Modules/unicodedata.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ typedef struct {
3737
const unsigned char east_asian_width; /* index into
3838
_PyUnicode_EastAsianWidth */
3939
const unsigned char normalization_quick_check; /* see is_normalized() */
40+
const unsigned char grapheme_cluster_break;
4041
} _PyUnicode_DatabaseRecord;
4142

4243
typedef struct change_record {
@@ -382,6 +383,26 @@ unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr)
382383
return PyUnicode_FromString(_PyUnicode_EastAsianWidthNames[index]);
383384
}
384385

386+
/*[clinic input]
387+
unicodedata.UCD.grapheme_cluster_break
388+
389+
self: self
390+
chr: int(accept={str})
391+
/
392+
393+
Returns the east asian width assigned to the character chr as string.
394+
[clinic start generated code]*/
395+
396+
static PyObject *
397+
unicodedata_UCD_grapheme_cluster_break_impl(PyObject *self, int chr)
398+
/*[clinic end generated code: output=484e8537d9ee8197 input=c4854798aab026e0]*/
399+
{
400+
int index;
401+
Py_UCS4 c = (Py_UCS4)chr;
402+
index = (int) _getrecord_ex(c)->grapheme_cluster_break;
403+
return PyUnicode_FromString(_PyUnicode_GraphemeBreakProperty[index]);
404+
}
405+
385406
/*[clinic input]
386407
unicodedata.UCD.decomposition
387408
@@ -1264,6 +1285,7 @@ static PyMethodDef unicodedata_functions[] = {
12641285
UNICODEDATA_UCD_COMBINING_METHODDEF
12651286
UNICODEDATA_UCD_MIRRORED_METHODDEF
12661287
UNICODEDATA_UCD_EAST_ASIAN_WIDTH_METHODDEF
1288+
UNICODEDATA_UCD_GRAPHEME_CLUSTER_BREAK_METHODDEF
12671289
UNICODEDATA_UCD_DECOMPOSITION_METHODDEF
12681290
UNICODEDATA_UCD_NAME_METHODDEF
12691291
UNICODEDATA_UCD_LOOKUP_METHODDEF

0 commit comments

Comments
 (0)