Skip to content

Commit 9074876

Browse files
gh-149079: Optimize sorting in unicodedata.normalize() (GH-150782)
Sort the Py_UCS4 buffer instead of the PyUnicodeObject. This avoids the use of PyUnicode_READ() and PyUnicode_WRITE().
1 parent 35ce2e5 commit 9074876

1 file changed

Lines changed: 23 additions & 39 deletions

File tree

Modules/unicodedata.c

Lines changed: 23 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -560,39 +560,36 @@ get_decomp_record(PyObject *self, Py_UCS4 code,
560560
#define CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD 20
561561

562562
static void
563-
canonical_ordering_sort_insertion(int kind, void *data,
564-
Py_ssize_t start, Py_ssize_t end)
563+
canonical_ordering_sort_insertion(Py_UCS4 *data, Py_ssize_t length)
565564
{
566-
for (Py_ssize_t i = start + 1; i < end; i++) {
567-
Py_UCS4 code = PyUnicode_READ(kind, data, i);
565+
for (Py_ssize_t i = 1; i < length; i++) {
566+
Py_UCS4 code = data[i];
568567
unsigned char combining = _getrecord_ex(code)->combining;
569568
Py_ssize_t j = i;
570569

571-
while (j > start) {
572-
Py_UCS4 previous = PyUnicode_READ(kind, data, j - 1);
570+
while (j > 0) {
571+
Py_UCS4 previous = data[j - 1];
573572
if (_getrecord_ex(previous)->combining <= combining) {
574573
break;
575574
}
576-
PyUnicode_WRITE(kind, data, j, previous);
575+
data[j] = previous;
577576
j--;
578577
}
579578
if (j != i) {
580-
PyUnicode_WRITE(kind, data, j, code);
579+
data[j] = code;
581580
}
582581
}
583582
}
584583

585584
static void
586-
canonical_ordering_sort_counting(int kind, void *data,
587-
Py_ssize_t start, Py_ssize_t end,
585+
canonical_ordering_sort_counting(Py_UCS4 *data, Py_ssize_t length,
588586
Py_UCS4 *sortbuf)
589587
{
590588
Py_ssize_t counts[256] = {0};
591-
Py_ssize_t run_length = end - start;
592589
Py_ssize_t total = 0;
593590

594-
for (Py_ssize_t i = start; i < end; i++) {
595-
Py_UCS4 code = PyUnicode_READ(kind, data, i);
591+
for (Py_ssize_t i = 0; i < length; i++) {
592+
Py_UCS4 code = data[i];
596593
unsigned char combining = _getrecord_ex(code)->combining;
597594
counts[combining]++;
598595
}
@@ -604,14 +601,12 @@ canonical_ordering_sort_counting(int kind, void *data,
604601
}
605602

606603
/* Reuse counts[] as the next output slot for each CCC. */
607-
for (Py_ssize_t i = start; i < end; i++) {
608-
Py_UCS4 code = PyUnicode_READ(kind, data, i);
604+
for (Py_ssize_t i = 0; i < length; i++) {
605+
Py_UCS4 code = data[i];
609606
unsigned char combining = _getrecord_ex(code)->combining;
610607
sortbuf[counts[combining]++] = code;
611608
}
612-
for (Py_ssize_t i = 0; i < run_length; i++) {
613-
PyUnicode_WRITE(kind, data, start + i, sortbuf[i]);
614-
}
609+
memcpy(data, sortbuf, length * sizeof(Py_UCS4));
615610
}
616611

617612
static PyObject*
@@ -620,9 +615,8 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
620615
PyObject *result;
621616
Py_UCS4 *output;
622617
Py_ssize_t i, o, osize;
623-
int input_kind, result_kind;
618+
int input_kind;
624619
const void *input_data;
625-
void *result_data;
626620
/* Longest decomposition in Unicode 3.2: U+FDFA */
627621
Py_UCS4 stack[20];
628622
Py_ssize_t space, isize;
@@ -715,22 +709,13 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
715709
}
716710
}
717711

718-
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
719-
output, o);
720-
PyMem_Free(output);
721-
if (!result)
722-
return NULL;
723-
724-
result_kind = PyUnicode_KIND(result);
725-
result_data = PyUnicode_DATA(result);
726-
727712
/* Sort each consecutive combining-character run canonically. */
728713
i = 0;
729714
while (i < o) {
730715
Py_ssize_t run_length, run_start;
731716
int needs_sort = 0;
732717

733-
Py_UCS4 ch = PyUnicode_READ(result_kind, result_data, i);
718+
Py_UCS4 ch = output[i];
734719
prev = _getrecord_ex(ch)->combining;
735720
if (prev == 0) {
736721
i++;
@@ -739,7 +724,7 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
739724

740725
run_start = i++;
741726
while (i < o) {
742-
Py_UCS4 ch = PyUnicode_READ(result_kind, result_data, i);
727+
Py_UCS4 ch = output[i];
743728
cur = _getrecord_ex(ch)->combining;
744729
if (cur == 0) {
745730
break;
@@ -756,29 +741,28 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
756741

757742
run_length = i - run_start;
758743
if (run_length < CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD) {
759-
canonical_ordering_sort_insertion(result_kind, result_data,
760-
run_start, i);
744+
canonical_ordering_sort_insertion(output + run_start, run_length);
761745
continue;
762746
}
763747

764748
if (run_length > sortbuflen) {
765-
Py_UCS4 *new_sortbuf = PyMem_Resize(sortbuf,
766-
Py_UCS4,
767-
run_length);
749+
Py_UCS4 *new_sortbuf = PyMem_Resize(sortbuf, Py_UCS4, run_length);
768750
if (new_sortbuf == NULL) {
769751
PyErr_NoMemory();
770752
PyMem_Free(sortbuf);
771-
Py_DECREF(result);
753+
PyMem_Free(output);
772754
return NULL;
773755
}
774756
sortbuf = new_sortbuf;
775757
sortbuflen = run_length;
776758
}
777759

778-
canonical_ordering_sort_counting(result_kind, result_data,
779-
run_start, i, sortbuf);
760+
canonical_ordering_sort_counting(output + run_start, run_length,
761+
sortbuf);
780762
}
781763
PyMem_Free(sortbuf);
764+
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, o);
765+
PyMem_Free(output);
782766
return result;
783767
}
784768

0 commit comments

Comments
 (0)