|
9 | 9 |
|
10 | 10 | // The _PyUnicode_CheckConsistency definition has been moved to the internal API |
11 | 11 | // https://github.com/python/cpython/pull/106398 |
12 | | -#if defined(Py_DEBUG) && defined(CPY_3_13_FEATURES) |
| 12 | +#if defined(Py_DEBUG) && CPY_3_13_FEATURES |
13 | 13 | #include "internal/pycore_unicodeobject.h" |
14 | 14 | #endif |
15 | 15 |
|
@@ -678,6 +678,80 @@ bool CPyStr_IsAlnum(PyObject *str) { |
678 | 678 | return true; |
679 | 679 | } |
680 | 680 |
|
// Function wrapper around the Py_TOLOWER macro so the mapping can be passed
// as an `int (*)(unsigned char)` callback.
static inline int CPy_ASCII_Lower(unsigned char c) {
    const int lowered = Py_TOLOWER(c);
    return lowered;
}
// Function wrapper around the Py_TOUPPER macro so the mapping can be passed
// as an `int (*)(unsigned char)` callback.
static inline int CPy_ASCII_Upper(unsigned char c) {
    const int uppered = Py_TOUPPER(c);
    return uppered;
}
| 683 | + |
| 684 | +static inline PyObject *CPyStr_ChangeCase(PyObject *self, |
| 685 | + int (*ascii_func)(unsigned char), |
| 686 | +#if CPY_3_13_FEATURES |
| 687 | + PyObject *method_name |
| 688 | +#else |
| 689 | + int (*unicode_func)(Py_UCS4, Py_UCS4 *) |
| 690 | +#endif |
| 691 | + ) { |
| 692 | + Py_ssize_t len = PyUnicode_GET_LENGTH(self); |
| 693 | + if (len == 0) { |
| 694 | + Py_INCREF(self); |
| 695 | + return self; |
| 696 | + } |
| 697 | + |
| 698 | + // ASCII fast path: 1-to-1, no expansion possible |
| 699 | + if (PyUnicode_IS_ASCII(self)) { |
| 700 | + PyObject *res = PyUnicode_New(len, 127); |
| 701 | + if (res == NULL) return NULL; |
| 702 | + const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self); |
| 703 | + Py_UCS1 *res_data = PyUnicode_1BYTE_DATA(res); |
| 704 | + for (Py_ssize_t i = 0; i < len; i++) { |
| 705 | + res_data[i] = ascii_func(data[i]); |
| 706 | + } |
| 707 | + return res; |
| 708 | + } |
| 709 | + |
| 710 | +#if CPY_3_13_FEATURES |
| 711 | + // On 3.13+, _PyUnicode_ToLowerFull/ToUpperFull are no longer exported, |
| 712 | + // so fall back to CPython's method implementation for non-ASCII strings. |
| 713 | + return PyObject_CallMethodNoArgs(self, method_name); |
| 714 | +#else |
| 715 | + // General Unicode: unicode_func handles 1-to-N expansion. |
| 716 | + // Worst case: each codepoint expands to 3 (per Unicode standard). |
| 717 | + // The tmp buffer is short-lived, and PyUnicode_FromKindAndData |
| 718 | + // compacts the result to the optimal string kind automatically. |
| 719 | + int kind = PyUnicode_KIND(self); |
| 720 | + const void *data = PyUnicode_DATA(self); |
| 721 | + Py_UCS4 *tmp = PyMem_Malloc(sizeof(Py_UCS4) * len * 3); |
| 722 | + if (tmp == NULL) return PyErr_NoMemory(); |
| 723 | + |
| 724 | + Py_UCS4 mapped[3]; |
| 725 | + Py_ssize_t out_len = 0; |
| 726 | + for (Py_ssize_t i = 0; i < len; i++) { |
| 727 | + int n = unicode_func(PyUnicode_READ(kind, data, i), mapped); |
| 728 | + for (int j = 0; j < n; j++) { |
| 729 | + tmp[out_len++] = mapped[j]; |
| 730 | + } |
| 731 | + } |
| 732 | + |
| 733 | + PyObject *res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, tmp, out_len); |
| 734 | + PyMem_Free(tmp); |
| 735 | + return res; |
| 736 | +#endif |
| 737 | +} |
| 738 | + |
| 739 | +PyObject *CPyStr_Lower(PyObject *self) { |
| 740 | +#if CPY_3_13_FEATURES |
| 741 | + return CPyStr_ChangeCase(self, CPy_ASCII_Lower, mypyc_interned_str.lower); |
| 742 | +#else |
| 743 | + return CPyStr_ChangeCase(self, CPy_ASCII_Lower, _PyUnicode_ToLowerFull); |
| 744 | +#endif |
| 745 | +} |
| 746 | + |
| 747 | +PyObject *CPyStr_Upper(PyObject *self) { |
| 748 | +#if CPY_3_13_FEATURES |
| 749 | + return CPyStr_ChangeCase(self, CPy_ASCII_Upper, mypyc_interned_str.upper); |
| 750 | +#else |
| 751 | + return CPyStr_ChangeCase(self, CPy_ASCII_Upper, _PyUnicode_ToUpperFull); |
| 752 | +#endif |
| 753 | +} |
| 754 | + |
681 | 755 | bool CPyStr_IsDigit(PyObject *str) { |
682 | 756 | Py_ssize_t len = PyUnicode_GET_LENGTH(str); |
683 | 757 | if (len == 0) return false; |
|
0 commit comments