diff --git a/Doc/c-api/bytes.rst b/Doc/c-api/bytes.rst index d47beee68eaa33..8cc935cd06659c 100644 --- a/Doc/c-api/bytes.rst +++ b/Doc/c-api/bytes.rst @@ -219,3 +219,162 @@ called with a non-bytes parameter. reallocation fails, the original bytes object at *\*bytes* is deallocated, *\*bytes* is set to ``NULL``, :exc:`MemoryError` is set, and ``-1`` is returned. + +PyBytesWriter +------------- + +The :c:type:`PyBytesWriter` API can be used to create a Python :class:`bytes` +object. + +.. versionadded:: next + +.. c:type:: PyBytesWriter + + A bytes writer instance. + + The API is **not thread safe**: a writer should only be used by a single + thread at the same time. + + The instance must be destroyed by :c:func:`PyBytesWriter_Finish` on + success, or :c:func:`PyBytesWriter_Discard` on error. + + +Create, Finish, Discard +^^^^^^^^^^^^^^^^^^^^^^^ + +.. c:function:: PyBytesWriter* PyBytesWriter_Create(Py_ssize_t size) + + Create a :c:type:`PyBytesWriter` to write *size* bytes. + + If *size* is greater than zero, allocate *size* bytes, and set the + writer size to *size*. The caller is responsible for writing *size* + bytes using :c:func:`PyBytesWriter_GetData`. + + On error, set an exception and return ``NULL``. + + *size* must be positive or zero. + +.. c:function:: PyObject* PyBytesWriter_Finish(PyBytesWriter *writer) + + Finish a :c:type:`PyBytesWriter` created by + :c:func:`PyBytesWriter_Create`. + + On success, return a Python :class:`bytes` object. + On error, set an exception and return ``NULL``. + + The writer instance is invalid after the call in any case. + No API can be called on the writer after :c:func:`PyBytesWriter_Finish`. + +.. c:function:: PyObject* PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size) + + Similar to :c:func:`PyBytesWriter_Finish`, but resize the writer + to *size* bytes before creating the :class:`bytes` object. + +..
c:function:: PyObject* PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf) + + Similar to :c:func:`PyBytesWriter_Finish`, but resize the writer + using *buf* pointer before creating the :class:`bytes` object. + + Set an exception and return ``NULL`` if *buf* pointer is outside the + internal buffer bounds. + + Function pseudo-code:: + + Py_ssize_t size = (char*)buf - (char*)PyBytesWriter_GetData(writer); + return PyBytesWriter_FinishWithSize(writer, size); + +.. c:function:: void PyBytesWriter_Discard(PyBytesWriter *writer) + + Discard a :c:type:`PyBytesWriter` created by :c:func:`PyBytesWriter_Create`. + + Do nothing if *writer* is ``NULL``. + + The writer instance is invalid after the call. + No API can be called on the writer after :c:func:`PyBytesWriter_Discard`. + +High-level API +^^^^^^^^^^^^^^ + +.. c:function:: int PyBytesWriter_WriteBytes(PyBytesWriter *writer, const void *bytes, Py_ssize_t size) + + Grow the *writer* internal buffer by *size* bytes, + write *size* bytes of *bytes* at the *writer* end, + and add *size* to the *writer* size. + + If *size* is equal to ``-1``, call ``strlen(bytes)`` to get the + string length. + + On success, return ``0``. + On error, set an exception and return ``-1``. + +.. c:function:: int PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...) + + Similar to :c:func:`PyBytes_FromFormat`, but write the output directly at + the writer end. Grow the writer internal buffer on demand. Then add the + written size to the writer size. + + On success, return ``0``. + On error, set an exception and return ``-1``. + + +Getters +^^^^^^^ + +.. c:function:: Py_ssize_t PyBytesWriter_GetSize(PyBytesWriter *writer) + + Get the writer size. + +.. c:function:: void* PyBytesWriter_GetData(PyBytesWriter *writer) + + Get the writer data: start of the internal buffer. + + The pointer is valid until :c:func:`PyBytesWriter_Finish` or + :c:func:`PyBytesWriter_Discard` is called on *writer*. 
+ + +Low-level API +^^^^^^^^^^^^^ + +.. c:function:: int PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size) + + Resize the writer to *size* bytes. It can be used to enlarge or to + shrink the writer. + + Newly allocated bytes are left uninitialized. + + On success, return ``0``. + On error, set an exception and return ``-1``. + + *size* must be positive or zero. + +.. c:function:: int PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t grow) + + Resize the writer by adding *grow* bytes to the current writer size. + + Newly allocated bytes are left uninitialized. + + On success, return ``0``. + On error, set an exception and return ``-1``. + + *grow* can be negative to shrink the writer. + +.. c:function:: void* PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, void *buf) + + Similar to :c:func:`PyBytesWriter_Grow`, but also update the *buf* + pointer. + + The *buf* pointer is moved if the internal buffer is moved in memory. + The *buf* relative position within the internal buffer is left + unchanged. + + On error, set an exception and return ``NULL``. + + *buf* must not be ``NULL``. + + Function pseudo-code:: + + Py_ssize_t pos = (char*)buf - (char*)PyBytesWriter_GetData(writer); + if (PyBytesWriter_Grow(writer, size) < 0) { + return NULL; + } + return (char*)PyBytesWriter_GetData(writer) + pos; diff --git a/Doc/faq/python-video-icon.png b/Doc/faq/python-video-icon.png deleted file mode 100644 index 265da50c7b38fc..00000000000000 Binary files a/Doc/faq/python-video-icon.png and /dev/null differ diff --git a/Doc/howto/remote_debugging.rst b/Doc/howto/remote_debugging.rst index b7323803654628..78b40bcdf7127b 100644 --- a/Doc/howto/remote_debugging.rst +++ b/Doc/howto/remote_debugging.rst @@ -3,6 +3,78 @@ Remote debugging attachment protocol ==================================== +This protocol enables external tools to attach to a running CPython process and +execute Python code remotely.
+ +Most platforms require elevated privileges to attach to another Python process. + +.. _permission-requirements: + +Permission requirements +======================= + +Attaching to a running Python process for remote debugging requires elevated +privileges on most platforms. The specific requirements and troubleshooting +steps depend on your operating system: + +.. rubric:: Linux + +The tracer process must have the ``CAP_SYS_PTRACE`` capability or equivalent +privileges. You can only trace processes you own and can signal. Tracing may +fail if the process is already being traced, or if it is running with +set-user-ID or set-group-ID. Security modules like Yama may further restrict +tracing. + +To temporarily relax ptrace restrictions (until reboot), run: + + ``echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope`` + +.. note:: + + Disabling ``ptrace_scope`` reduces system hardening and should only be done + in trusted environments. + +If running inside a container, use ``--cap-add=SYS_PTRACE`` or +``--privileged``, and run as root if needed. + +Try re-running the command with elevated privileges: + + ``sudo -E !!`` + + +.. rubric:: macOS + +To attach to another process, you typically need to run your debugging tool +with elevated privileges. This can be done by using ``sudo`` or running as +root. + +Even when attaching to processes you own, macOS may block debugging unless +the debugger is run with root privileges due to system security restrictions. + + +.. rubric:: Windows + +To attach to another process, you usually need to run your debugging tool +with administrative privileges. Start the command prompt or terminal as +Administrator. + +Some processes may still be inaccessible even with Administrator rights, +unless you have the ``SeDebugPrivilege`` privilege enabled. + +To resolve file or folder access issues, adjust the security permissions: + + 1. Right-click the file or folder and select **Properties**. + 2. 
Go to the **Security** tab to view users and groups with access. + 3. Click **Edit** to modify permissions. + 4. Select your user account. + 5. In **Permissions**, check **Read** or **Full control** as needed. + 6. Click **Apply**, then **OK** to confirm. + + +.. note:: + + Ensure you've satisfied all :ref:`permission-requirements` before proceeding. + This section describes the low-level protocol that enables external tools to inject and execute a Python script within a running CPython process. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 3b7fd11968ef8a..1d029e3914baf5 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -707,6 +707,23 @@ New features and :c:data:`Py_mod_abi`. (Contributed by Petr Viktorin in :gh:`137210`.) +* Implement :pep:`782`, the :c:type:`PyBytesWriter` API. Add functions: + + * :c:func:`PyBytesWriter_Create` + * :c:func:`PyBytesWriter_Discard` + * :c:func:`PyBytesWriter_FinishWithPointer` + * :c:func:`PyBytesWriter_FinishWithSize` + * :c:func:`PyBytesWriter_Finish` + * :c:func:`PyBytesWriter_Format` + * :c:func:`PyBytesWriter_GetData` + * :c:func:`PyBytesWriter_GetSize` + * :c:func:`PyBytesWriter_GrowAndUpdatePointer` + * :c:func:`PyBytesWriter_Grow` + * :c:func:`PyBytesWriter_Resize` + * :c:func:`PyBytesWriter_WriteBytes` + + (Contributed by Victor Stinner in :gh:`129813`.) 
+ Porting to Python 3.15 ---------------------- diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 71c133f173f157..85bc2b827df8fb 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -40,3 +40,46 @@ _PyBytes_Join(PyObject *sep, PyObject *iterable) { return PyBytes_Join(sep, iterable); } + + +// --- PyBytesWriter API ----------------------------------------------------- + +typedef struct PyBytesWriter PyBytesWriter; + +PyAPI_FUNC(PyBytesWriter *) PyBytesWriter_Create( + Py_ssize_t size); +PyAPI_FUNC(void) PyBytesWriter_Discard( + PyBytesWriter *writer); +PyAPI_FUNC(PyObject*) PyBytesWriter_Finish( + PyBytesWriter *writer); +PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithSize( + PyBytesWriter *writer, + Py_ssize_t size); +PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithPointer( + PyBytesWriter *writer, + void *buf); + +PyAPI_FUNC(void*) PyBytesWriter_GetData( + PyBytesWriter *writer); +PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetSize( + PyBytesWriter *writer); + +PyAPI_FUNC(int) PyBytesWriter_WriteBytes( + PyBytesWriter *writer, + const void *bytes, + Py_ssize_t size); +PyAPI_FUNC(int) PyBytesWriter_Format( + PyBytesWriter *writer, + const char *format, + ...); + +PyAPI_FUNC(int) PyBytesWriter_Resize( + PyBytesWriter *writer, + Py_ssize_t size); +PyAPI_FUNC(int) PyBytesWriter_Grow( + PyBytesWriter *writer, + Py_ssize_t size); +PyAPI_FUNC(void*) PyBytesWriter_GrowAndUpdatePointer( + PyBytesWriter *writer, + Py_ssize_t size, + void *buf); diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index 8ea9b3ebb88454..9f519d3ca95e92 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -143,6 +143,10 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, const void *bytes, Py_ssize_t size); +// Export for '_testcapi' shared extension. 
+PyAPI_FUNC(PyBytesWriter*) _PyBytesWriter_CreateByteArray( + Py_ssize_t size); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 8e1415f27b63f3..0f0804d5db61f7 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -274,6 +274,8 @@ extern void _PyLineTable_InitAddressRange( /** API for traversing the line number table. */ extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range); extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range); +// This is used in dump_frame() in traceback.c without an attached tstate. +extern int _PyCode_Addr2LineNoTstate(PyCodeObject *co, int addr); /** API for executors */ extern void _PyCode_Clear_Executors(PyCodeObject *code); diff --git a/Include/internal/pycore_freelist_state.h b/Include/internal/pycore_freelist_state.h index 59beb92f3f7b9c..46e2a82ea03456 100644 --- a/Include/internal/pycore_freelist_state.h +++ b/Include/internal/pycore_freelist_state.h @@ -27,6 +27,7 @@ extern "C" { # define Py_futureiters_MAXFREELIST 255 # define Py_object_stack_chunks_MAXFREELIST 4 # define Py_unicode_writers_MAXFREELIST 1 +# define Py_bytes_writers_MAXFREELIST 1 # define Py_pycfunctionobject_MAXFREELIST 16 # define Py_pycmethodobject_MAXFREELIST 16 # define Py_pymethodobjects_MAXFREELIST 20 @@ -61,6 +62,7 @@ struct _Py_freelists { struct _Py_freelist futureiters; struct _Py_freelist object_stack_chunks; struct _Py_freelist unicode_writers; + struct _Py_freelist bytes_writers; struct _Py_freelist pycfunctionobject; struct _Py_freelist pycmethodobject; struct _Py_freelist pymethodobjects; diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py index 2683f34cc7113b..f39e11fdd513b4 100644 --- a/Lib/asyncio/tools.py +++ b/Lib/asyncio/tools.py @@ -222,6 +222,20 @@ def _print_cycle_exception(exception: CycleFoundException): print(f"cycle: {inames}", file=sys.stderr) +def exit_with_permission_help_text(): + """ + Prints a message 
pointing to platform-specific permission help text and exits the program. + This function is called when a PermissionError is encountered while trying + to attach to a process. + """ + print( + "Error: The specified process cannot be attached to due to insufficient permissions.\n" + "See the Python documentation for details on required privileges and troubleshooting:\n" + "https://docs.python.org/3.14/howto/remote_debugging.html#permission-requirements\n" + ) + sys.exit(1) + + def _get_awaited_by_tasks(pid: int) -> list: try: return get_all_awaited_by(pid) @@ -230,6 +244,8 @@ def _get_awaited_by_tasks(pid: int) -> list: e = e.__context__ print(f"Error retrieving tasks: {e}") sys.exit(1) + except PermissionError as e: + exit_with_permission_help_text() def display_awaited_by_tasks_table(pid: int) -> None: diff --git a/Lib/pdb.py b/Lib/pdb.py index fc83728fb6dc94..a783583a2b1c38 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -3504,6 +3504,20 @@ def help(): "-c 'until X'".""" +def exit_with_permission_help_text(): + """ + Prints a message pointing to platform-specific permission help text and exits the program. + This function is called when a PermissionError is encountered while trying + to attach to a process. 
+ """ + print( + "Error: The specified process cannot be attached to due to insufficient permissions.\n" + "See the Python documentation for details on required privileges and troubleshooting:\n" + "https://docs.python.org/3.14/howto/remote_debugging.html#permission-requirements\n" + ) + sys.exit(1) + + def main(): import argparse @@ -3537,7 +3551,10 @@ def main(): opts = parser.parse_args() if opts.module: parser.error("argument -m: not allowed with argument --pid") - attach(opts.pid, opts.commands) + try: + attach(opts.pid, opts.commands) + except PermissionError as e: + exit_with_permission_help_text() return elif opts.module: # If a module is being debugged, we consider the arguments after "-m module" to diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index bc820bd68d9e21..410ebab729c2cf 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -299,5 +299,95 @@ def test_join(self): bytes_join(b'', NULL) +class BytesWriterTest(unittest.TestCase): + result_type = bytes + + def create_writer(self, alloc=0, string=b''): + return _testcapi.PyBytesWriter(alloc, string, 0) + + def test_create(self): + # Test PyBytesWriter_Create() + writer = self.create_writer() + self.assertEqual(writer.get_size(), 0) + self.assertEqual(writer.finish(), self.result_type(b'')) + + writer = self.create_writer(3, b'abc') + self.assertEqual(writer.get_size(), 3) + self.assertEqual(writer.finish(), self.result_type(b'abc')) + + writer = self.create_writer(10, b'abc') + self.assertEqual(writer.get_size(), 10) + self.assertEqual(writer.finish_with_size(3), self.result_type(b'abc')) + + def test_write_bytes(self): + # Test PyBytesWriter_WriteBytes() + writer = self.create_writer() + writer.write_bytes(b'Hello World!', -1) + self.assertEqual(writer.finish(), self.result_type(b'Hello World!')) + + writer = self.create_writer() + writer.write_bytes(b'Hello ', -1) + writer.write_bytes(b'World! 
', 6) + self.assertEqual(writer.finish(), self.result_type(b'Hello World!')) + + def test_resize(self): + # Test PyBytesWriter_Resize() + writer = self.create_writer() + writer.resize(len(b'number=123456'), b'number=123456') + writer.resize(len(b'number=123456'), b'') + self.assertEqual(writer.get_size(), len(b'number=123456')) + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) + + writer = self.create_writer() + writer.resize(0, b'') + writer.resize(len(b'number=123456'), b'number=123456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) + + writer = self.create_writer() + writer.resize(len(b'number='), b'number=') + writer.resize(len(b'number=123456'), b'123456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) + + writer = self.create_writer() + writer.resize(len(b'number='), b'number=') + writer.resize(len(b'number='), b'') + writer.resize(len(b'number=123456'), b'123456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) + + writer = self.create_writer() + writer.resize(len(b'number'), b'number') + writer.resize(len(b'number='), b'=') + writer.resize(len(b'number=123'), b'123') + writer.resize(len(b'number=123456'), b'456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) + + def test_format_i(self): + # Test PyBytesWriter_Format() + writer = self.create_writer() + writer.format_i(b'x=%i', 123456) + self.assertEqual(writer.finish(), self.result_type(b'x=123456')) + + writer = self.create_writer() + writer.format_i(b'x=%i, ', 123) + writer.format_i(b'y=%i', 456) + self.assertEqual(writer.finish(), self.result_type(b'x=123, y=456')) + + def test_example_abc(self): + self.assertEqual(_testcapi.byteswriter_abc(), b'abc') + + def test_example_resize(self): + self.assertEqual(_testcapi.byteswriter_resize(), b'Hello World') + + def test_example_highlevel(self): + self.assertEqual(_testcapi.byteswriter_highlevel(), b'Hello World!') + + +class 
ByteArrayWriterTest(BytesWriterTest): + result_type = bytearray + + def create_writer(self, alloc=0, string=b''): + return _testcapi.PyBytesWriter(alloc, string, 1) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_remote_pdb.py b/Lib/test/test_remote_pdb.py index 280e2444ef7d34..ec11e41678849b 100644 --- a/Lib/test/test_remote_pdb.py +++ b/Lib/test/test_remote_pdb.py @@ -1539,6 +1539,9 @@ def do_integration_test(self, client_stdin): redirect_stdout(client_stdout), redirect_stderr(client_stderr), unittest.mock.patch("sys.argv", ["pdb", "-p", str(process.pid)]), + unittest.mock.patch( + "pdb.exit_with_permission_help_text", side_effect=PermissionError + ), ): try: pdb.main() diff --git a/Misc/NEWS.d/next/C_API/2025-09-12-13-05-20.gh-issue-129813.dJZpME.rst b/Misc/NEWS.d/next/C_API/2025-09-12-13-05-20.gh-issue-129813.dJZpME.rst new file mode 100644 index 00000000000000..e4abfb6f6ed410 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-09-12-13-05-20.gh-issue-129813.dJZpME.rst @@ -0,0 +1,16 @@ +Implement :pep:`782`, the :c:type:`PyBytesWriter` API. Add functions: + +* :c:func:`PyBytesWriter_Create` +* :c:func:`PyBytesWriter_Discard` +* :c:func:`PyBytesWriter_FinishWithPointer` +* :c:func:`PyBytesWriter_FinishWithSize` +* :c:func:`PyBytesWriter_Finish` +* :c:func:`PyBytesWriter_Format` +* :c:func:`PyBytesWriter_GetData` +* :c:func:`PyBytesWriter_GetSize` +* :c:func:`PyBytesWriter_GrowAndUpdatePointer` +* :c:func:`PyBytesWriter_Grow` +* :c:func:`PyBytesWriter_Resize` +* :c:func:`PyBytesWriter_WriteBytes` + +Patch by Victor Stinner. 
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 442804353756c7..628e6dc11668e0 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -1101,20 +1101,19 @@ _hashlib_HASHXOF_digest_impl(HASHobject *self, Py_ssize_t length) /*[clinic end generated code: output=dcb09335dd2fe908 input=224d047da2c12a42]*/ { EVP_MD_CTX *temp_ctx; - PyObject *retval; if (length == 0) { return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } - retval = PyBytes_FromStringAndSize(NULL, length); - if (retval == NULL) { + PyBytesWriter *writer = PyBytesWriter_Create(length); + if (writer == NULL) { return NULL; } temp_ctx = py_wrapper_EVP_MD_CTX_new(); if (temp_ctx == NULL) { - Py_DECREF(retval); + PyBytesWriter_Discard(writer); return NULL; } @@ -1122,7 +1121,7 @@ _hashlib_HASHXOF_digest_impl(HASHobject *self, Py_ssize_t length) goto error; } if (!EVP_DigestFinalXOF(temp_ctx, - (unsigned char*)PyBytes_AS_STRING(retval), + (unsigned char*)PyBytesWriter_GetData(writer), length)) { notify_ssl_error_occurred_in(Py_STRINGIFY(EVP_DigestFinalXOF)); @@ -1130,10 +1129,10 @@ _hashlib_HASHXOF_digest_impl(HASHobject *self, Py_ssize_t length) } EVP_MD_CTX_free(temp_ctx); - return retval; + return PyBytesWriter_Finish(writer); error: - Py_DECREF(retval); + PyBytesWriter_Discard(writer); EVP_MD_CTX_free(temp_ctx); return NULL; } @@ -1750,7 +1749,6 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt, long maxmem, long dklen) /*[clinic end generated code: output=d424bc3e8c6b9654 input=bdeac9628d07f7a1]*/ { - PyObject *key = NULL; int retval; if (password->len > INT_MAX) { @@ -1791,8 +1789,8 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt, return NULL; } - key = PyBytes_FromStringAndSize(NULL, dklen); - if (key == NULL) { + PyBytesWriter *writer = PyBytesWriter_Create(dklen); + if (writer == NULL) { return NULL; } @@ -1801,16 +1799,16 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt, (const char 
*)password->buf, (size_t)password->len, (const unsigned char *)salt->buf, (size_t)salt->len, (uint64_t)n, (uint64_t)r, (uint64_t)p, (uint64_t)maxmem, - (unsigned char *)PyBytes_AS_STRING(key), (size_t)dklen + PyBytesWriter_GetData(writer), (size_t)dklen ); Py_END_ALLOW_THREADS if (!retval) { - Py_DECREF(key); + PyBytesWriter_Discard(writer); notify_ssl_error_occurred_in(Py_STRINGIFY(EVP_PBE_scrypt)); return NULL; } - return key; + return PyBytesWriter_Finish(writer); } #undef HASHLIB_SCRYPT_MAX_DKLEN diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 33903de14ba68d..388e65456c3a8b 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -1,6 +1,11 @@ +// Use pycore_bytes.h +#define PYTESTCAPI_NEED_INTERNAL_API + #include "parts.h" #include "util.h" +#include "pycore_bytesobject.h" // _PyBytesWriter_CreateByteArray() + /* Test _PyBytes_Resize() */ static PyObject * @@ -51,9 +56,308 @@ bytes_join(PyObject *Py_UNUSED(module), PyObject *args) } +// --- PyBytesWriter type --------------------------------------------------- + +typedef struct { + PyObject_HEAD + PyBytesWriter *writer; +} WriterObject; + + +static PyObject * +writer_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + WriterObject *self = (WriterObject *)type->tp_alloc(type, 0); + if (!self) { + return NULL; + } + self->writer = NULL; + return (PyObject*)self; +} + + +static int +writer_init(PyObject *self_raw, PyObject *args, PyObject *kwargs) +{ + WriterObject *self = (WriterObject *)self_raw; + if (self->writer) { + PyBytesWriter_Discard(self->writer); + } + + if (kwargs && PyDict_GET_SIZE(kwargs)) { + PyErr_Format(PyExc_TypeError, + "PyBytesWriter() takes exactly no keyword arguments"); + return -1; + } + + Py_ssize_t alloc; + char *str; + Py_ssize_t str_size; + int use_bytearray; + if (!PyArg_ParseTuple(args, "ny#i", + &alloc, &str, &str_size, &use_bytearray)) { + return -1; + } + + if (use_bytearray) { + self->writer = 
_PyBytesWriter_CreateByteArray(alloc); + } + else { + self->writer = PyBytesWriter_Create(alloc); + } + if (self->writer == NULL) { + return -1; + } + + if (str_size) { + char *buf = PyBytesWriter_GetData(self->writer); + memcpy(buf, str, str_size); + } + + return 0; +} + + +static void +writer_dealloc(PyObject *self_raw) +{ + WriterObject *self = (WriterObject *)self_raw; + PyTypeObject *tp = Py_TYPE(self); + if (self->writer) { + PyBytesWriter_Discard(self->writer); + } + tp->tp_free(self); + Py_DECREF(tp); +} + + +static inline int +writer_check(WriterObject *self) +{ + if (self->writer == NULL) { + PyErr_SetString(PyExc_ValueError, "operation on finished writer"); + return -1; + } + return 0; +} + + +static PyObject* +writer_write_bytes(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *bytes; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "yn", &bytes, &size)) { + return NULL; + } + + if (PyBytesWriter_WriteBytes(self->writer, bytes, size) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + +static PyObject* +writer_format_i(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *format; + int value; + if (!PyArg_ParseTuple(args, "yi", &format, &value)) { + return NULL; + } + + if (PyBytesWriter_Format(self->writer, format, value) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + +static PyObject* +writer_resize(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t size; + char *str; + Py_ssize_t str_size; + if (!PyArg_ParseTuple(args, + "ny#", + &size, &str, &str_size)) { + return NULL; + } + assert(size >= str_size); + + Py_ssize_t pos = PyBytesWriter_GetSize(self->writer); + if (PyBytesWriter_Resize(self->writer, size) < 0) { + return NULL; + } + + char *buf = 
PyBytesWriter_GetData(self->writer); + memcpy(buf + pos, str, str_size); + + Py_RETURN_NONE; +} + + +static PyObject* +writer_get_size(PyObject *self_raw, PyObject *Py_UNUSED(args)) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t alloc = PyBytesWriter_GetSize(self->writer); + return PyLong_FromSsize_t(alloc); +} + + +static PyObject* +writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args)) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + PyObject *str = PyBytesWriter_Finish(self->writer); + self->writer = NULL; + return str; +} + + +static PyObject* +writer_finish_with_size(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "n", &size)) { + return NULL; + } + + PyObject *str = PyBytesWriter_FinishWithSize(self->writer, size); + self->writer = NULL; + return str; +} + + +static PyMethodDef writer_methods[] = { + {"write_bytes", _PyCFunction_CAST(writer_write_bytes), METH_VARARGS}, + {"format_i", _PyCFunction_CAST(writer_format_i), METH_VARARGS}, + {"resize", _PyCFunction_CAST(writer_resize), METH_VARARGS}, + {"get_size", _PyCFunction_CAST(writer_get_size), METH_NOARGS}, + {"finish", _PyCFunction_CAST(writer_finish), METH_NOARGS}, + {"finish_with_size", _PyCFunction_CAST(writer_finish_with_size), METH_VARARGS}, + {NULL, NULL} /* sentinel */ +}; + +static PyType_Slot Writer_Type_slots[] = { + {Py_tp_new, writer_new}, + {Py_tp_init, writer_init}, + {Py_tp_dealloc, writer_dealloc}, + {Py_tp_methods, writer_methods}, + {0, 0}, /* sentinel */ +}; + +static PyType_Spec Writer_spec = { + .name = "_testcapi.PyBytesWriter", + .basicsize = sizeof(WriterObject), + .flags = Py_TPFLAGS_DEFAULT, + .slots = Writer_Type_slots, +}; + + +static PyObject * +byteswriter_abc(PyObject *Py_UNUSED(module), PyObject 
*Py_UNUSED(args)) +{ + PyBytesWriter *writer = PyBytesWriter_Create(3); + if (writer == NULL) { + return NULL; + } + + char *str = PyBytesWriter_GetData(writer); + memcpy(str, "abc", 3); + + return PyBytesWriter_Finish(writer); +} + + +static PyObject * +byteswriter_resize(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + // Allocate 10 bytes + PyBytesWriter *writer = PyBytesWriter_Create(10); + if (writer == NULL) { + return NULL; + } + char *buf = PyBytesWriter_GetData(writer); + + // Write some bytes + memcpy(buf, "Hello ", strlen("Hello ")); + buf += strlen("Hello "); + + // Allocate 10 more bytes + buf = PyBytesWriter_GrowAndUpdatePointer(writer, 10, buf); + if (buf == NULL) { + PyBytesWriter_Discard(writer); + return NULL; + } + + // Write more bytes + memcpy(buf, "World", strlen("World")); + buf += strlen("World"); + + // Truncate to the exact size and create a bytes object + return PyBytesWriter_FinishWithPointer(writer, buf); +} + + +static PyObject * +byteswriter_highlevel(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + PyBytesWriter *writer = PyBytesWriter_Create(0); + if (writer == NULL) { + goto error; + } + if (PyBytesWriter_WriteBytes(writer, "Hello", -1) < 0) { + goto error; + } + if (PyBytesWriter_Format(writer, " %s!", "World") < 0) { + goto error; + } + return PyBytesWriter_Finish(writer); + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + + static PyMethodDef test_methods[] = { {"bytes_resize", bytes_resize, METH_VARARGS}, {"bytes_join", bytes_join, METH_VARARGS}, + {"byteswriter_abc", byteswriter_abc, METH_NOARGS}, + {"byteswriter_resize", byteswriter_resize, METH_NOARGS}, + {"byteswriter_highlevel", byteswriter_highlevel, METH_NOARGS}, {NULL}, }; @@ -64,5 +368,15 @@ _PyTestCapi_Init_Bytes(PyObject *m) return -1; } + PyTypeObject *writer_type = (PyTypeObject *)PyType_FromSpec(&Writer_spec); + if (writer_type == NULL) { + return -1; + } + if (PyModule_AddType(m, writer_type) < 0) { + Py_DECREF(writer_type); + 
return -1; + } + Py_DECREF(writer_type); + return 0; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index db82f7eb684f30..fc9e1bef80f037 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -7,6 +7,7 @@ #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_format.h" // F_LJUST +#include "pycore_freelist.h" // _Py_FREELIST_FREE() #include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_long.h" // _PyLong_DigitValue @@ -195,10 +196,11 @@ PyBytes_FromString(const char *str) return (PyObject *) op; } -PyObject * -PyBytes_FromFormatV(const char *format, va_list vargs) + +static char* +bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos, + const char *format, va_list vargs) { - char *s; const char *f; const char *p; Py_ssize_t prec; @@ -212,21 +214,20 @@ PyBytes_FromFormatV(const char *format, va_list vargs) Longest 64-bit pointer representation: "0xffffffffffffffff\0" (19 bytes). 
*/ char buffer[21]; - _PyBytesWriter writer; - _PyBytesWriter_Init(&writer); + char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos; - s = _PyBytesWriter_Alloc(&writer, strlen(format)); - if (s == NULL) - return NULL; - writer.overallocate = 1; - -#define WRITE_BYTES(str) \ +#define WRITE_BYTES_LEN(str, len_expr) \ do { \ - s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \ - if (s == NULL) \ + size_t len = (len_expr); \ + s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \ + if (s == NULL) { \ goto error; \ + } \ + memcpy(s, (str), len); \ + s += len; \ } while (0) +#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str)) for (f = format; *f; f++) { if (*f != '%') { @@ -267,10 +268,6 @@ PyBytes_FromFormatV(const char *format, va_list vargs) ++f; } - /* subtract bytes preallocated for the format string - (ex: 2 for "%s") */ - writer.min_size -= (f - p + 1); - switch (*f) { case 'c': { @@ -281,7 +278,6 @@ PyBytes_FromFormatV(const char *format, va_list vargs) "expects an integer in range [0; 255]"); goto error; } - writer.min_size++; *s++ = (unsigned char)c; break; } @@ -340,9 +336,7 @@ PyBytes_FromFormatV(const char *format, va_list vargs) i++; } } - s = _PyBytesWriter_WriteBytes(&writer, s, p, i); - if (s == NULL) - goto error; + WRITE_BYTES_LEN(p, i); break; } @@ -361,31 +355,45 @@ PyBytes_FromFormatV(const char *format, va_list vargs) break; case '%': - writer.min_size++; *s++ = '%'; break; default: - if (*f == 0) { - /* fix min_size if we reached the end of the format string */ - writer.min_size++; - } - /* invalid format string: copy unformatted string and exit */ WRITE_BYTES(p); - return _PyBytesWriter_Finish(&writer, s); + return s; } } #undef WRITE_BYTES +#undef WRITE_BYTES_LEN - return _PyBytesWriter_Finish(&writer, s); + return s; error: - _PyBytesWriter_Dealloc(&writer); return NULL; } + +PyObject * +PyBytes_FromFormatV(const char *format, va_list vargs) +{ + Py_ssize_t alloc = strlen(format); + PyBytesWriter *writer = 
PyBytesWriter_Create(alloc); + if (writer == NULL) { + return NULL; + } + + char *s = bytes_fromformat(writer, 0, format, vargs); + if (s == NULL) { + PyBytesWriter_Discard(writer); + return NULL; + } + + return PyBytesWriter_FinishWithPointer(writer, s); +} + + PyObject * PyBytes_FromFormat(const char *format, ...) { @@ -398,6 +406,7 @@ PyBytes_FromFormat(const char *format, ...) return ret; } + /* Helpers for formatstring */ Py_LOCAL_INLINE(PyObject *) @@ -3747,3 +3756,321 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest, } } + +// --- PyBytesWriter API ----------------------------------------------------- + +struct PyBytesWriter { + char small_buffer[256]; + PyObject *obj; + Py_ssize_t size; + int use_bytearray; +}; + + +static inline char* +byteswriter_data(PyBytesWriter *writer) +{ + if (writer->obj == NULL) { + return writer->small_buffer; + } + else if (writer->use_bytearray) { + return PyByteArray_AS_STRING(writer->obj); + } + else { + return PyBytes_AS_STRING(writer->obj); + } +} + + +static inline Py_ssize_t +byteswriter_allocated(PyBytesWriter *writer) +{ + if (writer->obj == NULL) { + return sizeof(writer->small_buffer); + } + else if (writer->use_bytearray) { + return PyByteArray_GET_SIZE(writer->obj); + } + else { + return PyBytes_GET_SIZE(writer->obj); + } +} + + +#ifdef MS_WINDOWS + /* On Windows, overallocate by 50% is the best factor */ +# define OVERALLOCATE_FACTOR 2 +#else + /* On Linux, overallocate by 25% is the best factor */ +# define OVERALLOCATE_FACTOR 4 +#endif + + +static inline int +byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int overallocate) +{ + assert(size >= 0); + + if (size <= byteswriter_allocated(writer)) { + return 0; + } + + if (overallocate && !writer->use_bytearray) { + if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) { + size += size / OVERALLOCATE_FACTOR; + } + } + + if (writer->obj != NULL) { + if (writer->use_bytearray) { + if (PyByteArray_Resize(writer->obj, size)) { + return -1; + } + } + 
else {
+            if (_PyBytes_Resize(&writer->obj, size)) {
+                return -1;
+            }
+        }
+        assert(writer->obj != NULL);
+    }
+    else if (writer->use_bytearray) {
+        // First heap allocation: create the bytearray, then carry over the
+        // whole inline buffer.
+        writer->obj = PyByteArray_FromStringAndSize(NULL, size);
+        if (writer->obj == NULL) {
+            return -1;
+        }
+        assert((size_t)size > sizeof(writer->small_buffer));
+        memcpy(PyByteArray_AS_STRING(writer->obj),
+               writer->small_buffer,
+               sizeof(writer->small_buffer));
+    }
+    else {
+        // First heap allocation: create the bytes object, then carry over
+        // the whole inline buffer.
+        writer->obj = PyBytes_FromStringAndSize(NULL, size);
+        if (writer->obj == NULL) {
+            return -1;
+        }
+        assert((size_t)size > sizeof(writer->small_buffer));
+        memcpy(PyBytes_AS_STRING(writer->obj),
+               writer->small_buffer,
+               sizeof(writer->small_buffer));
+    }
+    return 0;
+}
+
+
+// Allocate and initialize a writer holding 'size' bytes.
+//
+// 'use_bytearray' selects the type of the heap object used by the writer:
+// bytearray if non-zero, bytes otherwise.
+//
+// Return NULL with an exception set if 'size' is negative or if a memory
+// allocation fails.
+static PyBytesWriter*
+byteswriter_create(Py_ssize_t size, int use_bytearray)
+{
+    if (size < 0) {
+        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
+        return NULL;
+    }
+
+    // Try the bytes_writers freelist first, fall back on PyMem_Malloc().
+    PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers);
+    if (writer == NULL) {
+        writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter));
+        if (writer == NULL) {
+            PyErr_NoMemory();
+            return NULL;
+        }
+    }
+    writer->obj = NULL;
+    writer->size = 0;
+    writer->use_bytearray = use_bytearray;
+
+    if (size >= 1) {
+        // Exact allocation: no overallocation for the initial size.
+        if (byteswriter_resize(writer, size, 0) < 0) {
+            PyBytesWriter_Discard(writer);
+            return NULL;
+        }
+        writer->size = size;
+    }
+    return writer;
+}
+
+// Create a writer producing a bytes object.
+PyBytesWriter*
+PyBytesWriter_Create(Py_ssize_t size)
+{
+    return byteswriter_create(size, 0);
+}
+
+// Create a writer producing a bytearray object.
+PyBytesWriter*
+_PyBytesWriter_CreateByteArray(Py_ssize_t size)
+{
+    return byteswriter_create(size, 1);
+}
+
+
+// Release the writer and the heap object it still owns, if any.
+// Do nothing if 'writer' is NULL.
+void
+PyBytesWriter_Discard(PyBytesWriter *writer)
+{
+    if (writer == NULL) {
+        return;
+    }
+
+    Py_XDECREF(writer->obj);
+    // PyMem_Free is the deallocator handed to the freelist macro.
+    _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free);
+}
+
+
+// Destroy the writer and return its first 'size' bytes as a new object:
+// a bytearray if the writer was created with use_bytearray, a bytes object
+// otherwise. Return NULL with an exception set on error. The writer is
+// released in both cases.
+PyObject*
+PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size)
+{
+    PyObject *result;
+    if (size == 0) {
+        // An empty result must still have the type requested at creation:
+        // only a bytes writer may return the empty bytes singleton.
+        if (writer->use_bytearray) {
+            result = PyByteArray_FromStringAndSize(NULL, 0);
+        }
+        else {
+            result = bytes_get_empty();
+        }
+    }
+    else if (writer->obj != NULL) {
+        if (writer->use_bytearray) {
+            if (size !=
PyByteArray_GET_SIZE(writer->obj)) {
+                if (PyByteArray_Resize(writer->obj, size)) {
+                    goto error;
+                }
+            }
+        }
+        else {
+            if (size != PyBytes_GET_SIZE(writer->obj)) {
+                if (_PyBytes_Resize(&writer->obj, size)) {
+                    goto error;
+                }
+            }
+        }
+        // Hand the heap object over to the caller; clearing 'obj' keeps
+        // PyBytesWriter_Discard() from releasing it.
+        result = writer->obj;
+        writer->obj = NULL;
+    }
+    else if (writer->use_bytearray) {
+        result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
+    }
+    else {
+        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
+    }
+    PyBytesWriter_Discard(writer);
+    return result;
+
+error:
+    PyBytesWriter_Discard(writer);
+    return NULL;
+}
+
+// Finish the writer using its current size.
+PyObject*
+PyBytesWriter_Finish(PyBytesWriter *writer)
+{
+    Py_ssize_t final_size = writer->size;
+    return PyBytesWriter_FinishWithSize(writer, final_size);
+}
+
+
+// Finish the writer, using the offset of 'buf' from the start of the data
+// as the final size. If 'buf' lies outside of the allocated buffer, discard
+// the writer, raise ValueError and return NULL.
+PyObject*
+PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf)
+{
+    char *start = byteswriter_data(writer);
+    Py_ssize_t size = (char*)buf - start;
+    if (0 <= size && size <= byteswriter_allocated(writer)) {
+        return PyBytesWriter_FinishWithSize(writer, size);
+    }
+
+    PyBytesWriter_Discard(writer);
+    PyErr_SetString(PyExc_ValueError, "invalid end pointer");
+    return NULL;
+}
+
+
+// Return the start of the writer data.
+void*
+PyBytesWriter_GetData(PyBytesWriter *writer)
+{
+    char *start = byteswriter_data(writer);
+    return start;
+}
+
+
+// Return the writer size.
+Py_ssize_t
+PyBytesWriter_GetSize(PyBytesWriter *writer)
+{
+    Py_ssize_t current = writer->size;
+    return current;
+}
+
+
+// Set the writer size to 'size', allocating more memory if needed.
+// Return 0 on success, or -1 with an exception set on error.
+int
+PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size)
+{
+    if (size >= 0) {
+        if (byteswriter_resize(writer, size, 1) < 0) {
+            return -1;
+        }
+        writer->size = size;
+        return 0;
+    }
+
+    PyErr_SetString(PyExc_ValueError, "size must be >= 0");
+    return -1;
+}
+
+
+// Add 'size' (which may be negative) to the writer size, allocating more
+// memory if needed.
+// Return 0 on success, or -1 with an exception set on error.
+int
+PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size)
+{
+    Py_ssize_t current = writer->size;
+
+    // Shrinking below zero is rejected.
+    if (size < 0 && current + size < 0) {
+        PyErr_SetString(PyExc_ValueError, "invalid size");
+        return -1;
+    }
+    // Growing past PY_SSIZE_T_MAX would overflow.
+    if (size > PY_SSIZE_T_MAX - current) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    Py_ssize_t new_size = current + size;
+    if (byteswriter_resize(writer, new_size, 1) < 0) {
+        return -1;
+    }
+    writer->size = new_size;
+    return 0;
+}
+
+
+void*
+PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, + void *buf) +{ + Py_ssize_t pos = (char*)buf - byteswriter_data(writer); + if (PyBytesWriter_Grow(writer, size) < 0) { + return NULL; + } + return byteswriter_data(writer) + pos; +} + + +int +PyBytesWriter_WriteBytes(PyBytesWriter *writer, + const void *bytes, Py_ssize_t size) +{ + if (size < 0) { + size_t len = strlen(bytes); + if (len > (size_t)PY_SSIZE_T_MAX) { + PyErr_NoMemory(); + return -1; + } + size = (Py_ssize_t)len; + } + + Py_ssize_t pos = writer->size; + if (PyBytesWriter_Grow(writer, size) < 0) { + return -1; + } + char *buf = byteswriter_data(writer); + memcpy(buf + pos, bytes, size); + return 0; +} + + +int +PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...) +{ + Py_ssize_t pos = writer->size; + if (PyBytesWriter_Grow(writer, strlen(format)) < 0) { + return -1; + } + + va_list vargs; + va_start(vargs, format); + char *buf = bytes_fromformat(writer, pos, format, vargs); + va_end(vargs); + + Py_ssize_t size = buf - byteswriter_data(writer); + return PyBytesWriter_Resize(writer, size); +} diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 55ba6ae372be41..0d264a6e346f95 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1006,7 +1006,7 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno) ******************/ int -PyCode_Addr2Line(PyCodeObject *co, int addrq) +_PyCode_Addr2LineNoTstate(PyCodeObject *co, int addrq) { if (addrq < 0) { return co->co_firstlineno; @@ -1020,6 +1020,16 @@ PyCode_Addr2Line(PyCodeObject *co, int addrq) return _PyCode_CheckLineNumber(addrq, &bounds); } +int +PyCode_Addr2Line(PyCodeObject *co, int addrq) +{ + int lineno; + Py_BEGIN_CRITICAL_SECTION(co); + lineno = _PyCode_Addr2LineNoTstate(co, addrq); + Py_END_CRITICAL_SECTION(); + return lineno; +} + void _PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range) { diff --git 
a/Objects/object.c b/Objects/object.c index bd3ba02f8eb255..aaa3c0b338434e 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -945,6 +945,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) clear_freelist(&freelists->object_stack_chunks, 1, PyMem_RawFree); } clear_freelist(&freelists->unicode_writers, is_finalization, PyMem_Free); + clear_freelist(&freelists->bytes_writers, is_finalization, PyMem_Free); clear_freelist(&freelists->ints, is_finalization, free_object); clear_freelist(&freelists->pycfunctionobject, is_finalization, PyObject_GC_Del); clear_freelist(&freelists->pycmethodobject, is_finalization, PyObject_GC_Del); diff --git a/Python/traceback.c b/Python/traceback.c index da7956d1ec47b4..46106e52dbaf82 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -993,8 +993,8 @@ dump_frame(int fd, _PyInterpreterFrame *frame) } else { PUTS(fd, "???"); } - - int lineno = PyUnstable_InterpreterFrame_GetLine(frame); + int lasti = PyUnstable_InterpreterFrame_GetLasti(frame); + int lineno = _PyCode_Addr2LineNoTstate(code, lasti); PUTS(fd, ", line "); if (lineno >= 0) { _Py_DumpDecimal(fd, (size_t)lineno);