From 328da67b2c71a52b0cdf61c4e52b697e7e921c22 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Sat, 4 Apr 2026 19:56:40 +0800 Subject: [PATCH 01/13] gh-146073: Revert "gh-146073: Add fitness/exit quality mechanism for JIT trace frontend (GH-147966)" (#148082) This reverts commit 198b04b75f7425c401ffe40a748688a89d28dd59. --- Include/cpython/pystats.h | 1 - Include/internal/pycore_interp_structs.h | 4 - Include/internal/pycore_optimizer.h | 20 +--- Python/optimizer.c | 130 ++--------------------- Python/pystate.c | 16 --- Python/pystats.c | 1 - 6 files changed, 7 insertions(+), 165 deletions(-) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index 5d1f44988a6df1..e473110eca7415 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -144,7 +144,6 @@ typedef struct _optimization_stats { uint64_t unknown_callee; uint64_t trace_immediately_deopts; uint64_t executors_invalidated; - uint64_t fitness_terminated_traces; UOpStats opcode[PYSTATS_MAX_UOP_ID + 1]; uint64_t unsupported_opcode[256]; uint64_t trace_length_hist[_Py_UOP_HIST_SIZE]; diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 0cebe1b4b9e995..f76d4f41c55119 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -449,10 +449,6 @@ typedef struct _PyOptimizationConfig { uint16_t side_exit_initial_value; uint16_t side_exit_initial_backoff; - // Trace fitness thresholds - uint16_t fitness_initial; - uint16_t fitness_initial_side; - // Optimization flags bool specialization_enabled; bool uops_optimize_enabled; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 820ee32201c1f8..2986afb142b5d1 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -15,23 +15,6 @@ extern "C" { #include "pycore_optimizer_types.h" #include -/* Default fitness configuration values for trace quality control. 
- * FITNESS_INITIAL and FITNESS_INITIAL_SIDE can be overridden via - * PYTHON_JIT_FITNESS_INITIAL and PYTHON_JIT_FITNESS_INITIAL_SIDE */ -#define FITNESS_PER_INSTRUCTION 2 -#define FITNESS_BRANCH_BASE 5 -#define FITNESS_INITIAL (FITNESS_PER_INSTRUCTION * 1000) -#define FITNESS_INITIAL_SIDE (FITNESS_INITIAL / 2) -#define FITNESS_BACKWARD_EDGE (FITNESS_INITIAL / 10) - -/* Exit quality constants for fitness-based trace termination. - * Higher values mean better places to stop the trace. */ - -#define EXIT_QUALITY_DEFAULT 200 -#define EXIT_QUALITY_CLOSE_LOOP (4 * EXIT_QUALITY_DEFAULT) -#define EXIT_QUALITY_ENTER_EXECUTOR (2 * EXIT_QUALITY_DEFAULT + 100) -#define EXIT_QUALITY_SPECIALIZABLE (EXIT_QUALITY_DEFAULT / 4) - typedef struct _PyJitUopBuffer { _PyUOpInstruction *start; @@ -118,8 +101,7 @@ typedef struct _PyJitTracerPreviousState { } _PyJitTracerPreviousState; typedef struct _PyJitTracerTranslatorState { - int32_t fitness; // Current trace fitness, starts high, decrements - int frame_depth; // Current inline depth (0 = root frame) + int jump_backward_seen; } _PyJitTracerTranslatorState; typedef struct _PyJitTracerState { diff --git a/Python/optimizer.c b/Python/optimizer.c index c7a6b7e746545c..f09bf778587b12 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -549,6 +549,8 @@ dynamic_exit_uop[MAX_UOP_ID + 1] = { }; +#define CONFIDENCE_RANGE 1000 +#define CONFIDENCE_CUTOFF 333 #ifdef Py_DEBUG #define DPRINTF(level, ...) \ @@ -596,46 +598,6 @@ add_to_trace( ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive))) -/* Compute branch fitness penalty based on how likely the traced path is. - * The penalty is small when the traced path is common, large when rare. - * A branch that historically goes the other way gets a heavy penalty. */ -static inline int -compute_branch_penalty(uint16_t history, bool branch_taken) -{ - int taken_count = _Py_popcount32((uint32_t)history); - int on_trace_count = branch_taken ? 
taken_count : 16 - taken_count; - int off_trace = 16 - on_trace_count; - /* Linear scaling: off_trace ranges from 0 (fully biased our way) - * to 16 (fully biased against us), so the penalty ranges from - * FITNESS_BRANCH_BASE to FITNESS_BRANCH_BASE + 32. */ - return FITNESS_BRANCH_BASE + off_trace * 2; -} - -/* Compute exit quality for the current trace position. - * Higher values mean better places to stop the trace. */ -static inline int32_t -compute_exit_quality(_Py_CODEUNIT *target_instr, int opcode, - const _PyJitTracerState *tracer) -{ - if (target_instr == tracer->initial_state.start_instr || - target_instr == tracer->initial_state.close_loop_instr) { - return EXIT_QUALITY_CLOSE_LOOP; - } - if (target_instr->op.code == ENTER_EXECUTOR) { - return EXIT_QUALITY_ENTER_EXECUTOR; - } - if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]] > 0) { - return EXIT_QUALITY_SPECIALIZABLE; - } - return EXIT_QUALITY_DEFAULT; -} - -static inline int32_t -compute_frame_penalty(const _PyOptimizationConfig *cfg) -{ - return (int32_t)cfg->fitness_initial / 10 + 1; -} - static int is_terminator(const _PyUOpInstruction *uop) { @@ -675,7 +637,6 @@ _PyJit_translate_single_bytecode_to_trace( _Py_CODEUNIT *this_instr = tracer->prev_state.instr; _Py_CODEUNIT *target_instr = this_instr; uint32_t target = 0; - int end_trace_opcode = _DEOPT; target = Py_IsNone((PyObject *)old_code) ? (uint32_t)(target_instr - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR) @@ -773,14 +734,16 @@ _PyJit_translate_single_bytecode_to_trace( DPRINTF(2, "Unsupported: oparg too large\n"); unsupported: { + // Rewind to previous instruction and replace with _EXIT_TRACE. 
_PyUOpInstruction *curr = uop_buffer_last(trace); while (curr->opcode != _SET_IP && uop_buffer_length(trace) > 2) { trace->next--; curr = uop_buffer_last(trace); } + assert(curr->opcode == _SET_IP || uop_buffer_length(trace) == 2); if (curr->opcode == _SET_IP) { int32_t old_target = (int32_t)uop_get_target(curr); - curr->opcode = end_trace_opcode; + curr->opcode = _DEOPT; curr->format = UOP_FORMAT_TARGET; curr->target = old_target; } @@ -800,23 +763,6 @@ _PyJit_translate_single_bytecode_to_trace( return 1; } - // Fitness-based trace quality check (before reserving space for this instruction) - _PyJitTracerTranslatorState *ts = &tracer->translator_state; - int32_t eq = compute_exit_quality(target_instr, opcode, tracer); - DPRINTF(3, "Fitness check: %s(%d) fitness=%d, exit_quality=%d, depth=%d\n", - _PyOpcode_OpName[opcode], oparg, ts->fitness, eq, ts->frame_depth); - - // Check if fitness is depleted — should we stop the trace? - if (ts->fitness < eq) { - // This is a tracer heuristic rather than normal program control flow, - // so leave operand1 clear and let the resulting side exit increase chain_depth. - ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); - OPT_STAT_INC(fitness_terminated_traces); - DPRINTF(2, "Fitness terminated: %s(%d) fitness=%d < exit_quality=%d\n", - _PyOpcode_OpName[opcode], oparg, ts->fitness, eq); - goto done; - } - // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT trace->end -= 2; @@ -870,12 +816,6 @@ _PyJit_translate_single_bytecode_to_trace( assert(jump_happened ? (next_instr == computed_jump_instr) : (next_instr == computed_next_instr)); uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_happened]; ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(jump_happened ? 
computed_next_instr : computed_jump_instr, old_code)); - int bp = compute_branch_penalty(target_instr[1].cache, jump_happened); - tracer->translator_state.fitness -= bp; - DPRINTF(3, " branch penalty: -%d (history=0x%04x, taken=%d) -> fitness=%d\n", - bp, target_instr[1].cache, jump_happened, - tracer->translator_state.fitness); - break; } case JUMP_BACKWARD_JIT: @@ -883,9 +823,6 @@ _PyJit_translate_single_bytecode_to_trace( case JUMP_BACKWARD_NO_JIT: case JUMP_BACKWARD: ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target); - tracer->translator_state.fitness -= FITNESS_BACKWARD_EDGE; - DPRINTF(3, " backward edge penalty: -%d -> fitness=%d\n", - FITNESS_BACKWARD_EDGE, tracer->translator_state.fitness); _Py_FALLTHROUGH; case JUMP_BACKWARD_NO_INTERRUPT: { @@ -1008,44 +945,6 @@ _PyJit_translate_single_bytecode_to_trace( assert(next->op.code == STORE_FAST); operand = next->op.arg; } - else if (uop == _PUSH_FRAME) { - _PyJitTracerTranslatorState *ts_depth = &tracer->translator_state; - ts_depth->frame_depth++; - if (ts_depth->frame_depth >= MAX_ABSTRACT_FRAME_DEPTH) { - // The optimizer can't handle frames this deep, - // so there's no point continuing the trace. 
- DPRINTF(2, "Unsupported: frame depth %d >= MAX_ABSTRACT_FRAME_DEPTH\n", - ts_depth->frame_depth); - end_trace_opcode = _EXIT_TRACE; - goto unsupported; - } - int32_t frame_penalty = compute_frame_penalty(&tstate->interp->opt_config); - int32_t cost = frame_penalty * ts_depth->frame_depth; - ts_depth->fitness -= cost; - DPRINTF(3, " _PUSH_FRAME: depth=%d, penalty=-%d (per_frame=%d) -> fitness=%d\n", - ts_depth->frame_depth, cost, frame_penalty, - ts_depth->fitness); - } - else if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) { - _PyJitTracerTranslatorState *ts_depth = &tracer->translator_state; - int32_t frame_penalty = compute_frame_penalty(&tstate->interp->opt_config); - if (ts_depth->frame_depth <= 0) { - // Underflow: returning from a frame we didn't enter - ts_depth->fitness -= frame_penalty * 2; - DPRINTF(3, " %s: underflow penalty=-%d -> fitness=%d\n", - _PyOpcode_uop_name[uop], frame_penalty * 2, - ts_depth->fitness); - } - else { - // Reward returning: small inlined calls should be encouraged - ts_depth->fitness += frame_penalty; - DPRINTF(3, " %s: return reward=+%d, depth=%d -> fitness=%d\n", - _PyOpcode_uop_name[uop], frame_penalty, - ts_depth->frame_depth - 1, - ts_depth->fitness); - } - ts_depth->frame_depth = ts_depth->frame_depth <= 0 ? 0 : ts_depth->frame_depth - 1; - } else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) { PyObject *recorded_value = tracer->prev_state.recorded_value; tracer->prev_state.recorded_value = NULL; @@ -1087,13 +986,7 @@ _PyJit_translate_single_bytecode_to_trace( ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0); goto done; } - // Update fitness AFTER translation, BEFORE returning to continue tracing. - // This ensures the next iteration's fitness check reflects the cost of - // all instructions translated so far. 
- tracer->translator_state.fitness -= FITNESS_PER_INSTRUCTION; - DPRINTF(3, " per-insn cost: -%d -> fitness=%d\n", - FITNESS_PER_INSTRUCTION, tracer->translator_state.fitness); - DPRINTF(2, "Trace continuing (fitness=%d)\n", tracer->translator_state.fitness); + DPRINTF(2, "Trace continuing\n"); return 1; done: DPRINTF(2, "Trace done\n"); @@ -1176,17 +1069,6 @@ _PyJit_TryInitializeTracing( assert(curr_instr->op.code == JUMP_BACKWARD_JIT || curr_instr->op.code == RESUME_CHECK_JIT || (exit != NULL)); tracer->initial_state.jump_backward_instr = curr_instr; - // Initialize fitness tracking state - const _PyOptimizationConfig *cfg = &tstate->interp->opt_config; - _PyJitTracerTranslatorState *ts = &tracer->translator_state; - bool is_side_trace = (exit != NULL); - ts->fitness = is_side_trace - ? (int32_t)cfg->fitness_initial_side - : (int32_t)cfg->fitness_initial; - ts->frame_depth = 0; - DPRINTF(3, "Fitness init: %s trace, fitness=%d\n", - is_side_trace ? "side" : "root", ts->fitness); - tracer->is_tracing = true; return 1; } diff --git a/Python/pystate.c b/Python/pystate.c index 78eab7cc7d2459..143175da0f45c7 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -635,22 +635,6 @@ init_interpreter(PyInterpreterState *interp, "PYTHON_JIT_SIDE_EXIT_INITIAL_BACKOFF", SIDE_EXIT_INITIAL_BACKOFF, 0, MAX_BACKOFF); - // Trace fitness configuration - init_policy(&interp->opt_config.fitness_initial, - "PYTHON_JIT_FITNESS_INITIAL", - FITNESS_INITIAL, 100, 10000); - init_policy(&interp->opt_config.fitness_initial_side, - "PYTHON_JIT_FITNESS_INITIAL_SIDE", - FITNESS_INITIAL_SIDE, 50, 5000); - /* The tracer starts at start_instr, so initial fitness must not be below - * the close-loop exit quality or tracing will terminate immediately. 
*/ - if (interp->opt_config.fitness_initial < EXIT_QUALITY_CLOSE_LOOP) { - interp->opt_config.fitness_initial = EXIT_QUALITY_CLOSE_LOOP; - } - if (interp->opt_config.fitness_initial_side < EXIT_QUALITY_CLOSE_LOOP) { - interp->opt_config.fitness_initial_side = EXIT_QUALITY_CLOSE_LOOP; - } - interp->opt_config.specialization_enabled = !is_env_enabled("PYTHON_SPECIALIZATION_OFF"); interp->opt_config.uops_optimize_enabled = !is_env_disabled("PYTHON_UOPS_OPTIMIZE"); if (interp != &runtime->_main_interpreter) { diff --git a/Python/pystats.c b/Python/pystats.c index 2fac2db1b738c7..a057ad884566d8 100644 --- a/Python/pystats.c +++ b/Python/pystats.c @@ -274,7 +274,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence); fprintf(out, "Optimization unknown callee: %" PRIu64 "\n", stats->unknown_callee); fprintf(out, "Executors invalidated: %" PRIu64 "\n", stats->executors_invalidated); - fprintf(out, "Optimization fitness terminated: %" PRIu64 "\n", stats->fitness_terminated_traces); print_histogram(out, "Trace length", stats->trace_length_hist); print_histogram(out, "Trace run length", stats->trace_run_length_hist); From fe9befc1ca7eac36749ec358969464334381b9f9 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sat, 4 Apr 2026 13:29:17 +0100 Subject: [PATCH 02/13] gh-145883: Fix two heap-buffer-overflows in `_zoneinfo` (#145885) --- Lib/test/test_zoneinfo/test_zoneinfo.py | 32 +++++++++++++++++++ Lib/zoneinfo/_common.py | 4 +++ Lib/zoneinfo/_zoneinfo.py | 2 +- ...-03-12-21-01-48.gh-issue-145883.lUvXcc.rst | 2 ++ Modules/_zoneinfo.c | 4 +-- Tools/build/compute-changes.py | 3 ++ 6 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-03-12-21-01-48.gh-issue-145883.lUvXcc.rst diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index aaab4709464fd0..7502b120825fbc 100644 --- 
a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -741,6 +741,38 @@ def test_empty_zone(self): with self.assertRaises(ValueError): self.klass.from_file(zf) + def test_invalid_transition_index(self): + STD = ZoneOffset("STD", ZERO) + DST = ZoneOffset("DST", ONE_H, ONE_H) + + zf = self.construct_zone([ + ZoneTransition(datetime(2026, 3, 1, 2), STD, DST), + ZoneTransition(datetime(2026, 11, 1, 2), DST, STD), + ], after="", version=1) + + data = bytearray(zf.read()) + timecnt = struct.unpack_from(">l", data, 32)[0] + idx_offset = 44 + timecnt * 4 + data[idx_offset + 1] = 2 # typecnt is 2, so index 2 is OOB + f = io.BytesIO(bytes(data)) + + with self.assertRaises(ValueError): + self.klass.from_file(f) + + def test_transition_lookahead_out_of_bounds(self): + STD = ZoneOffset("STD", ZERO) + DST = ZoneOffset("DST", ONE_H, ONE_H) + EXT = ZoneOffset("EXT", ONE_H) + + zf = self.construct_zone([ + ZoneTransition(datetime(2026, 3, 1), STD, DST), + ZoneTransition(datetime(2026, 6, 1), DST, EXT), + ZoneTransition(datetime(2026, 9, 1), EXT, DST), + ], after="") + + zi = self.klass.from_file(zf) + self.assertIsNotNone(zi) + def test_zone_very_large_timestamp(self): """Test when a transition is in the far past or future. 
diff --git a/Lib/zoneinfo/_common.py b/Lib/zoneinfo/_common.py index 59f3f0ce853f74..98668c15d8bf94 100644 --- a/Lib/zoneinfo/_common.py +++ b/Lib/zoneinfo/_common.py @@ -67,6 +67,10 @@ def load_data(fobj): f">{timecnt}{time_type}", fobj.read(timecnt * time_size) ) trans_idx = struct.unpack(f">{timecnt}B", fobj.read(timecnt)) + + if max(trans_idx) >= typecnt: + raise ValueError("Invalid transition index found while reading TZif: " + f"{max(trans_idx)}") else: trans_list_utc = () trans_idx = () diff --git a/Lib/zoneinfo/_zoneinfo.py b/Lib/zoneinfo/_zoneinfo.py index bd3fefc6c9d959..7063eb6a9025ac 100644 --- a/Lib/zoneinfo/_zoneinfo.py +++ b/Lib/zoneinfo/_zoneinfo.py @@ -338,7 +338,7 @@ def _utcoff_to_dstoff(trans_idx, utcoffsets, isdsts): if not isdsts[comp_idx]: dstoff = utcoff - utcoffsets[comp_idx] - if not dstoff and idx < (typecnt - 1): + if not dstoff and idx < (typecnt - 1) and i + 1 < len(trans_idx): comp_idx = trans_idx[i + 1] # If the following transition is also DST and we couldn't diff --git a/Misc/NEWS.d/next/Library/2026-03-12-21-01-48.gh-issue-145883.lUvXcc.rst b/Misc/NEWS.d/next/Library/2026-03-12-21-01-48.gh-issue-145883.lUvXcc.rst new file mode 100644 index 00000000000000..2c17768c5189da --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-12-21-01-48.gh-issue-145883.lUvXcc.rst @@ -0,0 +1,2 @@ +:mod:`zoneinfo`: Fix heap buffer overflow reads from malformed TZif data. +Found by OSS Fuzz, issues :oss-fuzz:`492245058` and :oss-fuzz:`492230068`. 
diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index aa0b1302cb2fc6..eaffd020ed97c0 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -1075,7 +1075,7 @@ load_data(zoneinfo_state *state, PyZoneInfo_ZoneInfo *self, PyObject *file_obj) } trans_idx[i] = (size_t)cur_trans_idx; - if (trans_idx[i] > self->num_ttinfos) { + if (trans_idx[i] >= self->num_ttinfos) { PyErr_Format( PyExc_ValueError, "Invalid transition index found while reading TZif: %zd", @@ -2081,7 +2081,7 @@ utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs, dstoff = utcoff - utcoffs[comp_idx]; } - if (!dstoff && idx < (num_ttinfos - 1)) { + if (!dstoff && idx < (num_ttinfos - 1) && i + 1 < num_transitions) { comp_idx = trans_idx[i + 1]; // If the following transition is also DST and we couldn't find diff --git a/Tools/build/compute-changes.py b/Tools/build/compute-changes.py index c15dc599f993f3..4870388da0d8a5 100644 --- a/Tools/build/compute-changes.py +++ b/Tools/build/compute-changes.py @@ -99,6 +99,9 @@ Path("Modules/pyexpat.c"), # zipfile Path("Lib/zipfile/"), + # zoneinfo + Path("Lib/zoneinfo/"), + Path("Modules/_zoneinfo.c"), }) From 289f19adb0abaaab0e914f52dceca831905a1967 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 4 Apr 2026 21:32:12 +0900 Subject: [PATCH 03/13] gh-148083: Constant-fold _CONTAINS_OP_SET for frozenset (gh-148084) --- Lib/test/test_capi/test_opt.py | 20 ++++++++- ...-04-04-20-59-12.gh-issue-148083.9ZHNBN.rst | 1 + Python/optimizer_analysis.c | 1 + Python/optimizer_bytecodes.c | 3 ++ Python/optimizer_cases.c.h | 44 +++++++++++++++++++ Python/optimizer_symbols.c | 3 +- 6 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-04-20-59-12.gh-issue-148083.9ZHNBN.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 022f05bbe37fa4..56f90194b480a1 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -23,6 +23,9 @@ # For 
frozendict JIT tests FROZEN_DICT_CONST = frozendict(x=1, y=2) +# For frozenset JIT tests +FROZEN_SET_CONST = frozenset({1, 2, 3}) + class _GenericKey: pass @@ -2169,7 +2172,8 @@ def f(n): self.assertIsNotNone(ex) uops = get_opnames(ex) self.assertNotIn("_GUARD_TOS_ANY_SET", uops) - self.assertIn("_CONTAINS_OP_SET", uops) + # _CONTAINS_OP_SET is constant-folded away for frozenset literals + self.assertIn("_INSERT_2_LOAD_CONST_INLINE_BORROW", uops) def test_remove_guard_for_known_type_tuple(self): def f(n): @@ -4399,6 +4403,20 @@ def testfunc(n): # lookup result is folded to constant 1, so comparison is optimized away self.assertNotIn("_COMPARE_OP_INT", uops) + def test_contains_op_frozenset_const_fold(self): + def testfunc(n): + x = 0 + for _ in range(n): + if 1 in FROZEN_SET_CONST: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_CONTAINS_OP_SET", uops) + def test_binary_subscr_list_slice(self): def testfunc(n): x = 0 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-04-20-59-12.gh-issue-148083.9ZHNBN.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-04-20-59-12.gh-issue-148083.9ZHNBN.rst new file mode 100644 index 00000000000000..fea4659d0b9916 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-04-20-59-12.gh-issue-148083.9ZHNBN.rst @@ -0,0 +1 @@ +Constant-fold ``_CONTAINS_OP_SET`` for :class:`frozenset`. Patch by Donghee Na. 
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4672a272fc9203..2953311b392600 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -30,6 +30,7 @@ #include "pycore_unicodeobject.h" #include "pycore_ceval.h" #include "pycore_floatobject.h" +#include "pycore_setobject.h" #include #include diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index dfb97625bf924f..6e9a34384ba531 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -706,6 +706,9 @@ dummy_func(void) { b = sym_new_type(ctx, &PyBool_Type); l = left; r = right; + if (sym_matches_type(right, &PyFrozenSet_Type)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right, b); + } } op(_CONTAINS_OP_DICT, (left, right -- b, l, r)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 4643a0ed0c5f9d..dc00b6bc1397f5 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2993,6 +2993,50 @@ b = sym_new_type(ctx, &PyBool_Type); l = left; r = right; + if (sym_matches_type(right, &PyFrozenSet_Type)) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef b_stackref; + _PyStackRef l_stackref; + _PyStackRef r_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyAnySet_CheckExact(right_o)); + STAT_INC(CONTAINS_OP, hit); + int res = _PySet_Contains((PySetObject *)right_o, left_o); + if (res < 0) { + JUMP_TO_LABEL(error); + } + b_stackref = (res ^ oparg) ? 
PyStackRef_True : PyStackRef_False; + l_stackref = left; + r_stackref = right; + /* End of uop copied from bytecodes for constant evaluation */ + (void)l_stackref; + (void)r_stackref; + b = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(b_stackref)); + if (sym_is_const(ctx, b)) { + PyObject *result = sym_get_const(ctx, b); + if (_Py_IsImmortal(result)) { + // Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } + CHECK_STACK_BOUNDS(1); + stack_pointer[-2] = b; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + break; + } + } CHECK_STACK_BOUNDS(1); stack_pointer[-2] = b; stack_pointer[-1] = l; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index a0ee175fd10c1a..2614bcd430a2c5 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -283,7 +283,8 @@ _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym) (typ == &PyFloat_Type) || (typ == &_PyNone_Type) || (typ == &PyBool_Type) || - (typ == &PyFrozenDict_Type); + (typ == &PyFrozenDict_Type) || + (typ == &PyFrozenSet_Type); } void From c398490fbf15ede5de3389b4ca4e32fb9a7c5d67 Mon Sep 17 00:00:00 2001 From: Wulian233 <1055917385@qq.com> Date: Sat, 4 Apr 2026 23:34:55 +0800 Subject: [PATCH 04/13] gh-148074: Fix `typeobject.c` missing error return (#148075) --- Objects/typeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b19aee6338dcc0..0ac5377d168812 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -9345,6 +9345,7 @@ type_ready_post_checks(PyTypeObject *type) PyErr_Format(PyExc_SystemError, "type %s has a tp_dictoffset that is too small", type->tp_name); + return -1; } } return 0; From 853dafe23a138459be544065251d0150df680a2c Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 5 Apr 2026 00:40:12 +0900 
Subject: [PATCH 05/13] gh-148083: Prevent constant folding when lhs is container types (gh-148090) --- Include/internal/pycore_optimizer.h | 1 + Python/optimizer_analysis.c | 1 + Python/optimizer_bytecodes.c | 6 ++++-- Python/optimizer_cases.c.h | 6 ++++-- Python/optimizer_symbols.c | 14 ++++++++++++++ 5 files changed, 24 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 2986afb142b5d1..cf01c620476ff7 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -394,6 +394,7 @@ extern JitOptRef _Py_uop_sym_new_type( extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val); extern JitOptRef _Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val); bool _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym); +bool _Py_uop_sym_is_not_container(JitOptRef sym); _PyStackRef _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym); extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx); extern bool _Py_uop_sym_has_type(JitOptRef sym); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 2953311b392600..92e1c081d524db 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -252,6 +252,7 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr, #define sym_is_not_null _Py_uop_sym_is_not_null #define sym_is_const _Py_uop_sym_is_const #define sym_is_safe_const _Py_uop_sym_is_safe_const +#define sym_is_not_container _Py_uop_sym_is_not_container #define sym_get_const _Py_uop_sym_get_const #define sym_new_const_steal _Py_uop_sym_new_const_steal #define sym_get_const_as_stackref _Py_uop_sym_get_const_as_stackref diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 6e9a34384ba531..f2645553513f3d 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -515,7 +515,8 @@ dummy_func(void) { res = sym_new_not_null(ctx); ds = 
dict_st; ss = sub_st; - if (sym_matches_type(dict_st, &PyFrozenDict_Type)) { + if (sym_is_not_container(sub_st) && + sym_matches_type(dict_st, &PyFrozenDict_Type)) { REPLACE_OPCODE_IF_EVALUATES_PURE(dict_st, sub_st, res); } } @@ -706,7 +707,8 @@ dummy_func(void) { b = sym_new_type(ctx, &PyBool_Type); l = left; r = right; - if (sym_matches_type(right, &PyFrozenSet_Type)) { + if (sym_is_not_container(left) && + sym_matches_type(right, &PyFrozenSet_Type)) { REPLACE_OPCODE_IF_EVALUATES_PURE(left, right, b); } } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index dc00b6bc1397f5..fb3ec39a42eabc 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1462,7 +1462,8 @@ res = sym_new_not_null(ctx); ds = dict_st; ss = sub_st; - if (sym_matches_type(dict_st, &PyFrozenDict_Type)) { + if (sym_is_not_container(sub_st) && + sym_matches_type(dict_st, &PyFrozenDict_Type)) { if ( sym_is_safe_const(ctx, dict_st) && sym_is_safe_const(ctx, sub_st) @@ -2993,7 +2994,8 @@ b = sym_new_type(ctx, &PyBool_Type); l = left; r = right; - if (sym_matches_type(right, &PyFrozenSet_Type)) { + if (sym_is_not_container(left) && + sym_matches_type(right, &PyFrozenSet_Type)) { if ( sym_is_safe_const(ctx, left) && sym_is_safe_const(ctx, right) diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 2614bcd430a2c5..6230b8948697e2 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -287,6 +287,20 @@ _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym) (typ == &PyFrozenSet_Type); } +bool +_Py_uop_sym_is_not_container(JitOptRef sym) +{ + PyTypeObject *typ = _Py_uop_sym_get_type(sym); + if (typ == NULL) { + return false; + } + return (typ == &PyLong_Type) || + (typ == &PyFloat_Type) || + (typ == &PyUnicode_Type) || + (typ == &_PyNone_Type) || + (typ == &PyBool_Type); +} + void _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ) { From fbdbea949f0d3e33c5566edd4da102d3db0674d5 Mon Sep 17 
00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:13:17 +0300 Subject: [PATCH 06/13] Regex HOWTO: invalid string literals result in `SyntaxWarning` (#148092) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/howto/regex.rst | 56 ++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/Doc/howto/regex.rst b/Doc/howto/regex.rst index 7486a378dbb06f..84ec535ca98e97 100644 --- a/Doc/howto/regex.rst +++ b/Doc/howto/regex.rst @@ -1,7 +1,7 @@ .. _regex-howto: **************************** - Regular Expression HOWTO + Regular expression HOWTO **************************** :Author: A.M. Kuchling @@ -47,7 +47,7 @@ Python code to do the processing; while Python code will be slower than an elaborate regular expression, it will also probably be more understandable. -Simple Patterns +Simple patterns =============== We'll start by learning about the simplest possible regular expressions. Since @@ -59,7 +59,7 @@ expressions (deterministic and non-deterministic finite automata), you can refer to almost any textbook on writing compilers. -Matching Characters +Matching characters ------------------- Most letters and characters will simply match themselves. For example, the @@ -159,7 +159,7 @@ match even a newline. ``.`` is often used where you want to match "any character". -Repeating Things +Repeating things ---------------- Being able to match varying sets of characters is the first thing regular @@ -210,7 +210,7 @@ this RE against the string ``'abcbd'``. | | | ``[bcd]*`` is only matching | | | | ``bc``. | +------+-----------+---------------------------------+ -| 6 | ``abcb`` | Try ``b`` again. This time | +| 7 | ``abcb`` | Try ``b`` again. This time | | | | the character at the | | | | current position is ``'b'``, so | | | | it succeeds. | @@ -255,7 +255,7 @@ is equivalent to ``+``, and ``{0,1}`` is the same as ``?``. 
It's better to use to read. -Using Regular Expressions +Using regular expressions ========================= Now that we've looked at some simple regular expressions, how do we actually use @@ -264,7 +264,7 @@ expression engine, allowing you to compile REs into objects and then perform matches with them. -Compiling Regular Expressions +Compiling regular expressions ----------------------------- Regular expressions are compiled into pattern objects, which have @@ -295,7 +295,7 @@ disadvantage which is the topic of the next section. .. _the-backslash-plague: -The Backslash Plague +The backslash plague -------------------- As stated earlier, regular expressions use the backslash character (``'\'``) to @@ -335,7 +335,7 @@ expressions will often be written in Python code using this raw string notation. In addition, special escape sequences that are valid in regular expressions, but not valid as Python string literals, now result in a -:exc:`DeprecationWarning` and will eventually become a :exc:`SyntaxError`, +:exc:`SyntaxWarning` and will eventually become a :exc:`SyntaxError`, which means the sequences will be invalid if raw string notation or escaping the backslashes isn't used. @@ -351,7 +351,7 @@ the backslashes isn't used. +-------------------+------------------+ -Performing Matches +Performing matches ------------------ Once you have an object representing a compiled regular expression, what do you @@ -369,10 +369,10 @@ for a complete listing. | | location where this RE matches. | +------------------+-----------------------------------------------+ | ``findall()`` | Find all substrings where the RE matches, and | -| | returns them as a list. | +| | return them as a list. | +------------------+-----------------------------------------------+ | ``finditer()`` | Find all substrings where the RE matches, and | -| | returns them as an :term:`iterator`. | +| | return them as an :term:`iterator`. 
| +------------------+-----------------------------------------------+ :meth:`~re.Pattern.match` and :meth:`~re.Pattern.search` return ``None`` if no match can be found. If @@ -473,7 +473,7 @@ Two pattern methods return all of the matches for a pattern. The ``r`` prefix, making the literal a raw string literal, is needed in this example because escape sequences in a normal "cooked" string literal that are not recognized by Python, as opposed to regular expressions, now result in a -:exc:`DeprecationWarning` and will eventually become a :exc:`SyntaxError`. See +:exc:`SyntaxWarning` and will eventually become a :exc:`SyntaxError`. See :ref:`the-backslash-plague`. :meth:`~re.Pattern.findall` has to create the entire list before it can be returned as the @@ -491,7 +491,7 @@ result. The :meth:`~re.Pattern.finditer` method returns a sequence of (29, 31) -Module-Level Functions +Module-level functions ---------------------- You don't have to create a pattern object and call its methods; the @@ -518,7 +518,7 @@ Outside of loops, there's not much difference thanks to the internal cache. -Compilation Flags +Compilation flags ----------------- .. currentmodule:: re @@ -642,7 +642,7 @@ of each one. whitespace is in a character class or preceded by an unescaped backslash; this lets you organize and indent the RE more clearly. This flag also lets you put comments within a RE that will be ignored by the engine; comments are marked by - a ``'#'`` that's neither in a character class or preceded by an unescaped + a ``'#'`` that's neither in a character class nor preceded by an unescaped backslash. For example, here's a RE that uses :const:`re.VERBOSE`; see how much easier it @@ -669,7 +669,7 @@ of each one. to understand than the version using :const:`re.VERBOSE`. -More Pattern Power +More pattern power ================== So far we've only covered a part of the features of regular expressions. In @@ -679,7 +679,7 @@ retrieve portions of the text that was matched. .. 
_more-metacharacters: -More Metacharacters +More metacharacters ------------------- There are some metacharacters that we haven't covered yet. Most of them will be @@ -875,7 +875,7 @@ Backreferences like this aren't often useful for just searching through a string find out that they're *very* useful when performing string substitutions. -Non-capturing and Named Groups +Non-capturing and named groups ------------------------------ Elaborate REs may use many groups, both to capture substrings of interest, and @@ -979,7 +979,7 @@ current point. The regular expression for finding doubled words, 'the the' -Lookahead Assertions +Lookahead assertions -------------------- Another zero-width assertion is the lookahead assertion. Lookahead assertions @@ -1061,7 +1061,7 @@ end in either ``bat`` or ``exe``: ``.*[.](?!bat$|exe$)[^.]*$`` -Modifying Strings +Modifying strings ================= Up to this point, we've simply performed searches against a static string. @@ -1083,7 +1083,7 @@ using the following pattern methods: +------------------+-----------------------------------------------+ -Splitting Strings +Splitting strings ----------------- The :meth:`~re.Pattern.split` method of a pattern splits a string apart @@ -1137,7 +1137,7 @@ argument, but is otherwise the same. :: ['Words', 'words, words.'] -Search and Replace +Search and replace ------------------ Another common task is to find all the matches for a pattern, and replace them @@ -1236,7 +1236,7 @@ pattern object as the first parameter, or use embedded modifiers in the pattern string, e.g. ``sub("(?i)b+", "x", "bbbb BBBB")`` returns ``'x x'``. -Common Problems +Common problems =============== Regular expressions are a powerful tool for some applications, but in some ways @@ -1244,7 +1244,7 @@ their behaviour isn't intuitive and at times they don't behave the way you may expect them to. This section will point out some of the most common pitfalls. 
-Use String Methods +Use string methods ------------------ Sometimes using the :mod:`re` module is a mistake. If you're matching a fixed @@ -1310,7 +1310,7 @@ string and then backtracking to find a match for the rest of the RE. Use :func:`re.search` instead. -Greedy versus Non-Greedy +Greedy versus non-greedy ------------------------ When repeating a regular expression, as in ``a*``, the resulting action is to @@ -1388,9 +1388,9 @@ Feedback ======== Regular expressions are a complicated topic. Did this document help you -understand them? Were there parts that were unclear, or Problems you +understand them? Were there parts that were unclear, or problems you encountered that weren't covered here? If so, please send suggestions for -improvements to the author. +improvements to the :ref:`issue tracker <using-the-tracker>`. The most complete book on regular expressions is almost certainly Jeffrey Friedl's Mastering Regular Expressions, published by O'Reilly. Unfortunately, From b1d2d9829cfb33f0487ce00c19fa57ddefeb1b50 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sat, 4 Apr 2026 17:45:52 +0100 Subject: [PATCH 07/13] Docs: Fix a typo in the 'Non-ASCII characters in names' section (#148043) --- Doc/reference/lexical_analysis.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index ae541680c534d6..f3ed1539493b6a 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -560,7 +560,7 @@ start with a character in the "letter-like" set ``xid_start``, and the remaining characters must be in the "letter- and digit-like" set ``xid_continue``. -These sets based on the *XID_Start* and *XID_Continue* sets as defined by the +These sets are based on the *XID_Start* and *XID_Continue* sets as defined by the Unicode standard annex `UAX-31`_. Python's ``xid_start`` additionally includes the underscore (``_``). Note that Python does not necessarily conform to `UAX-31`_.
From 21fb9dc71d2dd4ea9faf966385c102be20ce99e8 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sat, 4 Apr 2026 18:42:30 +0100 Subject: [PATCH 08/13] gh-146527: Heap-allocate gc_stats to avoid bloating PyInterpreterState (#148057) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gc_stats struct contains ring buffers of gc_generation_stats entries (11 young + 3×2 old on default builds). Embedding it inline in _gc_runtime_state, which is itself inline in PyInterpreterState, pushed fields like _gil.locked and threads.head to offsets beyond what out-of-process profilers and debuggers can reasonably read in a single buffer (e.g. offset 9384 for _gil.locked vs an 8 KiB read buffer). Heap-allocate generation_stats via PyMem_RawCalloc in _PyGC_Init and free it in _PyGC_Fini. This shrinks PyInterpreterState by ~1.6 KiB and keeps the GIL, thread-list, and other frequently-inspected fields at stable, low offsets. --- Include/internal/pycore_interp_structs.h | 2 +- Modules/gcmodule.c | 6 +++--- Python/gc.c | 15 +++++++++++---- Python/gc_free_threading.c | 11 +++++++++-- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f76d4f41c55119..c4b084642668a9 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -248,7 +248,7 @@ struct _gc_runtime_state { struct gc_generation old[2]; /* a permanent generation which won't be collected */ struct gc_generation permanent_generation; - struct gc_stats generation_stats; + struct gc_stats *generation_stats; /* true if we are currently running the collector */ int collecting; // The frame that started the current collection. 
It might be NULL even when diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index c21b61589bd261..8da28130e9da9a 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -347,9 +347,9 @@ gc_get_stats_impl(PyObject *module) /* To get consistent values despite allocations while constructing the result list, we use a snapshot of the running stats. */ GCState *gcstate = get_gc_state(); - stats[0] = gcstate->generation_stats.young.items[gcstate->generation_stats.young.index]; - stats[1] = gcstate->generation_stats.old[0].items[gcstate->generation_stats.old[0].index]; - stats[2] = gcstate->generation_stats.old[1].items[gcstate->generation_stats.old[1].index]; + stats[0] = gcstate->generation_stats->young.items[gcstate->generation_stats->young.index]; + stats[1] = gcstate->generation_stats->old[0].items[gcstate->generation_stats->old[0].index]; + stats[2] = gcstate->generation_stats->old[1].items[gcstate->generation_stats->old[1].index]; PyObject *result = PyList_New(0); if (result == NULL) diff --git a/Python/gc.c b/Python/gc.c index 7bca40f6e3f58e..284ac725d37ac6 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -177,6 +177,11 @@ _PyGC_Init(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; + gcstate->generation_stats = PyMem_RawCalloc(1, sizeof(struct gc_stats)); + if (gcstate->generation_stats == NULL) { + return _PyStatus_NO_MEMORY(); + } + gcstate->garbage = PyList_New(0); if (gcstate->garbage == NULL) { return _PyStatus_NO_MEMORY(); @@ -1398,13 +1403,13 @@ static struct gc_generation_stats * gc_get_stats(GCState *gcstate, int gen) { if (gen == 0) { - struct gc_young_stats_buffer *buffer = &gcstate->generation_stats.young; + struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young; buffer->index = (buffer->index + 1) % GC_YOUNG_STATS_SIZE; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; } else { - struct gc_old_stats_buffer *buffer = &gcstate->generation_stats.old[gen - 1]; + struct gc_old_stats_buffer 
*buffer = &gcstate->generation_stats->old[gen - 1]; buffer->index = (buffer->index + 1) % GC_OLD_STATS_SIZE; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; @@ -1415,12 +1420,12 @@ static struct gc_generation_stats * gc_get_prev_stats(GCState *gcstate, int gen) { if (gen == 0) { - struct gc_young_stats_buffer *buffer = &gcstate->generation_stats.young; + struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; } else { - struct gc_old_stats_buffer *buffer = &gcstate->generation_stats.old[gen - 1]; + struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1]; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; } @@ -2299,6 +2304,8 @@ _PyGC_Fini(PyInterpreterState *interp) GCState *gcstate = &interp->gc; Py_CLEAR(gcstate->garbage); Py_CLEAR(gcstate->callbacks); + PyMem_RawFree(gcstate->generation_stats); + gcstate->generation_stats = NULL; /* Prevent a subtle bug that affects sub-interpreters that use basic * single-phase init extensions (m_size == -1). 
Those extensions cause objects diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 7ad60a73a56a69..4b46ca04f56b20 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1698,6 +1698,11 @@ _PyGC_Init(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; + gcstate->generation_stats = PyMem_RawCalloc(1, sizeof(struct gc_stats)); + if (gcstate->generation_stats == NULL) { + return _PyStatus_NO_MEMORY(); + } + gcstate->garbage = PyList_New(0); if (gcstate->garbage == NULL) { return _PyStatus_NO_MEMORY(); @@ -2387,12 +2392,12 @@ static struct gc_generation_stats * get_stats(GCState *gcstate, int gen) { if (gen == 0) { - struct gc_young_stats_buffer *buffer = &gcstate->generation_stats.young; + struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; } else { - struct gc_old_stats_buffer *buffer = &gcstate->generation_stats.old[gen - 1]; + struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1]; struct gc_generation_stats *stats = &buffer->items[buffer->index]; return stats; } @@ -2831,6 +2836,8 @@ _PyGC_Fini(PyInterpreterState *interp) GCState *gcstate = &interp->gc; Py_CLEAR(gcstate->garbage); Py_CLEAR(gcstate->callbacks); + PyMem_RawFree(gcstate->generation_stats); + gcstate->generation_stats = NULL; /* We expect that none of this interpreters objects are shared with other interpreters. From 75be902a13c670a1ea16aee3644548723b7d7407 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sat, 4 Apr 2026 19:02:16 +0100 Subject: [PATCH 09/13] Docs: Standardize documentation authors (#148102) --- Doc/conf.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index f021c9eabe4dda..e2dff74538a342 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -73,6 +73,7 @@ # General substitutions. 
project = 'Python' copyright = "2001 Python Software Foundation" +_doc_authors = 'Python documentation authors' # We look for the Include/patchlevel.h file in the current Python source tree # and replace the values accordingly. @@ -361,69 +362,74 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). -_stdauthor = 'The Python development team' latex_documents = [ - ('c-api/index', 'c-api.tex', 'The Python/C API', _stdauthor, 'manual'), + ('c-api/index', 'c-api.tex', 'The Python/C API', _doc_authors, 'manual'), ( 'extending/index', 'extending.tex', 'Extending and Embedding Python', - _stdauthor, + _doc_authors, 'manual', ), ( 'installing/index', 'installing.tex', 'Installing Python Modules', - _stdauthor, + _doc_authors, 'manual', ), ( 'library/index', 'library.tex', 'The Python Library Reference', - _stdauthor, + _doc_authors, 'manual', ), ( 'reference/index', 'reference.tex', 'The Python Language Reference', - _stdauthor, + _doc_authors, 'manual', ), ( 'tutorial/index', 'tutorial.tex', 'Python Tutorial', - _stdauthor, + _doc_authors, 'manual', ), ( 'using/index', 'using.tex', 'Python Setup and Usage', - _stdauthor, + _doc_authors, 'manual', ), ( 'faq/index', 'faq.tex', 'Python Frequently Asked Questions', - _stdauthor, + _doc_authors, 'manual', ), ( 'whatsnew/' + version, 'whatsnew.tex', 'What\'s New in Python', - 'A. M. 
Kuchling', + _doc_authors, 'howto', ), ] # Collect all HOWTOs individually latex_documents.extend( - ('howto/' + fn[:-4], 'howto-' + fn[:-4] + '.tex', '', _stdauthor, 'howto') + ( + 'howto/' + fn[:-4], + 'howto-' + fn[:-4] + '.tex', + '', + _doc_authors, + 'howto', + ) for fn in os.listdir('howto') if fn.endswith('.rst') and fn != 'index.rst' ) @@ -434,7 +440,7 @@ # Options for Epub output # ----------------------- -epub_author = 'Python Documentation Authors' +epub_author = _doc_authors epub_publisher = 'Python Software Foundation' epub_exclude_files = ( 'index.xhtml', From 8bf8bf92921a13cc18f7d1b5bed8bd32c8485ba4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 4 Apr 2026 21:26:16 +0300 Subject: [PATCH 10/13] gh-73613: Support Base32 and Base64 without padding (GH-147974) Add the padded parameter in functions related to Base32 and Base64 codecs in the binascii and base64 modules. In the encoding functions it controls whether the pad character can be added in the output; in the decoding functions it controls whether padding is required in input. Padding of input is no longer required in base64.urlsafe_b64decode() by default.
--- Doc/library/base64.rst | 57 +++++-- Doc/library/binascii.rst | 32 +++- Doc/whatsnew/3.15.rst | 18 +++ .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 + Lib/base64.py | 53 ++++--- Lib/test/test_base64.py | 110 +++++++++++++ Lib/test/test_binascii.py | 66 ++++++++ ...6-04-01-18-17-55.gh-issue-73613.PLEebm.rst | 7 + Modules/binascii.c | 88 +++++++---- Modules/clinic/binascii.c.h | 145 ++++++++++++------ 13 files changed, 463 insertions(+), 120 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-04-01-18-17-55.gh-issue-73613.PLEebm.rst diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 1a1785cb58772e..425dff8f2a9ad1 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -51,7 +51,7 @@ The :rfc:`4648` encodings are suitable for encoding binary data so that it can b safely sent by email, used as parts of URLs, or included as part of an HTTP POST request. -.. function:: b64encode(s, altchars=None, *, wrapcol=0) +.. function:: b64encode(s, altchars=None, *, padded=True, wrapcol=0) Encode the :term:`bytes-like object` *s* using Base64 and return the encoded :class:`bytes`. @@ -61,6 +61,10 @@ POST request. This allows an application to e.g. generate URL or filesystem safe Base64 strings. The default is ``None``, for which the standard Base64 alphabet is used. + If *padded* is true (default), pad the encoded data with the '=' + character to a size multiple of 4. + If *padded* is false, do not add the pad characters. + If *wrapcol* is non-zero, insert a newline (``b'\n'``) character after at most every *wrapcol* characters. If *wrapcol* is zero (default), do not insert any newlines. @@ -69,11 +73,11 @@ POST request. :exc:`TypeError` if *altchars* is not a :term:`bytes-like object`. .. versionchanged:: 3.15 - Added the *wrapcol* parameter. + Added the *padded* and *wrapcol* parameters. -.. 
function:: b64decode(s, altchars=None, validate=False) - b64decode(s, altchars=None, validate=True, *, ignorechars) +.. function:: b64decode(s, altchars=None, validate=False, *, padded=True) + b64decode(s, altchars=None, validate=True, *, ignorechars, padded=True) Decode the Base64 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -82,6 +86,11 @@ POST request. of length 2 which specifies the alternative alphabet used instead of the ``+`` and ``/`` characters. + If *padded* is true, the last group of 4 base 64 alphabet characters must + be padded with the '=' character. + If *padded* is false, the '=' character is treated as other non-alphabet + characters (depending on the value of *validate* and *ignorechars*). + A :exc:`binascii.Error` exception is raised if *s* is incorrectly padded. @@ -106,7 +115,7 @@ POST request. For more information about the strict base64 check, see :func:`binascii.a2b_base64` .. versionchanged:: 3.15 - Added the *ignorechars* parameter. + Added the *ignorechars* and *padded* parameters. .. deprecated:: 3.15 Accepting the ``+`` and ``/`` characters with an alternative alphabet @@ -125,16 +134,19 @@ POST request. Base64 alphabet and return the decoded :class:`bytes`. -.. function:: urlsafe_b64encode(s) +.. function:: urlsafe_b64encode(s, *, padded=True) Encode :term:`bytes-like object` *s* using the URL- and filesystem-safe alphabet, which substitutes ``-`` instead of ``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet, and return the encoded :class:`bytes`. The result - can still contain ``=``. + can still contain ``=`` if *padded* is true (default). + + .. versionchanged:: next + Added the *padded* parameter. -.. function:: urlsafe_b64decode(s) +.. function:: urlsafe_b64decode(s, *, padded=False) Decode :term:`bytes-like object` or ASCII string *s* using the URL- and filesystem-safe @@ -142,24 +154,32 @@ POST request. 
``/`` in the standard Base64 alphabet, and return the decoded :class:`bytes`. + .. versionchanged:: next + Added the *padded* parameter. + Padding of input is no longer required by default. + .. deprecated:: 3.15 Accepting the ``+`` and ``/`` characters is now deprecated. -.. function:: b32encode(s, *, wrapcol=0) +.. function:: b32encode(s, *, padded=True, wrapcol=0) Encode the :term:`bytes-like object` *s* using Base32 and return the encoded :class:`bytes`. + If *padded* is true (default), pad the encoded data with the '=' + character to a size multiple of 8. + If *padded* is false, do not add the pad characters. + If *wrapcol* is non-zero, insert a newline (``b'\n'``) character after at most every *wrapcol* characters. If *wrapcol* is zero (default), do not add any newlines. .. versionchanged:: next - Added the *wrapcol* parameter. + Added the *padded* and *wrapcol* parameters. -.. function:: b32decode(s, casefold=False, map01=None, *, ignorechars=b'') +.. function:: b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'') Decode the Base32 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -175,6 +195,11 @@ POST request. digit 0 is always mapped to the letter O). For security purposes the default is ``None``, so that 0 and 1 are not allowed in the input. + If *padded* is true, the last group of 8 base 32 alphabet characters must + be padded with the '=' character. + If *padded* is false, the '=' character is treated as other non-alphabet + characters (depending on the value of *ignorechars*). + *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. @@ -183,10 +208,10 @@ POST request. input. .. versionchanged:: next - Added the *ignorechars* parameter. + Added the *ignorechars* and *padded* parameters. -.. function:: b32hexencode(s, *, wrapcol=0) +.. 
function:: b32hexencode(s, *, padded=True, wrapcol=0) Similar to :func:`b32encode` but uses the Extended Hex Alphabet, as defined in :rfc:`4648`. @@ -194,10 +219,10 @@ POST request. .. versionadded:: 3.10 .. versionchanged:: next - Added the *wrapcol* parameter. + Added the *padded* and *wrapcol* parameters. -.. function:: b32hexdecode(s, casefold=False, *, ignorechars=b'') +.. function:: b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'') Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defined in :rfc:`4648`. @@ -210,7 +235,7 @@ POST request. .. versionadded:: 3.10 .. versionchanged:: next - Added the *ignorechars* parameter. + Added the *ignorechars* and *padded* parameters. .. function:: b16encode(s, *, wrapcol=0) diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 4a82d0742ae9db..4f2edb7eff8a8f 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -48,8 +48,8 @@ The :mod:`!binascii` module defines the following functions: Added the *backtick* parameter. -.. function:: a2b_base64(string, /, *, alphabet=BASE64_ALPHABET, strict_mode=False) - a2b_base64(string, /, *, ignorechars, alphabet=BASE64_ALPHABET, strict_mode=True) +.. function:: a2b_base64(string, /, *, padded=True, alphabet=BASE64_ALPHABET, strict_mode=False) + a2b_base64(string, /, *, ignorechars, padded=True, alphabet=BASE64_ALPHABET, strict_mode=True) Convert a block of base64 data back to binary and return the binary data. More than one line may be passed at a time. @@ -57,6 +57,11 @@ The :mod:`!binascii` module defines the following functions: Optional *alphabet* must be a :class:`bytes` object of length 64 which specifies an alternative alphabet. + If *padded* is true, the last group of 4 base 64 alphabet characters must + be padded with the '=' character. + If *padded* is false, the '=' character is treated as other non-alphabet + characters (depending on the value of *strict_mode* and *ignorechars*). 
+ If *ignorechars* is specified, it should be a :term:`bytes-like object` containing characters to ignore from the input when *strict_mode* is true. If *ignorechars* contains the pad character ``'='``, the pad characters @@ -79,14 +84,18 @@ The :mod:`!binascii` module defines the following functions: Added the *strict_mode* parameter. .. versionchanged:: 3.15 - Added the *alphabet* and *ignorechars* parameters. + Added the *alphabet*, *ignorechars* and *padded* parameters. -.. function:: b2a_base64(data, *, alphabet=BASE64_ALPHABET, wrapcol=0, newline=True) +.. function:: b2a_base64(data, *, padded=True, alphabet=BASE64_ALPHABET, wrapcol=0, newline=True) Convert binary data to a line(s) of ASCII characters in base64 coding, as specified in :rfc:`4648`. + If *padded* is true (default), pad the encoded data with the '=' + character to a size multiple of 4. + If *padded* is false, do not add the pad characters. + If *wrapcol* is non-zero, insert a newline (``b'\n'``) character after at most every *wrapcol* characters. If *wrapcol* is zero (default), do not insert any newlines. @@ -98,7 +107,7 @@ The :mod:`!binascii` module defines the following functions: Added the *newline* parameter. .. versionchanged:: 3.15 - Added the *alphabet* and *wrapcol* parameters. + Added the *alphabet*, *padded* and *wrapcol* parameters. .. function:: a2b_ascii85(string, /, *, foldspaces=False, adobe=False, ignorechars=b'') @@ -190,7 +199,7 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 -.. function:: a2b_base32(string, /, *, alphabet=BASE32_ALPHABET, ignorechars=b'') +.. function:: a2b_base32(string, /, *, padded=True, alphabet=BASE32_ALPHABET, ignorechars=b'') Convert base32 data back to binary and return the binary data. @@ -208,6 +217,11 @@ The :mod:`!binascii` module defines the following functions: Optional *alphabet* must be a :class:`bytes` object of length 32 which specifies an alternative alphabet. 
+ If *padded* is true, the last group of 8 base 32 alphabet characters must + be padded with the '=' character. + If *padded* is false, the '=' character is treated as other non-alphabet + characters (depending on the value of *ignorechars*). + *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. If *ignorechars* contains the pad character ``'='``, the pad characters @@ -218,7 +232,7 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: next -.. function:: b2a_base32(data, /, *, alphabet=BASE32_ALPHABET, wrapcol=0) +.. function:: b2a_base32(data, /, *, padded=True, alphabet=BASE32_ALPHABET, wrapcol=0) Convert binary data to a line of ASCII characters in base32 coding, as specified in :rfc:`4648`. The return value is the converted line. @@ -226,6 +240,10 @@ The :mod:`!binascii` module defines the following functions: Optional *alphabet* must be a :term:`bytes-like object` of length 32 which specifies an alternative alphabet. + If *padded* is true (default), pad the encoded data with the '=' + character to a size multiple of 8. + If *padded* is false, do not add the pad characters. + If *wrapcol* is non-zero, insert a newline (``b'\n'``) character after at most every *wrapcol* characters. If *wrapcol* is zero (default), do not insert any newlines. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 287109035f1ee6..d1d4b92bcf4e97 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -657,6 +657,13 @@ base64 * Added the *pad* parameter in :func:`~base64.z85encode`. (Contributed by Hauke Dämpfling in :gh:`143103`.) +* Added the *padded* parameter in + :func:`~base64.b32encode`, :func:`~base64.b32decode`, + :func:`~base64.b32hexencode`, :func:`~base64.b32hexdecode`, + :func:`~base64.b64encode`, :func:`~base64.b64decode`, + :func:`~base64.urlsafe_b64encode`, and :func:`~base64.urlsafe_b64decode`. + (Contributed by Serhiy Storchaka in :gh:`73613`.) 
+ * Added the *wrapcol* parameter in :func:`~base64.b16encode`, :func:`~base64.b32encode`, :func:`~base64.b32hexencode`, :func:`~base64.b64encode`, :func:`~base64.b85encode`, and @@ -686,6 +693,11 @@ binascii (Contributed by James Seo and Serhiy Storchaka in :gh:`101178`.) +* Added the *padded* parameter in + :func:`~binascii.b2a_base32`, :func:`~binascii.a2b_base32`, + :func:`~binascii.b2a_base64`, and :func:`~binascii.a2b_base64`. + (Contributed by Serhiy Storchaka in :gh:`73613`.) + * Added the *wrapcol* parameter in :func:`~binascii.b2a_base64`. (Contributed by Serhiy Storchaka in :gh:`143214`.) @@ -2027,3 +2039,9 @@ that may require changes to your code. *dest* is now ``'foo'`` instead of ``'f'``. Pass an explicit *dest* argument to preserve the old behavior. (Contributed by Serhiy Storchaka in :gh:`138697`.) + +* Padding of input is no longer required in :func:`base64.urlsafe_b64decode`. + Pass a new argument ``padded=True`` or use :func:`base64.b64decode` + with argument ``altchars=b'-_'`` (this works with older Python versions) + to make padding required. + (Contributed by Serhiy Storchaka in :gh:`73613`.)
diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 4b1e289c6ff468..beae65213a27b6 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1974,6 +1974,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(overlapped)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(owner)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pad)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(padded)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parameter)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parent)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 6ee649b59a5c37..bb1c6dbaf03906 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -697,6 +697,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(overlapped) STRUCT_FOR_ID(owner) STRUCT_FOR_ID(pad) + STRUCT_FOR_ID(padded) STRUCT_FOR_ID(pages) STRUCT_FOR_ID(parameter) STRUCT_FOR_ID(parent) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 778db946c2a3aa..64b029797ab9b3 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1972,6 +1972,7 @@ extern "C" { INIT_ID(overlapped), \ INIT_ID(owner), \ INIT_ID(pad), \ + INIT_ID(padded), \ INIT_ID(pages), \ INIT_ID(parameter), \ INIT_ID(parent), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index bd8f50ff0ee732..461ee36dcebb6d 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2568,6 +2568,10 @@ 
_PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(padded); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(pages); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/base64.py b/Lib/base64.py index 47b90643e8da73..a94bec4d031c52 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -46,13 +46,15 @@ def _bytes_from_decode_data(s): # Base64 encoding/decoding uses binascii -def b64encode(s, altchars=None, *, wrapcol=0): +def b64encode(s, altchars=None, *, padded=True, wrapcol=0): """Encode the bytes-like object s using Base64 and return a bytes object. Optional altchars should be a byte string of length 2 which specifies an alternative alphabet for the '+' and '/' characters. This allows an application to e.g. generate url or filesystem safe Base64 strings. + If padded is false, omit padding in the output. + If wrapcol is non-zero, insert a newline (b'\\n') character after at most every wrapcol characters. 
""" @@ -60,18 +62,21 @@ def b64encode(s, altchars=None, *, wrapcol=0): if len(altchars) != 2: raise ValueError(f'invalid altchars: {altchars!r}') alphabet = binascii.BASE64_ALPHABET[:-2] + altchars - return binascii.b2a_base64(s, wrapcol=wrapcol, newline=False, + return binascii.b2a_base64(s, padded=padded, wrapcol=wrapcol, newline=False, alphabet=alphabet) - return binascii.b2a_base64(s, wrapcol=wrapcol, newline=False) + return binascii.b2a_base64(s, padded=padded, wrapcol=wrapcol, newline=False) -def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, *, ignorechars=_NOT_SPECIFIED): +def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, + *, padded=True, ignorechars=_NOT_SPECIFIED): """Decode the Base64 encoded bytes-like object or ASCII string s. Optional altchars must be a bytes-like object or ASCII string of length 2 which specifies the alternative alphabet used instead of the '+' and '/' characters. + If padded is false, padding in input is not required. + The result is returned as a bytes object. A binascii.Error is raised if s is incorrectly padded. @@ -105,11 +110,11 @@ def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, *, ignorechars=_NOT_SPE alphabet = binascii.BASE64_ALPHABET[:-2] + altchars return binascii.a2b_base64(s, strict_mode=validate, alphabet=alphabet, - ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars) if ignorechars is _NOT_SPECIFIED: ignorechars = b'' result = binascii.a2b_base64(s, strict_mode=validate, - ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars) if badchar is not None: import warnings if validate: @@ -145,17 +150,19 @@ def standard_b64decode(s): _urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/') -def urlsafe_b64encode(s): +def urlsafe_b64encode(s, *, padded=True): """Encode bytes using the URL- and filesystem-safe Base64 alphabet. Argument s is a bytes-like object to encode. The result is returned as a bytes object. The alphabet uses '-' instead of '+' and '_' instead of '/'. 
+ + If padded is false, omit padding in the output. """ - return binascii.b2a_base64(s, newline=False, + return binascii.b2a_base64(s, padded=padded, newline=False, alphabet=binascii.URLSAFE_BASE64_ALPHABET) -def urlsafe_b64decode(s): +def urlsafe_b64decode(s, *, padded=False): """Decode bytes using the URL- and filesystem-safe Base64 alphabet. Argument s is a bytes-like object or ASCII string to decode. The result @@ -164,6 +171,8 @@ def urlsafe_b64decode(s): alphabet, and are not a plus '+' or slash '/', are discarded prior to the padding check. + If padded is false, padding in input is not required. + The alphabet uses '-' instead of '+' and '_' instead of '/'. """ s = _bytes_from_decode_data(s) @@ -173,7 +182,7 @@ def urlsafe_b64decode(s): badchar = b break s = s.translate(_urlsafe_decode_translation) - result = binascii.a2b_base64(s, strict_mode=False) + result = binascii.a2b_base64(s, strict_mode=False, padded=padded) if badchar is not None: import warnings warnings.warn(f'invalid character {chr(badchar)!a} in URL-safe Base64 data ' @@ -187,6 +196,8 @@ def urlsafe_b64decode(s): _B32_ENCODE_DOCSTRING = ''' Encode the bytes-like objects using {encoding} and return a bytes object. +If padded is false, omit padding in the output. + If wrapcol is non-zero, insert a newline (b'\\n') character after at most every wrapcol characters. ''' @@ -196,6 +207,8 @@ def urlsafe_b64decode(s): Optional casefold is a flag specifying whether a lowercase alphabet is acceptable as input. For security purposes, the default is False. +If padded is false, padding in input is not required. + ignorechars should be a byte string containing characters to ignore from the input. {extra_args} @@ -213,11 +226,11 @@ def urlsafe_b64decode(s): 0 and 1 are not allowed in the input. 
''' -def b32encode(s, *, wrapcol=0): - return binascii.b2a_base32(s, wrapcol=wrapcol) +def b32encode(s, *, padded=True, wrapcol=0): + return binascii.b2a_base32(s, padded=padded, wrapcol=wrapcol) b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32') -def b32decode(s, casefold=False, map01=None, *, ignorechars=b''): +def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b''): s = _bytes_from_decode_data(s) # Handle section 2.4 zero and one mapping. The flag map01 will be either # False, or the character to map the digit 1 (one) to. It should be @@ -228,22 +241,22 @@ def b32decode(s, casefold=False, map01=None, *, ignorechars=b''): s = s.translate(bytes.maketrans(b'01', b'O' + map01)) if casefold: s = s.upper() - return binascii.a2b_base32(s, ignorechars=ignorechars) + return binascii.a2b_base32(s, padded=padded, ignorechars=ignorechars) b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32', extra_args=_B32_DECODE_MAP01_DOCSTRING) -def b32hexencode(s, *, wrapcol=0): - return binascii.b2a_base32(s, wrapcol=wrapcol, +def b32hexencode(s, *, padded=True, wrapcol=0): + return binascii.b2a_base32(s, padded=padded, wrapcol=wrapcol, alphabet=binascii.BASE32HEX_ALPHABET) b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex') -def b32hexdecode(s, casefold=False, *, ignorechars=b''): +def b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b''): s = _bytes_from_decode_data(s) # base32hex does not have the 01 mapping if casefold: s = s.upper() return binascii.a2b_base32(s, alphabet=binascii.BASE32HEX_ALPHABET, - ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars) b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex', extra_args='') @@ -341,7 +354,7 @@ def b85encode(b, pad=False, *, wrapcol=0): """ return binascii.b2a_base85(b, wrapcol=wrapcol, pad=pad) -def b85decode(b, *, ignorechars=b''): +def b85decode(b, *, ignorechars=b''): """Decode the base85-encoded bytes-like 
object or ASCII string b The result is returned as a bytes object. @@ -360,7 +373,7 @@ def z85encode(s, pad=False, *, wrapcol=0): return binascii.b2a_base85(s, wrapcol=wrapcol, pad=pad, alphabet=binascii.Z85_ALPHABET) -def z85decode(s, *, ignorechars=b''): +def z85decode(s, *, ignorechars=b''): """Decode the z85-encoded bytes-like object or ASCII string b The result is returned as a bytes object. diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index d5f8f44e280b54..1a4dd56a553f4d 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -209,6 +209,25 @@ def test_b64encode(self): b'\xd3V\xbeo\xf7\x1d', b'01a-b_cd') self.check_encode_type_errors(base64.urlsafe_b64encode) + def test_b64encode_padded(self): + b64encode = base64.b64encode + self.assertEqual(b64encode(b'', padded=False), b'') + self.assertEqual(b64encode(b'a', padded=False), b'YQ') + self.assertEqual(b64encode(b'ab', padded=False), b'YWI') + self.assertEqual(b64encode(b'abc', padded=False), b'YWJj') + self.assertEqual(b64encode(b'\xfb', padded=False, altchars=b'-_'), b'-w') + self.assertEqual(b64encode(b'\xfb\xff', padded=False, altchars=b'-_'), + b'-_8') + self.assertEqual(b64encode(b'\xfb\xff\xbf', padded=False, altchars=b'-_'), + b'-_-_') + + urlsafe_b64encode = base64.urlsafe_b64encode + self.assertEqual(urlsafe_b64encode(b'', padded=False), b'') + self.assertEqual(urlsafe_b64encode(b'\xfb', padded=False), b'-w') + self.assertEqual(urlsafe_b64encode(b'\xfb\xff', padded=False), b'-_8') + self.assertEqual(urlsafe_b64encode(b'\xfb\xff\xbf', padded=False), + b'-_-_') + def _common_test_wrapcol(self, func, data): eq = self.assertEqual expected = func(data) @@ -314,6 +333,36 @@ def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') self.assertRaises(binascii.Error, base64.b64decode, 'abc') + def test_b64decode_padded(self): + b64decode = base64.b64decode + urlsafe_b64decode = base64.urlsafe_b64decode + def check(data, expected, 
padded=0): + if b'=' in data: + with self.assertRaisesRegex(binascii.Error, 'Padding not allowed'): + b64decode(data, padded=False, validate=True) + self.assertEqual(b64decode(data, padded=False, ignorechars=b'='), + expected) + self.assertEqual(urlsafe_b64decode(data, padded=True), expected) + self.assertEqual(urlsafe_b64decode(data, padded=False), expected) + data = data.replace(b'=', b'') + self.assertEqual(b64decode(data, padded=False), expected) + self.assertEqual(b64decode(data, padded=False, validate=True), + expected) + self.assertEqual(urlsafe_b64decode(data), expected) + + check(b'', b'') + check(b'YQ==', b'a') + check(b'YWI=', b'ab') + check(b'YWJj', b'abc') + check(b'Y=WJj', b'abc') + check(b'YW=Jj', b'abc') + check(b'YWJ=j', b'abc') + + with self.assertRaisesRegex(binascii.Error, 'Incorrect padding'): + urlsafe_b64decode(b'YQ', padded=True) + with self.assertRaisesRegex(binascii.Error, 'Incorrect padding'): + urlsafe_b64decode(b'YWI', padded=True) + def _common_test_ignorechars(self, func): eq = self.assertEqual eq(func(b'', ignorechars=b' \n'), b'') @@ -487,6 +536,15 @@ def test_b32encode(self): self.check_other_types(base64.b32encode, b'abcd', b'MFRGGZA=') self.check_encode_type_errors(base64.b32encode) + def test_b32encode_padded(self): + b32encode = base64.b32encode + self.assertEqual(b32encode(b'', padded=False), b'') + self.assertEqual(b32encode(b'a', padded=False), b'ME') + self.assertEqual(b32encode(b'ab', padded=False), b'MFRA') + self.assertEqual(b32encode(b'abc', padded=False), b'MFRGG') + self.assertEqual(b32encode(b'abcd', padded=False), b'MFRGGZA') + self.assertEqual(b32encode(b'abcde', padded=False), b'MFRGGZDF') + def test_b32encode_wrapcol(self): eq = self.assertEqual b = b'www.python.org' @@ -564,6 +622,31 @@ def test_b32decode_map01(self): eq(base64.b32decode(b'M%c023456' % map01, map01=map01), res) eq(base64.b32decode(b'M%cO23456' % map01, map01=map01), res) + def test_b32decode_padded(self): + b32decode = base64.b32decode + def 
check(data, expected): + if b'=' in data: + with self.assertRaisesRegex(binascii.Error, 'Padding not allowed'): + b32decode(data, padded=False) + self.assertEqual(b32decode(data, padded=False, ignorechars=b'='), + expected) + data = data.replace(b'=', b'') + self.assertEqual(b32decode(data, padded=False), expected) + + check(b'', b'') + check(b'ME======', b'a') + check(b'MFRA====', b'ab') + check(b'MFRGG===', b'abc') + check(b'MFRGGZA=', b'abcd') + check(b'MFRGGZDF', b'abcde') + check(b'M=FRGGZDF', b'abcde') + check(b'MF=RGGZDF', b'abcde') + check(b'MFR=GGZDF', b'abcde') + check(b'MFRG=GZDF', b'abcde') + check(b'MFRGG=ZDF', b'abcde') + check(b'MFRGGZ=DF', b'abcde') + check(b'MFRGGZD=F', b'abcde') + def test_b32decode_ignorechars(self): self._common_test_ignorechars(base64.b32decode) eq = self.assertEqual @@ -632,6 +715,8 @@ def test_b32hexencode(self): for to_encode, expected in test_cases: with self.subTest(to_decode=to_encode): self.assertEqual(base64.b32hexencode(to_encode), expected) + self.assertEqual(base64.b32hexencode(to_encode, padded=False), + expected.rstrip(b'=')) def test_b32hexencode_other_types(self): self.check_other_types(base64.b32hexencode, b'abcd', b'C5H66P0=') @@ -679,6 +764,31 @@ def test_b32hexdecode_other_types(self): self.check_other_types(base64.b32hexdecode, b'C5H66===', b'abc') self.check_decode_type_errors(base64.b32hexdecode) + def test_b32hexdecode_padded(self): + b32hexdecode = base64.b32hexdecode + def check(data, expected): + if b'=' in data: + with self.assertRaisesRegex(binascii.Error, 'Padding not allowed'): + b32hexdecode(data, padded=False) + self.assertEqual(b32hexdecode(data, padded=False, ignorechars=b'='), + expected) + data = data.replace(b'=', b'') + self.assertEqual(b32hexdecode(data, padded=False), expected) + + check(b'', b'') + check(b'C4======', b'a') + check(b'C5H0====', b'ab') + check(b'C5H66===', b'abc') + check(b'C5H66P0=', b'abcd') + check(b'C5H66P35', b'abcde') + check(b'C=5H66P35', b'abcde') + 
check(b'C5=H66P35', b'abcde') + check(b'C5H=66P35', b'abcde') + check(b'C5H6=6P35', b'abcde') + check(b'C5H66=P35', b'abcde') + check(b'C5H66P=35', b'abcde') + check(b'C5H66P3=5', b'abcde') + def test_b32hexdecode_ignorechars(self): self._common_test_ignorechars(base64.b32hexdecode) eq = self.assertEqual diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index c8dbf3fec40bb7..81cdacb96241e2 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -233,6 +233,28 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertExcessPadding(b'abcd====efgh', b'i\xb7\x1dy\xf8!') assertExcessPadding(b'abcd=====efgh', b'i\xb7\x1dy\xf8!') + def test_a2b_base64_padded(self): + a2b_base64 = binascii.a2b_base64 + t = self.type2test + def check(data, expected): + if b'=' in data: + with self.assertRaisesRegex(binascii.Error, 'Padding not allowed'): + a2b_base64(t(data), padded=False, strict_mode=True) + self.assertEqual(a2b_base64(t(data), padded=False, ignorechars=b'='), + expected) + data = data.replace(b'=', b'') + self.assertEqual(a2b_base64(t(data), padded=False), expected) + self.assertEqual(a2b_base64(t(data), padded=False, strict_mode=True), + expected) + + check(b'', b'') + check(b'YQ==', b'a') + check(b'YWI=', b'ab') + check(b'YWJj', b'abc') + check(b'Y=WJj', b'abc') + check(b'YW=Jj', b'abc') + check(b'YWJ=j', b'abc') + def _common_test_ignorechars(self, func): eq = self.assertEqual empty = self.type2test(b'') @@ -913,6 +935,42 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertInvalidLength(b" ABC=====", ignorechars=b' ') assertInvalidLength(b" ABCDEF==", ignorechars=b' ') + def test_a2b_base32_padded(self): + a2b_base32 = binascii.a2b_base32 + t = self.type2test + def check(data, expected): + if b'=' in data: + with self.assertRaisesRegex(binascii.Error, 'Padding not allowed'): + a2b_base32(t(data), padded=False) + self.assertEqual(a2b_base32(t(data), padded=False, ignorechars=b'='), + expected) + data = 
data.replace(b'=', b'') + self.assertEqual(a2b_base32(t(data), padded=False), expected) + + check(b'', b'') + check(b'ME======', b'a') + check(b'MFRA====', b'ab') + check(b'MFRGG===', b'abc') + check(b'MFRGGZA=', b'abcd') + check(b'MFRGGZDF', b'abcde') + check(b'M=FRGGZDF', b'abcde') + check(b'MF=RGGZDF', b'abcde') + check(b'MFR=GGZDF', b'abcde') + check(b'MFRG=GZDF', b'abcde') + check(b'MFRGG=ZDF', b'abcde') + check(b'MFRGGZ=DF', b'abcde') + check(b'MFRGGZD=F', b'abcde') + + def test_b2a_base32_padded(self): + b2a_base32 = binascii.b2a_base32 + t = self.type2test + self.assertEqual(b2a_base32(t(b''), padded=False), b'') + self.assertEqual(b2a_base32(t(b'a'), padded=False), b'ME') + self.assertEqual(b2a_base32(t(b'ab'), padded=False), b'MFRA') + self.assertEqual(b2a_base32(t(b'abc'), padded=False), b'MFRGG') + self.assertEqual(b2a_base32(t(b'abcd'), padded=False), b'MFRGGZA') + self.assertEqual(b2a_base32(t(b'abcde'), padded=False), b'MFRGGZDF') + def test_base32_wrapcol(self): self._common_test_wrapcol(binascii.b2a_base32) b = self.type2test(b'www.python.org') @@ -1255,6 +1313,14 @@ def test_b2a_base64_newline(self): self.assertEqual(binascii.b2a_base64(b, newline=True), b'\n') self.assertEqual(binascii.b2a_base64(b, newline=False), b'') + def test_b2a_base64_padded(self): + b2a_base64 = binascii.b2a_base64 + t = self.type2test + self.assertEqual(b2a_base64(t(b''), padded=False), b'\n') + self.assertEqual(b2a_base64(t(b'a'), padded=False), b'YQ\n') + self.assertEqual(b2a_base64(t(b'ab'), padded=False), b'YWI\n') + self.assertEqual(b2a_base64(t(b'abc'), padded=False), b'YWJj\n') + def test_b2a_base64_wrapcol(self): self._common_test_wrapcol(binascii.b2a_base64) b = self.type2test(b'www.python.org') diff --git a/Misc/NEWS.d/next/Library/2026-04-01-18-17-55.gh-issue-73613.PLEebm.rst b/Misc/NEWS.d/next/Library/2026-04-01-18-17-55.gh-issue-73613.PLEebm.rst new file mode 100644 index 00000000000000..8c50972d3ca45a --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2026-04-01-18-17-55.gh-issue-73613.PLEebm.rst @@ -0,0 +1,7 @@ +Add the *padded* parameter in functions related to Base32 and Base64 codecs +in the :mod:`binascii` and :mod:`base64` modules. +In the encoding functions it controls whether the pad character can be added +in the output; in the decoding functions it controls whether padding is +required in input. +Padding of input is no longer required in :func:`base64.urlsafe_b64decode` +by default. diff --git a/Modules/binascii.c b/Modules/binascii.c index d0ef2d2d2cc4ce..9193137877aef9 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -723,6 +723,8 @@ binascii.a2b_base64 When set to true, bytes that are not part of the base64 standard are not allowed. The same applies to excess data after padding (= / ==). Set to True by default if ignorechars is specified, False otherwise. + padded: bool = True + When set to false, padding in input is not required. alphabet: PyBytesObject(c_default="NULL") = BASE64_ALPHABET ignorechars: Py_buffer = NULL A byte string containing characters to ignore from the input when @@ -733,8 +735,9 @@ Decode a line of base64 data. static PyObject * binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, - PyBytesObject *alphabet, Py_buffer *ignorechars) -/*[clinic end generated code: output=72f15fcc0681d666 input=195c8d60b03aaa6f]*/ + int padded, PyBytesObject *alphabet, + Py_buffer *ignorechars) +/*[clinic end generated code: output=525d840a299ff132 input=74a53dd3b23474b3]*/ { assert(data->len >= 0); @@ -798,7 +801,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, /* Check for pad sequences and ignore ** the invalid ones. 
*/ - if (this_ch == BASE64_PAD) { + if (padded && this_ch == BASE64_PAD) { pads++; if (quad_pos >= 2 && quad_pos + pads <= 4) { continue; @@ -831,7 +834,10 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache)) { state = get_binascii_state(module); if (state) { - PyErr_SetString(state->Error, "Only base64 data is allowed"); + PyErr_SetString(state->Error, + (this_ch == BASE64_PAD) + ? "Padding not allowed" + : "Only base64 data is allowed"); } goto error_end; } @@ -895,7 +901,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, goto error_end; } - if (quad_pos != 0 && quad_pos + pads < 4) { + if (padded && quad_pos != 0 && quad_pos + pads < 4) { state = get_binascii_state(module); if (state) { PyErr_SetString(state->Error, "Incorrect padding"); @@ -919,6 +925,8 @@ binascii.b2a_base64 data: Py_buffer / * + padded: bool = True + When set to false, omit padding in the output. wrapcol: size_t = 0 newline: bool = True alphabet: Py_buffer(c_default="{NULL, NULL}") = BASE64_ALPHABET @@ -927,9 +935,9 @@ Base64-code line of data. [clinic start generated code]*/ static PyObject * -binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, size_t wrapcol, - int newline, Py_buffer *alphabet) -/*[clinic end generated code: output=9d9657e5fbe28c64 input=ffa3af8520c312ac]*/ +binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int padded, + size_t wrapcol, int newline, Py_buffer *alphabet) +/*[clinic end generated code: output=a2057b906dc201ab input=cfa33ad73051d3f7]*/ { const unsigned char *table_b2a = table_b2a_base64; const unsigned char *bin_data = data->buf; @@ -950,6 +958,11 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, size_t wrapcol, * Use unsigned integer arithmetic to avoid signed integer overflow. 
*/ size_t out_len = ((size_t)bin_len + 2u) / 3u * 4u; + unsigned int pads = (3 - (bin_len % 3)) % 3 * 4 / 3; + if (!padded) { + out_len -= pads; + pads = 0; + } if (wrapcol && out_len) { /* Each line should encode a whole number of bytes. */ wrapcol = wrapcol < 4 ? 4 : wrapcol / 4 * 4; @@ -982,18 +995,23 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, size_t wrapcol, /* Handle remaining 0-2 bytes */ if (bin_len == 1) { /* 1 byte remaining: produces 2 base64 chars + 2 padding */ + assert(!padded || pads == 2); unsigned int val = bin_data[0]; *ascii_data++ = table_b2a[(val >> 2) & 0x3f]; *ascii_data++ = table_b2a[(val << 4) & 0x3f]; - *ascii_data++ = BASE64_PAD; - *ascii_data++ = BASE64_PAD; } else if (bin_len == 2) { /* 2 bytes remaining: produces 3 base64 chars + 1 padding */ + assert(!padded || pads == 1); unsigned int val = ((unsigned int)bin_data[0] << 8) | bin_data[1]; *ascii_data++ = table_b2a[(val >> 10) & 0x3f]; *ascii_data++ = table_b2a[(val >> 4) & 0x3f]; *ascii_data++ = table_b2a[(val << 2) & 0x3f]; + } + else { + assert(pads == 0); + } + for (; pads; pads--) { *ascii_data++ = BASE64_PAD; } @@ -1512,6 +1530,8 @@ binascii.a2b_base32 data: ascii_buffer / * + padded: bool = True + When set to false, padding in input is not required. alphabet: PyBytesObject(c_default="NULL") = BASE32_ALPHABET ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. @@ -1520,9 +1540,9 @@ Decode a line of base32 data. 
[clinic start generated code]*/ static PyObject * -binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, +binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, PyBytesObject *alphabet, Py_buffer *ignorechars) -/*[clinic end generated code: output=2cf7c8c9e6e98b88 input=b0333508aad1b3ac]*/ +/*[clinic end generated code: output=7dbbaa816d956b1c input=07a3721acdf9b688]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1581,7 +1601,7 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, unsigned char this_ch = *ascii_data; /* Check for pad sequences. They may only occur at certain positions. */ - if (this_ch == BASE32_PAD) { + if (padded && this_ch == BASE32_PAD) { pads++; if ((octa_pos == 2 || octa_pos == 4 || octa_pos == 5 || octa_pos == 7) @@ -1617,7 +1637,10 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, if (!ignorechar(this_ch, ignorechars, ignorecache)) { state = get_binascii_state(module); if (state) { - PyErr_SetString(state->Error, "Only base32 data is allowed"); + PyErr_SetString(state->Error, + (this_ch == BASE32_PAD) + ? "Padding not allowed" + : "Only base32 data is allowed"); } goto error; } @@ -1692,7 +1715,7 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, goto error; } - if (octa_pos != 0 && octa_pos + pads < 8) { + if (padded && octa_pos != 0 && octa_pos + pads < 8) { state = get_binascii_state(module); if (state) { PyErr_SetString(state->Error, "Incorrect padding"); @@ -1715,6 +1738,8 @@ binascii.b2a_base32 data: Py_buffer / * + padded: bool = True + When set to false, omit padding in the output. wrapcol: size_t = 0 alphabet: Py_buffer(c_default="{NULL, NULL}") = BASE32_ALPHABET @@ -1722,9 +1747,9 @@ Base32-code line of data. 
[clinic start generated code]*/ static PyObject * -binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, size_t wrapcol, - Py_buffer *alphabet) -/*[clinic end generated code: output=d41fafbdaf29e280 input=a3d93b73836f2879]*/ +binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, int padded, + size_t wrapcol, Py_buffer *alphabet) +/*[clinic end generated code: output=acc09e685569aab9 input=1889b0c497a1d3c2]*/ { const unsigned char *table_b2a = table_b2a_base32; const unsigned char *bin_data = data->buf; @@ -1746,6 +1771,11 @@ binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, size_t wrapcol, * Use unsigned integer arithmetic to avoid signed integer overflow. */ size_t ascii_len = ((size_t)bin_len + 4u) / 5u * 8u; + unsigned int pads = (5 - (bin_len % 5)) % 5 * 8 / 5; + if (!padded) { + ascii_len -= pads; + pads = 0; + } if (wrapcol && ascii_len) { /* Each line should encode a whole number of bytes. */ wrapcol = wrapcol < 8 ? 8 : wrapcol / 8 * 8; @@ -1774,30 +1804,23 @@ binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, size_t wrapcol, /* Handle the remaining 0-4 bytes. */ if (bin_len == 1) { /* 1 byte remaining: produces 2 encoded + 6 padding chars. */ + assert(!padded || pads == 6); uint32_t val = bin_data[0]; *ascii_data++ = table_b2a[(val >> 3) & 0x1f]; *ascii_data++ = table_b2a[(val << 2) & 0x1f]; - *ascii_data++ = BASE32_PAD; - *ascii_data++ = BASE32_PAD; - *ascii_data++ = BASE32_PAD; - *ascii_data++ = BASE32_PAD; - *ascii_data++ = BASE32_PAD; - *ascii_data++ = BASE32_PAD; } else if (bin_len == 2) { /* 2 bytes remaining: produces 4 encoded + 4 padding chars. 
*/ + assert(!padded || pads == 4); uint32_t val = ((uint32_t)bin_data[0] << 8) | bin_data[1]; *ascii_data++ = table_b2a[(val >> 11) & 0x1f]; *ascii_data++ = table_b2a[(val >> 6) & 0x1f]; *ascii_data++ = table_b2a[(val >> 1) & 0x1f]; *ascii_data++ = table_b2a[(val << 4) & 0x1f]; - *ascii_data++ = BASE32_PAD; - *ascii_data++ = BASE32_PAD; - *ascii_data++ = BASE32_PAD; - *ascii_data++ = BASE32_PAD; } else if (bin_len == 3) { /* 3 bytes remaining: produces 5 encoded + 3 padding chars. */ + assert(!padded || pads == 3); uint32_t val = ((uint32_t)bin_data[0] << 16) | ((uint32_t)bin_data[1] << 8) | bin_data[2]; @@ -1806,12 +1829,10 @@ binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, size_t wrapcol, *ascii_data++ = table_b2a[(val >> 9) & 0x1f]; *ascii_data++ = table_b2a[(val >> 4) & 0x1f]; *ascii_data++ = table_b2a[(val << 1) & 0x1f]; - *ascii_data++ = BASE32_PAD; - *ascii_data++ = BASE32_PAD; - *ascii_data++ = BASE32_PAD; } else if (bin_len == 4) { /* 4 bytes remaining: produces 7 encoded + 1 padding chars. 
*/ + assert(!padded || pads == 1); uint32_t val = ((uint32_t)bin_data[0] << 24) | ((uint32_t)bin_data[1] << 16) | ((uint32_t)bin_data[2] << 8) @@ -1823,6 +1844,11 @@ binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, size_t wrapcol, *ascii_data++ = table_b2a[(val >> 7) & 0x1f]; *ascii_data++ = table_b2a[(val >> 2) & 0x1f]; *ascii_data++ = table_b2a[(val << 3) & 0x1f]; + } + else { + assert(pads == 0); + } + for (; pads; pads--) { *ascii_data++ = BASE32_PAD; } diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index d27a65997244bc..0a2d33c428d10a 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -118,7 +118,8 @@ binascii_b2a_uu(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj PyDoc_STRVAR(binascii_a2b_base64__doc__, "a2b_base64($module, data, /, *, strict_mode=,\n" -" alphabet=BASE64_ALPHABET, ignorechars=)\n" +" padded=True, alphabet=BASE64_ALPHABET,\n" +" ignorechars=)\n" "--\n" "\n" "Decode a line of base64 data.\n" @@ -127,6 +128,8 @@ PyDoc_STRVAR(binascii_a2b_base64__doc__, " When set to true, bytes that are not part of the base64 standard are\n" " not allowed. 
The same applies to excess data after padding (= / ==).\n" " Set to True by default if ignorechars is specified, False otherwise.\n" +" padded\n" +" When set to false, padding in input is not required.\n" " ignorechars\n" " A byte string containing characters to ignore from the input when\n" " strict_mode is true."); @@ -136,7 +139,8 @@ PyDoc_STRVAR(binascii_a2b_base64__doc__, static PyObject * binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, - PyBytesObject *alphabet, Py_buffer *ignorechars); + int padded, PyBytesObject *alphabet, + Py_buffer *ignorechars); static PyObject * binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -144,7 +148,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -153,7 +157,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(strict_mode), &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(strict_mode), &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -162,17 +166,18 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "strict_mode", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "strict_mode", "padded", "alphabet", "ignorechars", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base64", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs 
+ (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int strict_mode = -1; + int padded = 1; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {NULL, NULL}; @@ -197,20 +202,29 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } } if (args[2]) { - if (!PyBytes_Check(args[2])) { - _PyArg_BadArgument("a2b_base64", "argument 'alphabet'", "bytes", args[2]); + padded = PyObject_IsTrue(args[2]); + if (padded < 0) { goto exit; } - alphabet = (PyBytesObject *)args[2]; if (!--noptargs) { goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[3]) { + if (!PyBytes_Check(args[3])) { + _PyArg_BadArgument("a2b_base64", "argument 'alphabet'", "bytes", args[3]); + goto exit; + } + alphabet = (PyBytesObject *)args[3]; + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (PyObject_GetBuffer(args[4], &ignorechars, PyBUF_SIMPLE) != 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base64_impl(module, &data, strict_mode, alphabet, &ignorechars); + return_value = binascii_a2b_base64_impl(module, &data, strict_mode, padded, alphabet, &ignorechars); exit: /* Cleanup for data */ @@ -225,18 +239,21 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } PyDoc_STRVAR(binascii_b2a_base64__doc__, -"b2a_base64($module, data, /, *, wrapcol=0, newline=True,\n" +"b2a_base64($module, data, /, *, padded=True, wrapcol=0, newline=True,\n" " alphabet=BASE64_ALPHABET)\n" "--\n" "\n" -"Base64-code line of data."); +"Base64-code line of data.\n" +"\n" +" padded\n" +" When set to false, omit padding in the output."); #define BINASCII_B2A_BASE64_METHODDEF \ {"b2a_base64", _PyCFunction_CAST(binascii_b2a_base64), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base64__doc__}, static PyObject * -binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, size_t wrapcol, - int newline, Py_buffer *alphabet); 
+binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int padded, + size_t wrapcol, int newline, Py_buffer *alphabet); static PyObject * binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -244,7 +261,7 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -253,7 +270,7 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(wrapcol), &_Py_ID(newline), &_Py_ID(alphabet), }, + .ob_item = { &_Py_ID(padded), &_Py_ID(wrapcol), &_Py_ID(newline), &_Py_ID(alphabet), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -262,16 +279,17 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "wrapcol", "newline", "alphabet", NULL}; + static const char * const _keywords[] = {"", "padded", "wrapcol", "newline", "alphabet", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "b2a_base64", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; + int padded = 1; size_t wrapcol = 0; int newline = 1; Py_buffer alphabet = {NULL, NULL}; @@ -288,7 +306,8 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } if (args[1]) { - if (!_PyLong_Size_t_Converter(args[1], &wrapcol)) { + padded = PyObject_IsTrue(args[1]); + if (padded < 0) { goto exit; } if (!--noptargs) { @@ -296,7 +315,15 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } } if (args[2]) { - newline = PyObject_IsTrue(args[2]); + if (!_PyLong_Size_t_Converter(args[2], &wrapcol)) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[3]) { + newline = PyObject_IsTrue(args[3]); if (newline < 0) { goto exit; } @@ -304,11 +331,11 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[3], &alphabet, PyBUF_SIMPLE) != 0) { + if (PyObject_GetBuffer(args[4], &alphabet, PyBUF_SIMPLE) != 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_b2a_base64_impl(module, &data, wrapcol, newline, &alphabet); + return_value = binascii_b2a_base64_impl(module, &data, padded, wrapcol, newline, &alphabet); exit: /* Cleanup for data */ @@ -740,12 +767,14 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } PyDoc_STRVAR(binascii_a2b_base32__doc__, -"a2b_base32($module, data, /, *, alphabet=BASE32_ALPHABET,\n" +"a2b_base32($module, data, /, *, padded=True, alphabet=BASE32_ALPHABET,\n" " ignorechars=b\'\')\n" "--\n" "\n" "Decode a line of base32 data.\n" "\n" +" padded\n" +" When set to false, padding in input is not required.\n" " ignorechars\n" " A byte string containing characters to ignore from the input."); @@ -753,7 +782,7 @@ PyDoc_STRVAR(binascii_a2b_base32__doc__, {"a2b_base32", _PyCFunction_CAST(binascii_a2b_base32), METH_FASTCALL|METH_KEYWORDS, 
binascii_a2b_base32__doc__}, static PyObject * -binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, +binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, PyBytesObject *alphabet, Py_buffer *ignorechars); static PyObject * @@ -762,7 +791,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -771,7 +800,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -780,16 +809,17 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "padded", "alphabet", "ignorechars", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base32", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; + int padded = 1; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; @@ -805,20 +835,29 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } if (args[1]) { - if (!PyBytes_Check(args[1])) { - _PyArg_BadArgument("a2b_base32", "argument 'alphabet'", "bytes", args[1]); + padded = PyObject_IsTrue(args[1]); + if (padded < 0) { goto exit; } - alphabet = (PyBytesObject *)args[1]; if (!--noptargs) { goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[2]) { + if (!PyBytes_Check(args[2])) { + _PyArg_BadArgument("a2b_base32", "argument 'alphabet'", "bytes", args[2]); + goto exit; + } + alphabet = (PyBytesObject *)args[2]; + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base32_impl(module, &data, alphabet, &ignorechars); + return_value = binascii_a2b_base32_impl(module, &data, padded, alphabet, &ignorechars); exit: /* Cleanup for data */ @@ -833,17 +872,21 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } PyDoc_STRVAR(binascii_b2a_base32__doc__, -"b2a_base32($module, data, /, *, wrapcol=0, alphabet=BASE32_ALPHABET)\n" +"b2a_base32($module, data, /, *, padded=True, wrapcol=0,\n" +" alphabet=BASE32_ALPHABET)\n" "--\n" "\n" -"Base32-code line of data."); +"Base32-code line of data.\n" +"\n" +" padded\n" +" When set to false, omit padding in the output."); #define BINASCII_B2A_BASE32_METHODDEF \ {"b2a_base32", _PyCFunction_CAST(binascii_b2a_base32), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base32__doc__}, static PyObject * -binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, size_t wrapcol, - Py_buffer *alphabet); +binascii_b2a_base32_impl(PyObject *module, Py_buffer 
*data, int padded, + size_t wrapcol, Py_buffer *alphabet); static PyObject * binascii_b2a_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -851,7 +894,7 @@ binascii_b2a_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -860,7 +903,7 @@ binascii_b2a_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(wrapcol), &_Py_ID(alphabet), }, + .ob_item = { &_Py_ID(padded), &_Py_ID(wrapcol), &_Py_ID(alphabet), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -869,16 +912,17 @@ binascii_b2a_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "wrapcol", "alphabet", NULL}; + static const char * const _keywords[] = {"", "padded", "wrapcol", "alphabet", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "b2a_base32", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; + int padded = 1; size_t wrapcol = 0; Py_buffer alphabet = {NULL, NULL}; @@ -894,18 +938,27 @@ binascii_b2a_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } if (args[1]) { - if (!_PyLong_Size_t_Converter(args[1], &wrapcol)) { + padded = PyObject_IsTrue(args[1]); + if (padded < 0) { goto exit; } if (!--noptargs) { goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[2], &alphabet, PyBUF_SIMPLE) != 0) { + if (args[2]) { + if (!_PyLong_Size_t_Converter(args[2], &wrapcol)) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (PyObject_GetBuffer(args[3], &alphabet, PyBUF_SIMPLE) != 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_b2a_base32_impl(module, &data, wrapcol, &alphabet); + return_value = binascii_b2a_base32_impl(module, &data, padded, wrapcol, &alphabet); exit: /* Cleanup for data */ @@ -1581,4 +1634,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=197a0f70aa392d39 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2acab1ceb0058b1a input=a9049054013a1b77]*/ From c43b490ca3f511bbdb3d462587db6c506a410036 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sat, 4 Apr 2026 20:47:11 +0100 Subject: [PATCH 11/13] gh-145000: Find correct merge base in `reusable-check-html-ids.yml` workflow (#147975) --- .github/workflows/reusable-check-html-ids.yml | 45 +++++++++++-------- Tools/build/compute-changes.py | 2 +- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/.github/workflows/reusable-check-html-ids.yml b/.github/workflows/reusable-check-html-ids.yml index 4a1d321a8ce83e..4f827c55cacd06 100644 --- a/.github/workflows/reusable-check-html-ids.yml +++ b/.github/workflows/reusable-check-html-ids.yml @@ -15,11 +15,33 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 30 steps: - - name: 
'Check out base commit' + - name: 'Check out PR head' uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - ref: ${{ github.event.pull_request.base.sha }} + ref: ${{ github.event.pull_request.head.sha }} + - name: 'Find merge base' + id: merge-base + run: | + BASE="${{ github.event.pull_request.base.sha }}" + HEAD="${{ github.event.pull_request.head.sha }}" + git fetch --depth=$((${{ github.event.pull_request.commits }} + 10)) --no-tags origin "$BASE" "$HEAD" + + if ! MERGE_BASE=$(git merge-base "$BASE" "$HEAD" 2>/dev/null); then + git fetch --deepen=1 --no-tags origin "$BASE" "$HEAD" + + OLDEST=$(git rev-list --reflog --max-parents=0 --reverse "${BASE}^" "${HEAD}^" | head -1) + TIMESTAMP=$(git show --format=%at --no-patch "$OLDEST") + + git fetch --shallow-since="$TIMESTAMP" --no-tags origin "$BASE" "$HEAD" + + MERGE_BASE=$(git merge-base "$BASE" "$HEAD") + fi + echo "sha=$MERGE_BASE" >> "$GITHUB_OUTPUT" + - name: 'Create worktree at merge base' + env: + MERGE_BASE: ${{ steps.merge-base.outputs.sha }} + run: git worktree add /tmp/merge-base "$MERGE_BASE" --detach - name: 'Set up Python' uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: @@ -27,24 +49,11 @@ jobs: cache: 'pip' cache-dependency-path: 'Doc/requirements.txt' - name: 'Install build dependencies' - run: make -C Doc/ venv + run: make -C /tmp/merge-base/Doc/ venv - name: 'Build HTML documentation' - run: make -C Doc/ SPHINXOPTS="--quiet" html - - name: 'Check out PR head tools' - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - sparse-checkout: | - Doc/tools/check-html-ids.py - Doc/tools/removed-ids.txt - sparse-checkout-cone-mode: false - path: pr-head - - name: 'Use PR head tools' - run: | - cp pr-head/Doc/tools/check-html-ids.py Doc/tools/check-html-ids.py - [ -f pr-head/Doc/tools/removed-ids.txt ] && cp pr-head/Doc/tools/removed-ids.txt 
Doc/tools/removed-ids.txt + run: make -C /tmp/merge-base/Doc/ SPHINXOPTS="--quiet" html - name: 'Collect HTML IDs' - run: python Doc/tools/check-html-ids.py collect Doc/build/html -o /tmp/html-ids-base.json.gz + run: python Doc/tools/check-html-ids.py collect /tmp/merge-base/Doc/build/html -o /tmp/html-ids-base.json.gz - name: 'Download PR head HTML IDs' uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: diff --git a/Tools/build/compute-changes.py b/Tools/build/compute-changes.py index 4870388da0d8a5..53d7b8fe32f89e 100644 --- a/Tools/build/compute-changes.py +++ b/Tools/build/compute-changes.py @@ -238,7 +238,7 @@ def process_changed_files(changed_files: Set[Path]) -> Outputs: run_tests = run_ci_fuzz = run_ci_fuzz_stdlib = run_windows_tests = True has_platform_specific_change = False continue - if file.name == "reusable-docs.yml": + if file.name in ("reusable-docs.yml", "reusable-check-html-ids.yml"): run_docs = True continue if file.name == "reusable-windows.yml": From 4ff8b07a3d907c5a755a862a86496ed6c6fb2f3d Mon Sep 17 00:00:00 2001 From: ivonastojanovic <80911834+ivonastojanovic@users.noreply.github.com> Date: Sat, 4 Apr 2026 20:55:05 +0100 Subject: [PATCH 12/13] gh-142927: Show self time in flamegraph tooltip (#147706) We already show self time in differential flamegraphs, but it should be included in regular flamegraphs as well. Display the time spent in the function body excluding callees, not just the total inclusive time. 
--- .../sampling/_flamegraph_assets/flamegraph.js | 11 ++++++++++- Lib/profiling/sampling/stack_collector.py | 1 + .../test_sampling_profiler/test_collectors.py | 4 +++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js index 166c03d03fbe5b..d7a8890d4a1ad9 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js @@ -292,6 +292,8 @@ function createPythonTooltip(data) { } const timeMs = (d.data.value / 1000).toFixed(2); + const selfSamples = d.data.self || 0; + const selfMs = (selfSamples / 1000).toFixed(2); const percentage = ((d.data.value / data.value) * 100).toFixed(2); const calls = d.data.calls || 0; const childCount = d.children ? d.children.length : 0; @@ -403,9 +405,14 @@ function createPythonTooltip(data) { ${fileLocationHTML}
- Execution Time: + Total Time: ${timeMs} ms + ${selfSamples > 0 ? ` + Self Time: + ${selfMs} ms + ` : ''} + Percentage: ${percentage}% @@ -1271,6 +1278,7 @@ function accumulateInvertedNode(parent, stackFrame, leaf, isDifferential) { const newNode = { name: stackFrame.name, value: 0, + self: 0, children: {}, filename: stackFrame.filename, lineno: stackFrame.lineno, @@ -1293,6 +1301,7 @@ function accumulateInvertedNode(parent, stackFrame, leaf, isDifferential) { const node = parent.children[key]; node.value += leaf.value; + node.self += stackFrame.self || 0; if (leaf.threads) { leaf.threads.forEach(t => node.threads.add(t)); } diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index 31102d3eb0ffa6..461ce95a25874b 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -207,6 +207,7 @@ def convert_children(children, min_samples): child_entry = { "name": name_idx, "value": samples, + "self": node.get("self", 0), "children": [], "filename": filename_idx, "lineno": func[1], diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py index 86fb9d4c05b3bc..503430ddf02163 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py @@ -435,12 +435,14 @@ def test_flamegraph_collector_basic(self): strings = data.get("strings", []) name = resolve_name(data, strings) self.assertTrue(name.startswith("Program Root: ")) - self.assertIn("func2 (file.py:20)", name) # formatted name + self.assertIn("func2 (file.py:20)", name) + self.assertEqual(data["self"], 0) # non-leaf: no self time children = data.get("children", []) self.assertEqual(len(children), 1) child = children[0] self.assertIn("func1 (file.py:10)", resolve_name(child, strings)) self.assertEqual(child["value"], 1) + self.assertEqual(child["self"], 1) # 
leaf: all time is self def test_flamegraph_collector_export(self): """Test flamegraph HTML export functionality.""" From 1f36a510a2a16e8ff15572f44090c7db43bb7935 Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Sun, 5 Apr 2026 06:31:54 +0800 Subject: [PATCH 13/13] Add `permissions: {}` to all reusable workflows (#148114) Add permissions: {} to all reusable workflows --- .github/workflows/reusable-check-c-api-docs.yml | 3 +-- .github/workflows/reusable-check-html-ids.yml | 3 +-- .github/workflows/reusable-cifuzz.yml | 2 ++ .github/workflows/reusable-context.yml | 2 ++ .github/workflows/reusable-docs.yml | 3 +-- .github/workflows/reusable-emscripten.yml | 2 ++ .github/workflows/reusable-macos.yml | 2 ++ .github/workflows/reusable-san.yml | 2 ++ .github/workflows/reusable-ubuntu.yml | 2 ++ .github/workflows/reusable-wasi.yml | 2 ++ .github/workflows/reusable-windows-msi.yml | 3 +-- .github/workflows/reusable-windows.yml | 2 ++ 12 files changed, 20 insertions(+), 8 deletions(-) diff --git a/.github/workflows/reusable-check-c-api-docs.yml b/.github/workflows/reusable-check-c-api-docs.yml index 49e5ef7f768b79..5fae57a1dbda36 100644 --- a/.github/workflows/reusable-check-c-api-docs.yml +++ b/.github/workflows/reusable-check-c-api-docs.yml @@ -3,8 +3,7 @@ name: Reusable C API Docs Check on: workflow_call: -permissions: - contents: read +permissions: {} env: FORCE_COLOR: 1 diff --git a/.github/workflows/reusable-check-html-ids.yml b/.github/workflows/reusable-check-html-ids.yml index 4f827c55cacd06..03ed714ca585fe 100644 --- a/.github/workflows/reusable-check-html-ids.yml +++ b/.github/workflows/reusable-check-html-ids.yml @@ -3,8 +3,7 @@ name: Reusable check HTML IDs on: workflow_call: -permissions: - contents: read +permissions: {} env: FORCE_COLOR: 1 diff --git a/.github/workflows/reusable-cifuzz.yml b/.github/workflows/reusable-cifuzz.yml index 339fca7919c27e..093b2c859eff7b 100644 --- a/.github/workflows/reusable-cifuzz.yml +++ b/.github/workflows/reusable-cifuzz.yml 
@@ -13,6 +13,8 @@ on: required: true type: string +permissions: {} + jobs: cifuzz: name: ${{ inputs.oss-fuzz-project-name }} (${{ inputs.sanitizer }}) diff --git a/.github/workflows/reusable-context.yml b/.github/workflows/reusable-context.yml index 0f0ca3475b320e..cc9841ebf32f27 100644 --- a/.github/workflows/reusable-context.yml +++ b/.github/workflows/reusable-context.yml @@ -54,6 +54,8 @@ on: # yamllint disable-line rule:truthy description: Whether to run the Windows tests value: ${{ jobs.compute-changes.outputs.run-windows-tests }} # bool +permissions: {} + jobs: compute-changes: name: Create context from changed files diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 0453b6ab555048..3d534feb2ed3ea 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -4,8 +4,7 @@ on: workflow_call: workflow_dispatch: -permissions: - contents: read +permissions: {} concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} diff --git a/.github/workflows/reusable-emscripten.yml b/.github/workflows/reusable-emscripten.yml index ce3e65f11a3282..300731deb78959 100644 --- a/.github/workflows/reusable-emscripten.yml +++ b/.github/workflows/reusable-emscripten.yml @@ -3,6 +3,8 @@ name: Reusable Emscripten on: workflow_call: +permissions: {} + env: FORCE_COLOR: 1 diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index 785dcc77f54227..a372d5715290db 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -12,6 +12,8 @@ on: required: true type: string +permissions: {} + env: FORCE_COLOR: 1 diff --git a/.github/workflows/reusable-san.yml b/.github/workflows/reusable-san.yml index 4e2891ab9b7759..33cfd578d6819a 100644 --- a/.github/workflows/reusable-san.yml +++ b/.github/workflows/reusable-san.yml @@ -12,6 +12,8 @@ on: type: boolean default: false +permissions: {} + env: FORCE_COLOR: 1 diff --git 
a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 87274a7b8a3848..b2ab525c976330 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -23,6 +23,8 @@ on: type: string default: '' +permissions: {} + env: FORCE_COLOR: 1 diff --git a/.github/workflows/reusable-wasi.yml b/.github/workflows/reusable-wasi.yml index 9bff508bd6664e..83f9d2399ce100 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -3,6 +3,8 @@ name: Reusable WASI on: workflow_call: +permissions: {} + env: FORCE_COLOR: 1 diff --git a/.github/workflows/reusable-windows-msi.yml b/.github/workflows/reusable-windows-msi.yml index a74724323ec15f..7c724f184f3ef6 100644 --- a/.github/workflows/reusable-windows-msi.yml +++ b/.github/workflows/reusable-windows-msi.yml @@ -8,8 +8,7 @@ on: required: true type: string -permissions: - contents: read +permissions: {} env: FORCE_COLOR: 1 diff --git a/.github/workflows/reusable-windows.yml b/.github/workflows/reusable-windows.yml index 1c399689cde5b0..2cfe338a6525e6 100644 --- a/.github/workflows/reusable-windows.yml +++ b/.github/workflows/reusable-windows.yml @@ -17,6 +17,8 @@ on: required: true type: string +permissions: {} + env: FORCE_COLOR: 1 IncludeUwp: >-