diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
index 29f83c843561cd..ba13dfe1f6f7bf 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
@@ -2,6 +2,7 @@
 
 import os
 import random
+import struct
 import tempfile
 import unittest
 from collections import defaultdict
@@ -806,6 +807,43 @@ def test_invalid_file_path(self):
                 reader.replay_samples(RawCollector())
 
 
+class TestBinaryFormatValidation(BinaryFormatTestBase):
+    """Tests for malformed binary files."""
+
+    # Byte offset where this test overwrites the header's thread count.
+    HDR_OFF_THREADS = 32
+
+    def test_replay_rejects_more_threads_than_declared(self):
+        """Reject replay when samples hold more threads than declared.
+
+        The header is patched to declare one thread while the sample
+        data was written with two.
+        """
+        first = make_thread(1, [make_frame("t1.py", 10, "t1")])
+        second = make_thread(2, [make_frame("t2.py", 20, "t2")])
+        interpreter = make_interpreter(0, [first, second])
+        filename = self.create_binary_file(
+            [[interpreter]], compression="none"
+        )
+
+        # Shrink the declared thread count to 1 so that the header and
+        # the sample data disagree.
+        declared = struct.pack("=I", 1)
+        with open(filename, "r+b") as raw:
+            raw.seek(self.HDR_OFF_THREADS)
+            raw.write(declared)
+
+        expected = (
+            "Invalid thread count: sample data contains more unique "
+            "threads than declared in header (declared 1, found at least 2)"
+        )
+        with BinaryReader(filename) as reader:
+            self.assertEqual(reader.get_info()["thread_count"], 1)
+            with self.assertRaises(ValueError) as cm:
+                reader.replay_samples(RawCollector())
+            self.assertEqual(str(cm.exception), expected)
+
+
 class TestBinaryEncodings(BinaryFormatTestBase):
     """Tests specifically targeting different stack encodings."""
 
diff --git a/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst b/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst
new file mode 100644
index 00000000000000..e2f078742760a5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst
@@ -0,0 +1,3 @@
+Validate that :mod:`profiling.sampling` binary profiles do not contain more
+unique (thread, interpreter) pairs than declared in the header. Patch by
+Maurycy Pawłowski-Wieroński.
diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c
index 6c32ef70ac3f65..b90d9a53cb18a2 100644
--- a/Modules/_remote_debugging/binary_io_reader.c
+++ b/Modules/_remote_debugging/binary_io_reader.c
@@ -563,6 +563,14 @@ reader_get_or_create_thread_state(BinaryReader *reader, uint64_t thread_id,
         }
     }
 
+    if (reader->thread_state_count >= reader->thread_count) {
+        PyErr_Format(PyExc_ValueError,
+            "Invalid thread count: sample data contains more unique threads than declared in header "
+            "(declared %u, found at least %zu)",
+            reader->thread_count, reader->thread_state_count + 1);
+        return NULL;
+    }
+
     if (!reader->thread_states) {
         reader->thread_state_capacity = 16;
         reader->thread_states = PyMem_Calloc(reader->thread_state_capacity, sizeof(ReaderThreadState));