Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import os
import random
import struct
import tempfile
import unittest
from collections import defaultdict
Expand Down Expand Up @@ -806,6 +807,35 @@ def test_invalid_file_path(self):
reader.replay_samples(RawCollector())


class TestBinaryFormatValidation(BinaryFormatTestBase):
    """Checks that inconsistent binary profile files are rejected."""

    # Byte offset at which this test overwrites the header's thread count.
    HDR_OFF_THREADS = 32

    def test_replay_rejects_more_threads_than_declared(self):
        """Replay fails when samples hold more threads than the header declares."""
        # Write an uncompressed profile whose single sample has two threads.
        first = make_thread(1, [make_frame("t1.py", 10, "t1")])
        second = make_thread(2, [make_frame("t2.py", 20, "t2")])
        interpreter = make_interpreter(0, [first, second])
        filename = self.create_binary_file([[interpreter]], compression="none")

        # Patch the header in place so that it declares only one thread.
        understated_count = struct.pack("=I", 1)
        with open(filename, "r+b") as raw:
            raw.seek(self.HDR_OFF_THREADS)
            raw.write(understated_count)

        expected_message = (
            "Invalid thread count: sample data contains more unique "
            "threads than declared in header (declared 1, found at least 2)"
        )
        with BinaryReader(filename) as reader:
            # The reader must report the patched header value...
            self.assertEqual(reader.get_info()["thread_count"], 1)
            # ...and refuse to replay samples that contradict it.
            with self.assertRaises(ValueError) as cm:
                reader.replay_samples(RawCollector())
            self.assertEqual(str(cm.exception), expected_message)


class TestBinaryEncodings(BinaryFormatTestBase):
"""Tests specifically targeting different stack encodings."""

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Validate that :mod:`profiling.sampling` binary profiles do not contain more
unique (thread, interpreter) pairs than declared in the header. Patch by
Maurycy Pawłowski-Wieroński.
8 changes: 8 additions & 0 deletions Modules/_remote_debugging/binary_io_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,14 @@ reader_get_or_create_thread_state(BinaryReader *reader, uint64_t thread_id,
}
}

if (reader->thread_state_count >= reader->thread_count) {
Copy link
Copy Markdown
Contributor Author

@maurycy maurycy Apr 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pablogsal

I'm wondering whether there should also be a hard cap here, but the count is of (thread, interpreter) pairs, and with...

const size_t MAX_THREADS_PER_INTERP = 8192;

const size_t MAX_INTERPRETERS = 256;

...the multiplication is still large.

PyErr_Format(PyExc_ValueError,
"Invalid thread count: sample data contains more unique threads than declared in header "
"(declared %u, found at least %zu)",
reader->thread_count, reader->thread_state_count + 1);
return NULL;
}

if (!reader->thread_states) {
reader->thread_state_capacity = 16;
reader->thread_states = PyMem_Calloc(reader->thread_state_capacity, sizeof(ReaderThreadState));
Expand Down
Loading