Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions pre_commit/xargs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@
TArg = TypeVar('TArg')
TRet = TypeVar('TRet')

# POSIX minimum for ARG_MAX in bytes (see POSIX.1-2008); Python does not expose
# _POSIX_ARG_MAX, so we define it here once and reuse it.
_POSIX_ARG_MAX = 4096

# Practical upper bound for command-line argument length (128 KB).
# Even though SC_ARG_MAX can be much larger on modern systems, this provides
# a conservative limit that works reliably across different shells and tools.
_PRACTICAL_ARG_MAX = 2 ** 17


def cpu_count() -> int:
try:
Expand All @@ -48,14 +57,27 @@ def _environ_size(_env: MutableMapping[str, str] | None = None) -> int:

def _get_platform_max_length() -> int: # pragma: no cover (platform specific)
if os.name == 'posix':
maximum = os.sysconf('SC_ARG_MAX') - 2048 - _environ_size()
maximum = max(min(maximum, 2 ** 17), 2 ** 12)
return maximum
# Get the base ARG_MAX value from sysconf, or fall back to POSIX minimum
try:
arg_max_base = os.sysconf('SC_ARG_MAX')
except OSError:
# Fall back to the POSIX minimum when sysconf is unavailable or
# blocked by a sandbox (for example, Cursor's restricted
# environment where SC_ARG_MAX raises PermissionError).
arg_max_base = _POSIX_ARG_MAX

# Subtract headroom and environment size (shared sub-expression)
headroom_and_environ = 2048 + _environ_size()
calculated_length = arg_max_base - headroom_and_environ

# Clamp to reasonable bounds: minimum _POSIX_ARG_MAX, maximum _PRACTICAL_ARG_MAX
clamped_length = max(min(calculated_length, _PRACTICAL_ARG_MAX), _POSIX_ARG_MAX)
return clamped_length
elif os.name == 'nt':
return 2 ** 15 - 2048 # UNICODE_STRING max - headroom
else:
# posix minimum
return 2 ** 12
return _POSIX_ARG_MAX


def _command_length(*cmd: str) -> int:
Expand Down Expand Up @@ -134,14 +156,17 @@ def xargs(
*,
color: bool = False,
target_concurrency: int = 1,
_max_length: int = _get_platform_max_length(),
_max_length: int | None = None,
**kwargs: Any,
) -> tuple[int, bytes]:
"""A simplified implementation of xargs.

color: Make a pty if on a platform that supports it
target_concurrency: Target number of partitions to run concurrently
"""
if _max_length is None:
_max_length = _get_platform_max_length()

cmd_fn = cmd_output_p if color else cmd_output_b
retcode = 0
stdout = b''
Expand Down
173 changes: 173 additions & 0 deletions tests/xargs_with_blocked_sysconf_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
"""Tests for pre_commit.xargs when os.sysconf is blocked in a sandbox environment."""
from __future__ import annotations

import subprocess
import sys
import textwrap

# flake8: noqa: E501

# Error message when os.sysconf is blocked in the test subprocess
BLOCKED_SYSCONF_ERROR = 'blocked'


def _create_import_test_script() -> str:
"""Create a Python script that tests importing pre_commit.xargs with blocked os.sysconf.

This script must run in a subprocess (not in the main pytest process) because:
1. We need to patch os.sysconf BEFORE any imports happen
2. conftest.py already imports pre_commit.xargs in the main process
3. A subprocess gives us a clean import state to test against
"""
return textwrap.dedent(f"""
import importlib
import os
from unittest import mock

def main():
# Force a posix-style environment.
os.name = 'posix'

# Block os.sysconf so that any SC_ARG_MAX lookup fails with OSError.
with mock.patch.object(os, 'sysconf', side_effect=OSError('{BLOCKED_SYSCONF_ERROR}')):
# This should NOT raise with the fixed implementation.
importlib.import_module('pre_commit.xargs')

if __name__ == '__main__':
main()
""")


def test_import_module_without_throwing_exception_as_subprocess() -> None:
"""Test that pre_commit.xargs can be imported when os.sysconf is blocked.

This test uses a subprocess to avoid the conftest.py chicken/egg problem:
- Phase 1 (this function): Sets up and runs a subprocess with blocked os.sysconf
- Phase 2 (subprocess script): Attempts to import pre_commit.xargs in that environment

With the current buggy implementation, the subprocess crashes because os.sysconf
is called at import time. After the fix, the subprocess should succeed.
"""
# Create the test script that will run in a subprocess
script = _create_import_test_script()

# Run the script in a fresh subprocess where we control os.sysconf
proc = subprocess.run(
[sys.executable, '-c', script],
capture_output=True,
text=True,
cwd='/Users/michael/repos/pre-commit',
)

# Assert the subprocess succeeded (import worked without exception)
# Test-first: This test FAILS with current buggy behavior (subprocess crashes).
# After fix, subprocess should succeed (returncode == 0).
assert proc.returncode == 0, (
f'Subprocess failed to import pre_commit.xargs when os.sysconf is blocked. '
f'This exposes the bug: os.sysconf is called at import time. '
f'stderr: {proc.stderr!r}'
)


def _create_xargs_test_script(use_explicit_max_length: bool = False) -> str:
"""Create a Python script that tests xargs.xargs() with blocked os.sysconf.

This script must run in a subprocess (not in the main pytest process) because:
1. We need to patch os.sysconf BEFORE any imports happen
2. conftest.py already imports pre_commit.xargs in the main process
3. A subprocess gives us a clean import state to test against

Args:
use_explicit_max_length: If True, pass explicit _max_length parameter.
If False, rely on lazy evaluation of default.
"""
max_length_param = (
', _max_length=4096' if use_explicit_max_length else ''
)
return textwrap.dedent(f"""
import os
import sys
from unittest import mock

def main():
# Force a posix-style environment.
os.name = 'posix'

# Block os.sysconf so that any SC_ARG_MAX lookup fails with OSError.
with mock.patch.object(os, 'sysconf', side_effect=OSError('{BLOCKED_SYSCONF_ERROR}')):
# Import after patching os.sysconf
from pre_commit import xargs

# Test that xargs actually works - call it with a simple command
retcode, stdout = xargs.xargs(
('echo',),
('hello', 'world'){max_length_param}
)

# Verify it succeeded and produced expected output
if retcode != 0:
sys.exit(1)
if b'hello world' not in stdout:
sys.exit(2)
# Success - xargs worked despite blocked sysconf

if __name__ == '__main__':
main()
""")


def test_xargs_without_max_length_when_sysconf_blocked_as_subprocess() -> None:
"""Test that xargs.xargs() works without _max_length when os.sysconf is blocked.

This test verifies that lazy evaluation of _max_length handles blocked sysconf
correctly. When _max_length is not provided, xargs should call
_get_platform_max_length() at runtime, which should handle OSError gracefully.

This test uses a subprocess to avoid the conftest.py chicken/egg problem.
"""
# Create the test script that will run in a subprocess
script = _create_xargs_test_script(use_explicit_max_length=False)

# Run the script in a fresh subprocess where we control os.sysconf
proc = subprocess.run(
[sys.executable, '-c', script],
capture_output=True,
text=True,
cwd='/Users/michael/repos/pre-commit',
)

# Assert the subprocess succeeded (xargs worked without exception)
assert proc.returncode == 0, (
f'Subprocess failed to run xargs.xargs() without _max_length when '
f'os.sysconf is blocked. This exposes the bug: lazy evaluation of '
f'_max_length calls _get_platform_max_length() which may fail. '
f'stderr: {proc.stderr!r}'
)


def test_xargs_with_explicit_max_length_when_sysconf_blocked_as_subprocess() -> None:
"""Test that xargs.xargs() works with explicit _max_length when os.sysconf is blocked.

This test verifies that when _max_length is provided explicitly, xargs works
correctly even when os.sysconf is blocked. This tests the code path where
_get_platform_max_length() is never called.

This test uses a subprocess to avoid the conftest.py chicken/egg problem.
"""
# Create the test script that will run in a subprocess
script = _create_xargs_test_script(use_explicit_max_length=True)

# Run the script in a fresh subprocess where we control os.sysconf
proc = subprocess.run(
[sys.executable, '-c', script],
capture_output=True,
text=True,
cwd='/Users/michael/repos/pre-commit',
)

# Assert the subprocess succeeded (xargs worked with explicit _max_length)
assert proc.returncode == 0, (
f'Subprocess failed to run xargs.xargs() with explicit _max_length when '
f'os.sysconf is blocked. This should always work since _get_platform_max_length() '
f'is never called. stderr: {proc.stderr!r}'
)
Loading