Skip to content

Commit 70dc9fe

Browse files
authored
Support large numbers of input files to Pyright. (#22901)
Pyright can read input file paths from stdin, which avoids exceeding the maximum command-line length. This change uses that feature whenever possible, in order to support large numbers of input files. Fixes #22779.
1 parent 730539e commit 70dc9fe

File tree

3 files changed

+68
-2
lines changed

3 files changed

+68
-2
lines changed

docs/notes/2.31.x.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ The Python Build Standalone backend (`pants.backend.python.providers.experimenta
4949

5050
The default interpreter constraints for tools have been standardized to require Python from 3.9 to 3.14. The default lockfiles have been generated to match. No top-level subsystem tools were updated, but several tool dependencies were. The most notable is `protobuf`, which was updated from 6.33.0 to [6.33.1](https://pypi.org/project/protobuf/6.33.1/).
5151

52+
The Pyright backend now passes input file paths to Pyright via stdin when possible, so that a large number of input files no longer causes an error by exceeding the maximum command-line length.
53+
5254
#### Shell
5355

5456
#### Javascript

src/python/pants/backend/python/typecheck/pyright/rules.py

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33

44
from __future__ import annotations
55

6+
import dataclasses
67
import json
78
import logging
89
import os
10+
import shlex
911
from collections.abc import Iterable
1012
from dataclasses import dataclass, replace
1113

@@ -44,6 +46,7 @@
4446
from pants.core.util_rules import config_files
4547
from pants.core.util_rules.config_files import ConfigFiles, find_config_file
4648
from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files
49+
from pants.core.util_rules.system_binaries import CatBinary, ShBinary
4750
from pants.engine.collection import Collection
4851
from pants.engine.fs import CreateDigest, FileContent
4952
from pants.engine.internals.graph import resolve_coarsened_targets as coarsened_targets_get
@@ -166,6 +169,8 @@ async def pyright_typecheck_partition(
166169
pyright: Pyright,
167170
pex_environment: PexEnvironment,
168171
nodejs: NodeJS,
172+
sh_binary: ShBinary,
173+
cat_binary: CatBinary,
169174
) -> CheckResult:
170175
root_sources_get = determine_source_files(
171176
SourceFilesRequest(fs.sources for fs in partition.field_sets)
@@ -233,6 +238,9 @@ async def pyright_typecheck_partition(
233238
config_files, requirements_venv_pex.venv_rel_dir, transitive_sources.source_roots
234239
)
235240

241+
# Prepare the process with as much information as we currently have. This will give us the
242+
# process's cwd, which we need in order to calculate the relative paths to the input files.
243+
# We will then manually tweak the argv before actually running.
236244
input_digest = await merge_digests(
237245
MergeDigests(
238246
[
@@ -242,21 +250,65 @@ async def pyright_typecheck_partition(
242250
]
243251
)
244252
)
245-
246253
process = await prepare_tool_process(
247254
pyright.request(
248255
args=(
249256
f"--venvpath={complete_pex_env.pex_root}", # Used with `venv` in config
250257
*pyright.args, # User-added arguments
251-
*(os.path.join("{chroot}", file) for file in root_sources.snapshot.files),
258+
"-", # Read input file paths from stdin
252259
),
253260
input_digest=input_digest,
254261
description=f"Run Pyright on {pluralize(len(root_sources.snapshot.files), 'file')}.",
255262
level=LogLevel.DEBUG,
256263
),
257264
**implicitly(),
258265
)
266+
267+
# We must use relative paths, because we don't know the abspath of the sandbox the process
268+
# will run in, and `{chroot}` interpolation only works on argv, not on the contents of
269+
# __files.txt (see below). Pyright interprets relpaths as relative to its cwd, so we
270+
# prepend the appropriate prefix to each file path.
271+
input_path_prefix = os.path.relpath(".", process.working_directory)
272+
input_files = [os.path.join(input_path_prefix, file) for file in root_sources.snapshot.files]
273+
274+
# We prefer to pass the list of input files via stdin, as large numbers of files can cause us
275+
# to exceed the max command line length. See https://github.com/pantsbuild/pants/issues/22779.
276+
# However Pyright, weirdly, splits stdin on spaces as well as newlines. So we can't pass input
277+
# file paths via stdin if any of them contain spaces.
278+
file_with_spaces = next((file for file in root_sources.snapshot.files if " " in file), None)
279+
if file_with_spaces:
280+
# Fall back to passing paths as args and hope we don't exceed the max command line length.
281+
process = dataclasses.replace(process, argv=(*process.argv[0:-1], *input_files))
282+
else:
283+
# Write the input files out to a text file.
284+
file_list_path = "__files.txt"
285+
file_list_content = "\n".join(input_files).encode()
286+
file_list_digest = await create_digest(
287+
CreateDigest([FileContent(file_list_path, file_list_content)])
288+
)
289+
input_digest = await merge_digests(
290+
MergeDigests(
291+
[
292+
process.input_digest,
293+
file_list_digest,
294+
]
295+
)
296+
)
297+
# Run the underlying process inside a shell script that cats the file list to stdin.
298+
shell_script = f"{cat_binary.path} {os.path.join(input_path_prefix, file_list_path)} | {shlex.join(process.argv)}"
299+
process = dataclasses.replace(
300+
process, argv=(sh_binary.path, "-c", shell_script), input_digest=input_digest
301+
)
302+
259303
result = await execute_process(process, **implicitly())
304+
if result.exit_code == 249 and file_with_spaces:
305+
logger.error(
306+
f"Found input files with spaces in their names, including: {file_with_spaces}. "
307+
"Due to a bug in Pyright this means that the number of input files Pants can pass to "
308+
"Pyright is limited, and exceeding that limit causes it to crash with exit code 249. "
309+
"Please reach out to the Pants team if this happens: "
310+
"https://www.pantsbuild.org/community/getting-help."
311+
)
260312
return CheckResult.from_fallible_process_result(
261313
result,
262314
partition_description=partition.description(),

src/python/pants/backend/python/typecheck/pyright/rules_integration_test.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,18 @@ def test_failing(rule_runner: PythonRuleRunner) -> None:
171171
assert result[0].report == EMPTY_DIGEST
172172

173173

174+
def test_spaces_in_filenames(rule_runner: PythonRuleRunner) -> None:
175+
rule_runner.write_files(
176+
{f"{PACKAGE}/f f.py": GOOD_FILE, f"{PACKAGE}/BUILD": "python_sources()"}
177+
)
178+
tgt = rule_runner.get_target(Address(PACKAGE, relative_file_path="f f.py"))
179+
result = run_pyright(rule_runner, [tgt])
180+
assert len(result) == 1
181+
assert result[0].exit_code == 0
182+
assert "0 errors" in result[0].stdout
183+
assert result[0].report == EMPTY_DIGEST
184+
185+
174186
def test_multiple_targets(rule_runner: PythonRuleRunner) -> None:
175187
rule_runner.write_files(
176188
{

0 commit comments

Comments
 (0)