Skip to content
242 changes: 209 additions & 33 deletions easybuild/easyblocks/generic/cargo.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,23 @@
import tempfile
from glob import glob
from pathlib import Path
from typing import Dict, List, Union

import easybuild.tools.environment as env
import easybuild.tools.systemtools as systemtools
from easybuild.framework.easyconfig import CUSTOM
from easybuild.framework.extensioneasyblock import ExtensionEasyBlock
from easybuild.tools import LooseVersion
from easybuild.tools.build_log import EasyBuildError, print_warning
from easybuild.tools.config import build_option
from easybuild.tools.filetools import CHECKSUM_TYPE_SHA256, compute_checksum, copy_dir, extract_file, mkdir
from easybuild.tools.filetools import read_file, remove_dir, write_file, which
from easybuild.tools.modules import get_software_version
from easybuild.tools.run import run_shell_cmd
from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC

# Base URL of the crates.io API for crates
CRATESIO_SOURCE = "https://crates.io/api/v1/crates"
# Value of the "source" field that marks a crate as coming from the crates.io registry in Cargo.lock files
CRATES_REGISTRY_URL = 'registry+https://github.com/rust-lang/crates.io-index'

CONFIG_TOML_SOURCE_VENDOR = """
[source.vendored-sources]
Expand All @@ -75,22 +79,147 @@
replace-with = "vendored-sources"
"""

# Template for a single [[package]] entry of a Cargo.lock file,
# filled via str.format with the name, version and source of a crate
CONFIG_LOCK_SOURCE = """
[[package]]
name = "{name}"
version = "{version}"
source = "{source}"
# checksum intentionally not set
"""

# JSON template with a {checksum} placeholder; the doubled braces are literal braces for str.format
# NOTE(review): presumably the content of the per-crate checksum file - its usage is not visible here
CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{checksum}"}}'


def get_workspace_members(crate_dir: Path):
"""Find all members of a cargo workspace in crate_dir.
def parse_toml_list(value: str) -> List[str]:
    """Split the raw text of a flat TOML array into its raw elements.

    String elements keep their quotes, all other elements (numbers, booleans, ...)
    are returned as their stripped text. Elements may be spread over multiple lines
    and a trailing comma is allowed.
    Nested arrays and inline tables are not supported:
    A comma inside of them is treated as an element separator.

    :param value: raw TOML array including the enclosing brackets, e.g. '["a", "b"]'
    :return: list with the raw text of each element in order of appearance
    :raises ValueError: if value is not enclosed in brackets or contains an unterminated string
    """
    if not value.startswith('[') or not value.endswith(']'):
        raise ValueError(f"'{value}' is not a TOML list")
    value = value[1:-1].strip()
    # Strings that are terminated by the next occurrence of their start marker.
    # Longer markers must come first as the single quote is a prefix of the triple quote.
    simple_str_markers = ('"""', "'''", "'")
    current_value = ''
    result = []
    while value:
        for marker in simple_str_markers:
            if value.startswith(marker):
                # Raises a ValueError for unterminated strings
                idx = value.index(marker, len(marker))
                current_value += value[:idx + len(marker)]
                value = value[idx + len(marker):].lstrip()
                break
        else:
            if value.startswith('"'):
                # Basic string: Consume escape sequences as pairs such that
                # an escaped quote does not end the string but a quote after an escaped backslash does
                m = re.match(r'"(?:[^"\\]|\\.)*"', value)
                if m is None:
                    raise ValueError(f"Unterminated string in TOML list at '{value}'")
                current_value += m[0]
                value = value[m.end():].lstrip()
        # Not inside a string here
        if value.startswith(','):
            result.append(current_value)
            current_value = ''
            value = value[1:].lstrip()
        else:
            m = re.search('"|\'|,', value)
            if m:
                current_value += value[:m.start()].strip()
                # Keep the delimiter: The next iteration handles it as separator or start of a string.
                # Skipping it here would merge adjacent non-string elements into one.
                value = value[m.start():]
            else:
                current_value += value.strip()
                break
    if current_value:
        result.append(current_value)
    return result


def _clean_line(line: str, expected_end: Union[str, None]) -> str:
"""Remove comments and trim line"""
if '#' not in line:
return line.strip()
if expected_end is not None and expected_end[0] in ("'", '"'):
try:
idx = line.index(expected_end) + len(expected_end)
except ValueError:
return line.strip() # Ignore #-sign in multi-line string
else:
idx = 0
in_str = False
escaped = False
while idx < len(line):
c = line[idx]
if in_str:
if escaped:
if c == '\\':
escaped = False
elif c == '"':
in_str = False
elif c == '\\':
escaped = True
elif c == '#':
break
elif c == '"':
in_str = True
elif c == "'":
try:
idx = line.index("'", idx + 1)
except ValueError:
idx = len(line)
idx += 1
return line[:idx].strip()


def parse_toml(file_or_content: Union[Path, str]) -> Dict[str, Dict[str, str]]:
    """Minimally parse a TOML file into sections, keys and values

    Values will be the raw strings (including quotes for string-typed values).
    Values starting with '[', '{', triple double or triple single quotes may span multiple lines,
    which are joined by newlines after removing comments and surrounding whitespace of each line.
    The section name is the text between the outermost brackets of the header line.
    Keys appearing before the first section header are stored in the section named ''.

    :param file_or_content: Path of the file to read or the content of a TOML file as a string
    :return: dict mapping each section name to a dict of its keys and raw values
    :raises ValueError: if a line cannot be parsed, includes the (1-based) line number and the line
    """

    result: Dict[str, Dict[str, str]] = {}
    pending_key = None
    pending_value = None
    expected_end = None
    current_section = ''  # Top-level keys are valid TOML and precede any section header
    content = read_file(file_or_content) if isinstance(file_or_content, Path) else file_or_content
    line_num = raw_line = None
    # Start markers of values that can span multiple lines and the marker ending them
    start_end = {
        '[': ']',
        '{': '}',
        '"""': '"""',
        "'''": "'''",
    }
    try:
        for line_num, raw_line in enumerate(content.splitlines(), 1):  # noqa B007: line_num used in error only
            line: str = _clean_line(raw_line, expected_end)
            if not line:
                continue
            if pending_key is None and line.startswith("[") and line.endswith("]"):
                current_section = line.strip()[1:-1].strip()
                result.setdefault(current_section, {})
                continue
            if pending_key is None:
                key, val = line.split("=", 1)
                pending_key = key.strip()
                pending_value = val.strip()
                for start, end in start_end.items():
                    if pending_value.startswith(start):
                        expected_end = end
                        break
                else:
                    expected_end = None
            else:
                pending_value += '\n' + line
            # A value consisting only of the marker (e.g. a line ending in """) just opened the multi-line value
            if expected_end is None or (pending_value != expected_end and pending_value.endswith(expected_end)):
                result.setdefault(current_section, {})[pending_key] = pending_value.strip()
                pending_key = None
                # The value is complete: Following lines must not be cleaned as if still inside of it,
                # or a comment on the next line would be taken as part of a multi-line string
                expected_end = None
    except Exception as e:
        raise ValueError(f'Failed to parse {file_or_content}, error {e} at line {line_num}: {raw_line}') from e
    return result


(Minimally) parse the Cargo.toml file.
def get_workspace_members(cargo_toml: Dict[str, str]):
"""Find all members of a cargo workspace in the parsed the Cargo.toml file.

Return a tuple: (has_package, workspace-members).
has_package determines if it is a virtual workspace ([workspace] and no [package])
workspace-members are all members (subfolder names) if it is a workspace, otherwise None
"""
cargo_toml = crate_dir / 'Cargo.toml'
lines = [line.strip() for line in read_file(cargo_toml).splitlines()]
# A virtual (workspace) manifest has no [package], but only a [workspace] section.
has_package = '[package]' in lines
has_package = 'package' in cargo_toml

# We are looking for this:
# [workspace]
Expand All @@ -101,30 +230,15 @@ def get_workspace_members(crate_dir: Path):
# ]

try:
start_idx = lines.index('[workspace]')
except ValueError:
workspace = cargo_toml['workspace']
except KeyError:
return has_package, None
# Find "members = [" and concatenate the value, stop at end of section or file
member_str = None
for line in lines[start_idx + 1:]:
if line.startswith('#'):
continue # Skip comments
if re.match(r'\[\w+\]', line):
break # New section
if member_str is None:
m = re.match(r'members\s+=\s+\[', line)
if m:
member_str = line[m.end():]
else:
member_str += line
# Stop if we reach the end of the list
if member_str is not None and member_str.endswith(']'):
member_str = member_str[:-1]
break
if member_str is None:
try:
member_strs = parse_toml_list(workspace['members'])
except (KeyError, ValueError):
raise EasyBuildError('Failed to find members in %s', cargo_toml)
# Split at commas after removing possibly trailing ones and remove the quotes
members = [member.strip().strip('"') for member in member_str.rstrip(',').split(',')]
# Remove the quotes
members = [member.strip('"') for member in member_strs]
# Sanity check that we didn't pick up anything unexpected
invalid_members = [member for member in members if not re.match(r'(\w|-)+', member)]
if invalid_members:
Expand All @@ -133,6 +247,39 @@ def get_workspace_members(crate_dir: Path):
return has_package, members


def merge_sub_crate(cargo_toml_path: Path, workspace_toml: Dict[str, str]):
    """Replace references to workspace values in a Cargo.toml file by the actual values.

    Entries such as 'authors.workspace = true' or 'foo = { workspace = true }' are rewritten in place to
    'authors = <value>' using the value of the same key in the corresponding workspace section.
    Handled are the package section (values from workspace.package) and the
    dependencies, build-dependencies and dev-dependencies sections (values from workspace.dependencies).
    The file is left untouched if it does not contain 'workspace = true'.

    :param cargo_toml_path: Path to the Cargo.toml of the workspace member, which gets overwritten
    :param workspace_toml: parsed Cargo.toml of the workspace
    :raises ValueError: if the line defining a key to be replaced cannot be found in the file
    :raises KeyError: if the workspace section does not contain a referenced key
    """
    SUFFIX = '.workspace'  # Suffix of keys that refer to workspace values
    content: str = read_file(cargo_toml_path)
    if 'workspace = true' not in content:
        return
    crate_toml = parse_toml(content)
    lines = content.splitlines()

    def do_replacement(section, workspace_section):
        """Rewrite the lines of all keys in section that refer to the workspace section"""
        if not (section and workspace_section):
            return

        for key, value in section.items():
            has_suffix = key.endswith(SUFFIX)
            if not ((has_suffix and value == 'true') or value == '{ workspace = true }'):
                continue
            real_key = key[:-len(SUFFIX)] if has_suffix else key
            new_value = workspace_section[real_key]
            line_start = f'{key} ='
            try:
                line_idx = next(i for i, text in enumerate(lines)
                                if text.lstrip().startswith(line_start) and value in text)
            except StopIteration:
                raise ValueError(f"Failed to find line for key '{key}' while merging {cargo_toml_path}")
            lines[line_idx] = f'{real_key} = {new_value}'

    # (section of the crate, section of the workspace providing the values)
    section_pairs = (
        ('package', 'workspace.package'),
        ('dependencies', 'workspace.dependencies'),
        ('build-dependencies', 'workspace.dependencies'),
        ('dev-dependencies', 'workspace.dependencies'),
    )
    for crate_section, workspace_section_name in section_pairs:
        do_replacement(crate_toml.get(crate_section), workspace_toml.get(workspace_section_name))

    write_file(cargo_toml_path, '\n'.join(lines))


def get_checksum(src, log):
"""Get the checksum from an extracted source"""
checksum = src['checksum']
Expand Down Expand Up @@ -354,7 +501,8 @@ def _setup_offline_config(self, git_sources):
tmp_dir = Path(tempfile.mkdtemp(dir=self.builddir, prefix='tmp_crate_'))
# Add checksum file for each crate such that it is recognized by cargo.
# Glob to catch multiple folders in a source archive.
for crate_dir in (p.parent for p in Path(self.vendor_dir).glob('*/Cargo.toml')):
for cargo_toml in Path(self.vendor_dir).glob('*/Cargo.toml'):
crate_dir = cargo_toml.parent
src = path_to_source.get(str(crate_dir))
if src:
try:
Expand All @@ -372,7 +520,8 @@ def _setup_offline_config(self, git_sources):
# otherwise (Only "[workspace]" section and no "[package]" section)
# we have to remove the top-level folder or cargo fails with:
# "found a virtual manifest at [...]Cargo.toml instead of a package manifest"
has_package, members = get_workspace_members(crate_dir)
parsed_toml = parse_toml(cargo_toml)
has_package, members = get_workspace_members(parsed_toml)
if members:
self.log.info(f'Found workspace in {crate_dir}. Members: ' + ', '.join(members))
if not any((crate_dir / crate).is_dir() for crate in members):
Expand All @@ -397,6 +546,8 @@ def _setup_offline_config(self, git_sources):
# Use copy_dir to resolve symlinks that might point to the parent folder
copy_dir(tmp_crate_dir / member, target_path, symlinks=False)
cargo_pkg_dirs.append(target_path)
self.log.info(f'Resolving workspace values for crate {member}')
merge_sub_crate(target_path / 'Cargo.toml', parsed_toml)
if has_package:
# Remove the copied crate folders
for member in members:
Expand Down Expand Up @@ -470,8 +621,33 @@ def prepare_step(self, *args, **kwargs):
self.set_cargo_vars()

def configure_step(self):
    """Create a Cargo.lock file listing the crates of this easyconfig if the project has none.

    Nothing is done if no crates are specified, there is no Cargo.toml in the current directory
    or a Cargo.lock already exists in the current directory or next to the workspace root manifest.
    """
    cargo_lock = 'Cargo.lock'
    if not self.crates or not os.path.exists('Cargo.toml') or os.path.exists(cargo_lock):
        return

    # The lock file belongs next to the manifest of the workspace root, which might be a parent folder
    root_toml = run_shell_cmd('cargo locate-project --message-format=plain --workspace').output
    cargo_lock_path = os.path.join(os.path.dirname(root_toml), cargo_lock)
    if os.path.exists(cargo_lock_path):
        return

    rust_version = LooseVersion(get_software_version('Rust'))
    # File format version, oldest supported used for compatibility
    if rust_version <= '1.37':
        format_version = 1
    elif rust_version <= '1.81':
        format_version = 2
    else:
        format_version = 3

    # Use vendored crates to ensure those versions are used
    self.log.info(f"No {cargo_lock} file found, creating one at {cargo_lock_path}")
    parts = [f'version = {format_version}\n']
    for crate_info in self.crates:
        if len(crate_info) == 2:
            name, version = crate_info
            source = CRATES_REGISTRY_URL
        else:
            name, version, repo, rev = crate_info
            source = f'git+{repo}?rev={rev}#{rev}'
        parts.append(CONFIG_LOCK_SOURCE.format(name=name, version=version, source=source))
    write_file(cargo_lock_path, ''.join(parts))

@property
def profile(self):
Expand Down Expand Up @@ -561,7 +737,7 @@ def generate_crate_list(sourcedir):
if name == app_name:
app_in_cratesio = True # exclude app itself, needs to be first in crates list or taken from pypi
else:
if source_url == 'registry+https://github.com/rust-lang/crates.io-index':
if source_url == CRATES_REGISTRY_URL:
crates.append((name, version))
else:
# Lock file has revision and branch in the url
Expand Down
5 changes: 5 additions & 0 deletions easybuild/easyblocks/generic/cargopythonpackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,8 @@ def extra_options(extra_vars=None):
def extract_step(self):
    """Specifically use the overloaded variant from Cargo as it populates vendored sources with checksums."""
    return Cargo.extract_step(self)

def configure_step(self):
    """Run the configure step of Cargo followed by the configure step of PythonPackage."""
    # Both implementations are called explicitly by class name, Cargo first
    Cargo.configure_step(self)
    PythonPackage.configure_step(self)
Loading