Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
79d5e02
Implement minimal multi-process support (fork/exec/waitpid/pipe)
wdcui Apr 26, 2026
2d4f009
Phase 5-6: Fork-aware FD lifecycle, cross-process signals, SIGPIPE
wdcui Apr 26, 2026
a4168b6
Close-on-exec: close FD_CLOEXEC file descriptors during execve
wdcui Apr 26, 2026
7d09afb
Orphan reparenting: reparent children to init (pid 1) on parent exit
wdcui Apr 26, 2026
e5554f4
Process groups: setpgid/getpgid/getpgrp/setsid syscalls, kill to pgid
wdcui Apr 26, 2026
21aada1
execve PATH resolution: search $PATH for binaries without '/'
wdcui Apr 26, 2026
a8074f1
Multi-threaded fork guard: reject fork when process has >1 threads
wdcui Apr 26, 2026
64db6ea
waitpid: support specific PID, process group, and own-group waits
wdcui Apr 26, 2026
fa161fc
Pipe readv/writev: implement pipe paths for scatter/gather I/O
wdcui Apr 26, 2026
8e74f79
Tests for kill/signal, waitpid WNOHANG, and PATH resolution
wdcui Apr 26, 2026
7110122
Fix review issues: fork bomb protection, signal mailbox cap, error ha…
wdcui Apr 26, 2026
8ac8ef5
Fix review round 2: wait4 race, PID/TID collision, error handling
wdcui Apr 26, 2026
6acd1a6
Fix review round 3: vfork_done on detach failure, double cloexec, loc…
wdcui Apr 26, 2026
f28ad45
fix: resolve all clippy errors for CI (-Dwarnings)
wdcui Apr 26, 2026
5ac9dd4
fix: CI failures — fmt, if-let match guard, Windows AddressSpaceProvider
wdcui Apr 26, 2026
c525826
fix: CI failures — Windows AddressSpaceKind path, let_needless_return…
wdcui Apr 26, 2026
83a32d2
fix: make exit_process graceful when PID not in registry, fix Windows…
wdcui Apr 26, 2026
4efc636
refactor: make fd/mod.rs OS-agnostic — remove POSIX-specific naming a…
wdcui May 1, 2026
5fc0566
refactor: merge increment_process_refcounts into clone_storage_for_child
wdcui May 1, 2026
2d8c427
refactor: inline clone_for_child_selective into clone_storage_for_child
wdcui May 1, 2026
b7250cf
refactor: move clone_for_child to RawDescriptorStorage where it belongs
wdcui May 1, 2026
e3451bb
refactor: rename on_close to on_ref_removed for symmetry with on_ref_…
wdcui May 1, 2026
88c41b0
refactor: remove dead on_ref_added/on_ref_removed hooks from FdEnable…
wdcui May 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 137 additions & 13 deletions litebox/src/fd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,17 +106,30 @@ impl<Platform: RawSyncPrimitivesProvider> Descriptors<Platform> {
/// Removes the entry at `fd`, closing out the file descriptor.
///
/// Returns the descriptor entry if it is unique (i.e., it was not duplicated, or all duplicates
/// have been cleared out).
/// have been cleared out) AND no other process holds a fork reference to this slot.
///
/// If the `fd` was already closed out, then (obviously) it does not return an entry.
#[expect(
clippy::missing_panics_doc,
reason = "panics only on invariant violation"
)]
pub fn remove<Subsystem: FdEnabledSubsystem>(
&mut self,
fd: &TypedFd<Subsystem>,
) -> Option<Subsystem::Entry> {
let Some(old) = self.entries[fd.x.as_usize()?].take() else {
unreachable!();
};
let idx = fd.x.as_usize()?;
let entry = self.entries[idx].as_mut().unwrap();
fd.x.mark_as_closed();

assert!(entry.process_refcount > 0);
entry.process_refcount -= 1;
if entry.process_refcount > 0 {
// Another process still references this slot — don't remove the entry.
return None;
}

// Last fork reference — truly vacate the slot.
let old = self.entries[idx].take().unwrap();
Arc::into_inner(old.x)
.map(RwLock::into_inner)
.map(DescriptorEntry::into_subsystem_entry::<Subsystem>)
Expand All @@ -137,9 +150,18 @@ impl<Platform: RawSyncPrimitivesProvider> Descriptors<Platform> {
can_close_immediately: F,
) -> Option<CloseResult<Subsystem>> {
let idx = fd.x.as_usize()?;
let Some(old) = self.entries[idx].take() else {
unreachable!();
};
let entry = self.entries[idx].as_mut().unwrap();

// If another process holds a fork reference, just decrement and don't truly close.
assert!(entry.process_refcount > 0);
if entry.process_refcount > 1 {
fd.x.mark_as_closed();
entry.process_refcount -= 1;
return Some(CloseResult::SharedDecremented);
}

// process_refcount == 1: this is the last process. Proceed with normal close logic.
let old = self.entries[idx].take().unwrap();
if Arc::strong_count(&old.x) == 1 {
// Unique, so we can just return it if allowed.
if can_close_immediately(old.x.read().as_subsystem::<Subsystem>()) {
Expand All @@ -157,7 +179,7 @@ impl<Platform: RawSyncPrimitivesProvider> Descriptors<Platform> {
}
} else {
fd.x.mark_as_closed();
// Shared, so we need to duplicate it.
// Shared (via dup), so we need to duplicate it.
let old = self.entries[idx].replace(old);
assert!(old.is_none());
Some(CloseResult::Duplicated(TypedFd {
Expand Down Expand Up @@ -188,23 +210,26 @@ impl<Platform: RawSyncPrimitivesProvider> Descriptors<Platform> {
) -> Vec<Subsystem::Entry> {
// Each FD corresponds to an `IndividualEntry`, which has an Arc to a `DescriptorEntry`. If
// we have the same number of FDs as matching to the strong-count of a descriptor entry,
// AND the slot has process_refcount == 1 (no other process references it),
// then it must be the case that we have everything needed to close the entries out.
let removable_entries: Vec<*const RwLock<_, _>> = {
let mut strong_count_and_count = HashMap::<*const _, (usize, usize)>::new();
let mut strong_count_and_count = HashMap::<*const _, (usize, usize, bool)>::new();
for fd in fds.iter() {
let entry = &self.entries[fd.x.as_usize().unwrap()];
// It would not be "incorrect" to see a closed out entry, but as it currently stands, I
// believe that we'll only see alive entries, so this `unwrap` is confirming that; if we
// need to expand it out, we'd simply have a `continue` here.
let entry = entry.as_ref().unwrap();
strong_count_and_count
let has_shared_refs = entry.process_refcount > 1;
let record = strong_count_and_count
.entry(Arc::as_ptr(&entry.x))
.or_insert((Arc::strong_count(&entry.x), 0))
.1 += 1;
.or_insert((Arc::strong_count(&entry.x), 0, false));
record.1 += 1;
record.2 |= has_shared_refs;
}
strong_count_and_count
.into_iter()
.filter(|(_ptr, (sc, c))| sc == c)
.filter(|(_ptr, (sc, c, has_fork))| sc == c && !has_fork)
.map(|(ptr, _)| ptr)
.collect()
};
Expand Down Expand Up @@ -514,6 +539,26 @@ impl<Platform: RawSyncPrimitivesProvider> Descriptors<Platform> {
.metadata
.insert(metadata)
}

/// Collects the slot index of every live entry that carries per-FD metadata of type `T`
/// for which `pred` returns `true`.
///
/// Vacant slots, and live slots without metadata of type `T`, are skipped.
///
/// **Important**: the returned values index into `Descriptors.entries`; they are NOT raw FD
/// numbers. Use [`RawDescriptorStorage::raw_fds_matching_metadata`] when raw FD numbers are
/// needed.
pub fn indices_matching_metadata<T: core::any::Any + Send + Sync>(
    &self,
    pred: impl Fn(&T) -> bool,
) -> alloc::vec::Vec<usize> {
    let mut matching = alloc::vec::Vec::new();
    for (slot_idx, slot) in self.entries.iter().enumerate() {
        // A vacant slot has nothing to inspect.
        let Some(entry) = slot.as_ref() else {
            continue;
        };
        if entry.metadata.get::<T>().is_some_and(&pred) {
            matching.push(slot_idx);
        }
    }
    matching
}
}

/// A handle to a descriptor entry (via [`Descriptors::entry_handle`]) that can be used without
Expand Down Expand Up @@ -542,6 +587,9 @@ pub(crate) enum CloseResult<Subsystem: FdEnabledSubsystem> {
Duplicated(TypedFd<Subsystem>),
/// The FD was unique but couldn't be closed immediately (e.g., due to pending data)
Deferred,
/// Another process still holds a reference to this slot. The process_refcount
/// was decremented and the FD was marked closed; no further action needed.
SharedDecremented,
}

/// Safe(r) conversions between safely-typed file descriptors and unsafely-typed integers.
Expand Down Expand Up @@ -676,13 +724,85 @@ impl RawDescriptorStorage {
self.stored_fds.get(fd).is_some_and(Option::is_some)
}

/// Clone this FD table for a child process, optionally selecting which raw FD
/// indices to inherit, and increment the process reference counts in the
/// provided [`Descriptors`] for all inherited slots.
///
/// - `inherit = None` — inherit all open FDs (bulk inheritance).
/// - `inherit = Some(fds)` — inherit only the listed raw FD indices (selective
/// inheritance). Indices not present in the slice are skipped.
/// - `inherit = Some(&[])` — inherit nothing (child gets an empty FD table).
///
/// Each slot in the new storage gets a **new, independent** `OwnedFd`
/// (with the same raw index as the parent's), avoiding shared `AtomicBool`
/// poisoning when either process closes the FD independently.
#[must_use]
#[expect(
clippy::missing_panics_doc,
reason = "panics only on invariant violation (slot must exist during child creation)"
)]
pub fn clone_for_child<Platform: RawSyncPrimitivesProvider>(
&self,
descriptors: &mut Descriptors<Platform>,
inherit: Option<&[usize]>,
) -> Self {
let mut stored_fds = Vec::with_capacity(self.stored_fds.len());
for (fd_index, slot) in self.stored_fds.iter().enumerate() {
let cloned = slot.as_ref().and_then(|stored| {
if inherit.is_some_and(|fds| !fds.contains(&fd_index)) {
return None;
}
let raw = stored
.x
.as_usize()
.expect("FD should not be closed during child creation");
let entry = descriptors.entries[raw]
.as_mut()
.expect("child creation: descriptor slot must exist");
entry.process_refcount += 1;
Some(StoredFd {
x: Arc::new(OwnedFd::new(raw)),
subsystem_entry_type_id: stored.subsystem_entry_type_id,
})
});
stored_fds.push(cloned);
}
Self { stored_fds }
}

/// Iterates, in ascending order, over the raw integer indices whose slot is currently
/// occupied (i.e., alive).
pub fn iter_alive(&self) -> impl Iterator<Item = usize> + '_ {
    self.stored_fds
        .iter()
        .enumerate()
        .filter(|(_, slot)| slot.is_some())
        .map(|(raw_fd, _)| raw_fd)
}

/// Collects the raw FD numbers whose backing `Descriptors` slot carries per-FD metadata of
/// type `T` for which `pred` returns `true`.
///
/// Each stored FD is first resolved to its slot index in `descriptors`, so the result is
/// expressed in raw FD numbers — unlike [`Descriptors::indices_matching_metadata`], which
/// reports slot indices. A raw FD is skipped when its slot in this table is empty, when it
/// no longer yields a slot index, or when the slot it points at is missing or vacant.
pub fn raw_fds_matching_metadata<
    Platform: RawSyncPrimitivesProvider,
    T: core::any::Any + Send + Sync,
>(
    &self,
    descriptors: &Descriptors<Platform>,
    pred: impl Fn(&T) -> bool,
) -> alloc::vec::Vec<usize> {
    let mut matching = alloc::vec::Vec::new();
    for (raw_fd, slot) in self.stored_fds.iter().enumerate() {
        let Some(stored) = slot.as_ref() else {
            continue;
        };
        let Some(slot_idx) = stored.x.as_usize() else {
            continue;
        };
        // Checked lookup: an out-of-range or vacant slot simply does not match.
        let Some(entry) = descriptors.entries.get(slot_idx).and_then(Option::as_ref) else {
            continue;
        };
        if entry.metadata.get::<T>().is_some_and(&pred) {
            matching.push(raw_fd);
        }
    }
    matching
}
}

macro_rules! multi_subsystem_generic {
Expand Down Expand Up @@ -787,6 +907,9 @@ pub enum MetadataError {
/// One occupied slot of the descriptor table: each open FD corresponds to one
/// `IndividualEntry`.
struct IndividualEntry<Platform: RawSyncPrimitivesProvider> {
/// Shared handle to the underlying `DescriptorEntry`. The close paths compare
/// `Arc::strong_count` of this handle against 1 to tell a unique entry from one that is
/// shared via dup.
x: Arc<RwLock<Platform, DescriptorEntry>>,
/// Per-FD metadata, looked up by type (`metadata.get::<T>()`).
metadata: AnyMap,
/// Number of processes referencing this slot (incremented on fork, decremented on close).
/// Starts at 1 when created or duplicated. When this reaches 0, the slot is truly vacated.
process_refcount: usize,
}
impl<Platform: RawSyncPrimitivesProvider> core::ops::Deref for IndividualEntry<Platform> {
type Target = Arc<RwLock<Platform, DescriptorEntry>>;
Expand All @@ -799,6 +922,7 @@ impl<Platform: RawSyncPrimitivesProvider> IndividualEntry<Platform> {
Self {
x,
metadata: AnyMap::new(),
process_refcount: 1,
}
}
}
Expand Down
1 change: 1 addition & 0 deletions litebox/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub mod net;
pub mod path;
pub mod pipes;
pub mod platform;
pub mod process;
pub mod shim;
pub mod sync;
pub mod tls;
Expand Down
68 changes: 44 additions & 24 deletions litebox/src/mm/linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,30 +304,53 @@ pub(super) struct Vmem<Platform: PageManagementProvider<ALIGN> + 'static, const
pub(super) brk: usize,
/// Virtual memory areas.
vmas: RangeMap<usize, VmArea>,
/// Minimum valid address for this address space.
pub(super) addr_min: usize,
/// Maximum valid address (exclusive) for this address space.
pub(super) addr_max: usize,
}

impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem<Platform, ALIGN> {
/// Size of the stack guard gap in bytes: `256 << 12` = 1 MiB (256 units of 4 KiB).
// NOTE(review): where this gap is applied is not visible in this hunk — confirm against the
// users of this constant.
pub(super) const STACK_GUARD_GAP: usize = 256 << 12;

/// Build a [`Vmem`] on top of the given memory [backend](PageManagementProvider), spanning
/// the platform's default address range `Platform::TASK_ADDR_MIN..Platform::TASK_ADDR_MAX`.
pub(super) fn new(platform: &'static Platform) -> Self {
    let default_range = Platform::TASK_ADDR_MIN..Platform::TASK_ADDR_MAX;
    Self::new_with_range(platform, default_range)
}

/// Create a new [`Vmem`] instance scoped to the given VA range.
///
/// Used for multi-process support where each process gets a VA partition.
///
/// `range.start` and `range.end` become `addr_min` and `addr_max` of the new instance, and
/// `brk` starts at 0. Every range reported by `platform.reserved_pages()` is recorded exactly
/// once as a VMA with empty flags, clipped to `range`: a reserved range that only partially
/// overlaps the partition still has its overlapping part recorded, and a reserved range that
/// does not overlap the partition at all (or is empty) is ignored.
///
/// # Panics
///
/// Panics if `range` is not aligned to `ALIGN` bytes, or if a reserved range that overlaps
/// `range` is not aligned to `ALIGN` bytes.
pub(super) fn new_with_range(
    platform: &'static Platform,
    range: core::ops::Range<usize>,
) -> Self {
    assert!(
        range.start.is_multiple_of(ALIGN) && range.end.is_multiple_of(ALIGN),
        "Vmem: address range must be aligned to {ALIGN} bytes"
    );
    let mut vmem = Self {
        vmas: RangeMap::new(),
        brk: 0,
        platform,
        addr_min: range.start,
        addr_max: range.end,
    };
    for each in platform.reserved_pages() {
        // Keep only the part of the reserved range that lies inside our partition. Dropping
        // a straddling range entirely would leave its in-partition part unreserved.
        let start = each.start.max(range.start);
        let end = each.end.min(range.end);
        if start >= end {
            continue;
        }
        // Both the reserved range and the partition bounds are aligned, so the clipped range
        // is aligned as well.
        assert!(
            each.start % ALIGN == 0 && each.end % ALIGN == 0,
            "Vmem: reserved range is not aligned to {ALIGN} bytes"
        );
        vmem.vmas.insert(
            start..end,
            VmArea {
                flags: VmFlags::empty(),
                is_file_backed: false,
            },
        );
    }
    vmem
}
Expand Down Expand Up @@ -453,10 +476,10 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
fixed_address_behavior: FixedAddressBehavior,
) -> Result<Platform::RawMutPointer<u8>, AllocationError> {
let (start, end) = (suggested_range.start, suggested_range.end);
if start < Platform::TASK_ADDR_MIN {
if start < self.addr_min {
return Err(AllocationError::BelowMinAddress);
}
if end > Platform::TASK_ADDR_MAX {
if end > self.addr_max {
return Err(AllocationError::AboveMaxAddress);
}
let platform_fixed_address_behavior = match fixed_address_behavior {
Expand Down Expand Up @@ -518,8 +541,8 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
let new_start = ret.as_usize();
let new_end = new_start + suggested_range.len();
self.vmas.insert(new_start..new_end, vma);
debug_assert!(new_start >= Platform::TASK_ADDR_MIN);
debug_assert!(new_end <= Platform::TASK_ADDR_MAX);
debug_assert!(new_start >= self.addr_min);
debug_assert!(new_end <= self.addr_max);
Ok(ret)
}

Expand Down Expand Up @@ -890,11 +913,11 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
fixed_addr: bool,
) -> Option<usize> {
let size = length.as_usize();
if size > Platform::TASK_ADDR_MAX {
if size > self.addr_max.saturating_sub(self.addr_min) {
return None;
}
if let Some(suggested_address) = suggested_address {
if (Platform::TASK_ADDR_MAX - size) < suggested_address.0 {
if (self.addr_max - size) < suggested_address.0 {
return None;
}
if fixed_addr
Expand All @@ -912,12 +935,9 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem

// top down
// 1. check [last_end, addr_max)
let (low_limit, high_limit) = (
Platform::TASK_ADDR_MIN,
Platform::TASK_ADDR_MAX - length.as_usize(),
);
debug_assert!(Platform::TASK_ADDR_MIN % ALIGN == 0);
debug_assert!(Platform::TASK_ADDR_MAX % ALIGN == 0);
let (low_limit, high_limit) = (self.addr_min, self.addr_max - length.as_usize());
debug_assert!(self.addr_min.is_multiple_of(ALIGN));
debug_assert!(self.addr_max.is_multiple_of(ALIGN));
let last_end = self.vmas.last_range_value().map_or(low_limit, |r| r.0.end);
if last_end <= high_limit {
return Some(high_limit);
Expand Down
Loading
Loading