diff --git a/crates/openshell-sandbox/src/procfs.rs b/crates/openshell-sandbox/src/procfs.rs index e488a8b51..a6dd379b8 100644 --- a/crates/openshell-sandbox/src/procfs.rs +++ b/crates/openshell-sandbox/src/procfs.rs @@ -399,6 +399,52 @@ mod tests { use super::*; use std::io::Write; + /// Block until `/proc//exe` points at `target`. `Command::spawn` returns + /// once the child is scheduled, not once it has completed `exec()`; on + /// contended runners the readlink can still show the parent (test harness) + /// binary for a brief window. Byte-level `starts_with` tolerates the kernel's + /// `" (deleted)"` suffix on unlinked executables. + #[cfg(target_os = "linux")] + fn wait_for_child_exec(pid: i32, target: &std::path::Path) { + use std::os::unix::ffi::OsStrExt as _; + let target_bytes = target.as_os_str().as_bytes(); + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(2); + loop { + if let Ok(link) = std::fs::read_link(format!("/proc/{pid}/exe")) + && link.as_os_str().as_bytes().starts_with(target_bytes) + { + return; + } + assert!( + std::time::Instant::now() < deadline, + "child pid {pid} did not exec into {target:?} within 2s" + ); + std::thread::sleep(std::time::Duration::from_millis(10)); + } + } + + /// Retry `Command::spawn` on `ETXTBSY`. The kernel rejects `execve` when + /// `inode->i_writecount > 0`, and the release of that counter after the + /// writer fd is closed isn't synchronous with `close(2)` under contention — + /// so the very-next-instruction `execve` can still race it. Any other error + /// surfaces immediately. + #[cfg(target_os = "linux")] + fn spawn_retrying_on_etxtbsy(cmd: &mut std::process::Command) -> std::process::Child { + let mut attempts = 0; + loop { + match cmd.spawn() { + Ok(child) => return child, + Err(err) + if err.kind() == std::io::ErrorKind::ExecutableFileBusy && attempts < 20 => + { + attempts += 1; + std::thread::sleep(std::time::Duration::from_millis(50)); + } + Err(err) => panic!("spawn failed after {attempts} ETXTBSY retries: {err}"), + } + } + } + #[test] fn file_sha256_computes_correct_hash() { let mut tmp = tempfile::NamedTempFile::new().unwrap(); @@ -457,11 +503,11 @@ mod tests { // child is still running. The child keeps the exec mapping via // `/proc//exe`, but readlink will now return the tainted // " (deleted)" string. - let mut child = std::process::Command::new(&exe_path) - .arg("5") - .spawn() - .unwrap(); + let mut cmd = std::process::Command::new(&exe_path); + cmd.arg("5"); + let mut child = spawn_retrying_on_etxtbsy(&mut cmd); let pid: i32 = child.id().cast_signed(); + wait_for_child_exec(pid, &exe_path); std::fs::remove_file(&exe_path).unwrap(); // Sanity check: the raw readlink should contain " (deleted)". @@ -507,11 +553,11 @@ mod tests { std::fs::copy("/bin/sleep", &exe_path).unwrap(); std::fs::set_permissions(&exe_path, std::fs::Permissions::from_mode(0o755)).unwrap(); - let mut child = std::process::Command::new(&exe_path) - .arg("5") - .spawn() - .unwrap(); + let mut cmd = std::process::Command::new(&exe_path); + cmd.arg("5"); + let mut child = spawn_retrying_on_etxtbsy(&mut cmd); let pid: i32 = child.id().cast_signed(); + wait_for_child_exec(pid, &exe_path); // File is still linked — binary_path must return the path unchanged, // suffix and all. @@ -537,9 +583,8 @@ mod tests { #[test] fn binary_path_strips_suffix_for_non_utf8_filename() { use std::ffi::OsString; - use std::io::Write; use std::os::unix::ffi::{OsStrExt, OsStringExt}; - use std::os::unix::fs::{OpenOptionsExt, PermissionsExt}; + use std::os::unix::fs::PermissionsExt; let tmp = tempfile::TempDir::new().unwrap(); // 0xFF is not valid UTF-8. Build the filename on raw bytes. @@ -548,27 +593,14 @@ mod tests { raw_name.extend_from_slice(b".bin"); let exe_path = tmp.path().join(OsString::from_vec(raw_name)); - // Write bytes explicitly (instead of `std::fs::copy`) with an - // explicit `sync_all()` + scope drop so the write fd is fully closed - // before we `exec()` the file. Otherwise concurrent tests can race - // the kernel into returning ETXTBSY on spawn. - let bytes = std::fs::read("/bin/sleep").expect("read /bin/sleep"); - { - let mut f = std::fs::OpenOptions::new() - .write(true) - .create_new(true) - .mode(0o755) - .open(&exe_path) - .expect("create non-UTF-8 target file"); - f.write_all(&bytes).expect("write bytes"); - f.sync_all().expect("sync_all before exec"); - } + std::fs::copy("/bin/sleep", &exe_path).unwrap(); + std::fs::set_permissions(&exe_path, std::fs::Permissions::from_mode(0o755)).unwrap(); - let mut child = std::process::Command::new(&exe_path) - .arg("5") - .spawn() - .unwrap(); + let mut cmd = std::process::Command::new(&exe_path); + cmd.arg("5"); + let mut child = spawn_retrying_on_etxtbsy(&mut cmd); let pid: i32 = child.id().cast_signed(); + wait_for_child_exec(pid, &exe_path); std::fs::remove_file(&exe_path).unwrap(); // Sanity: raw readlink ends with " (deleted)" and is not valid UTF-8.