diff --git a/audit/audit.ts b/audit/audit.ts index 347d78d3..a0609f7a 100755 --- a/audit/audit.ts +++ b/audit/audit.ts @@ -46,9 +46,19 @@ async function audit_provides(pkg: Package) { for (const provide of await pantry.project(pkg).provides()) { const name = moustaches.apply(provide, versionMap) - const bin = path.join('bin', name) - const sbin = path.join('sbin', name) - if (!bin.isExecutableFile() && !sbin.isExecutableFile()) { + // Windows targets ship binaries with `.exe` suffix. Recipes can + // keep `provides: - bin/foo` without enumerating both — we look + // for the unsuffixed name first (Linux/macOS), then fall back to + // `foo.exe` (windows/*). Cost: one extra stat per provide on + // non-Windows; zero false positives since `.exe` files are + // unconventional on POSIX. + const candidates = [ + path.join('bin', name), + path.join('sbin', name), + path.join('bin', name + '.exe'), + path.join('sbin', name + '.exe'), + ] + if (!candidates.some(p => p.isExecutableFile())) { missing.push([pkg.project, name]) } } diff --git a/lib/bin/fix-pe.ts b/lib/bin/fix-pe.ts new file mode 100755 index 00000000..261f6d3b --- /dev/null +++ b/lib/bin/fix-pe.ts @@ -0,0 +1,164 @@ +#!/usr/bin/env -S pkgx +llvm.org/mingw-w64 deno^1 run -A + +// fix-pe — Windows PE/COFF analog of fix-elf. +// +// PE has NO RPATH equivalent: Windows resolves DLL imports via a fixed +// search order (app dir → System32 → SysWOW64 → %PATH%). There's no +// $ORIGIN, no DT_RPATH, no DT_RUNPATH. So "relocation" of a bottle +// happens entirely by LAYOUT (DLLs co-located with the exe), not by +// patching metadata. +// +// What this script DOES do: +// +// 1. Strip "+brewing" from any embedded strings in PE binaries (same +// problem as glibc's libc.so linker scripts in pkgxdev/pantry#12968: +// the build-time install prefix leaks into binaries via debug-info +// records, manifest paths, etc.). +// +// 2. Strip absolute build paths from .pdb debug-info records (where +// present) — these are non-load-bearing but leak filesystem info. +// +// 3. Audit-only: verify every NEEDED DLL is either Windows-shipped +// (kernel32, ntdll, ucrtbase, vcruntime140, …) or co-located in +// the bottle. Warn (don't fail) if neither is true. +// +// What this script does NOT do: +// +// - Rewrite import paths (Windows DLL search is by basename, not path) +// - Insert "RPATH-equivalent" hints (none exist in PE/COFF) +// - Run patchelf-style surgery on relocations (PE handles its own) +// +// The hermeticity story for Windows is `libexec/bkwinvenv seal` (which +// moves bin/*.exe → libexec/*.exe and emits .cmd wrappers) — orthogonal +// to this pass, just as bklibcvenv is orthogonal to fix-elf on Linux. +// +// Refs: pkgxdev/brewkit#346, #344 (bklibcvenv), #345 (per-package skip). + +import { utils, Installation, hooks, Path } from "pkgx" +const { useCellar } = hooks +const { host } = utils + +// Windows-resident DLLs that the loader always finds; we don't need +// to bundle these and we don't warn when an exe imports them. +const SYSTEM_DLLS = new Set([ + // Kernel + standard runtime + "kernel32.dll", "ntdll.dll", "user32.dll", "gdi32.dll", "advapi32.dll", + "msvcrt.dll", "ucrtbase.dll", + // VC++ redistributable (assumed present from VS runtime install) + "vcruntime140.dll", "vcruntime140_1.dll", "msvcp140.dll", "concrt140.dll", + // Networking / crypto + "ws2_32.dll", "crypt32.dll", "bcrypt.dll", "secur32.dll", + // Common system-level + "shell32.dll", "ole32.dll", "oleaut32.dll", "comctl32.dll", + "iphlpapi.dll", "dbghelp.dll", "psapi.dll", "version.dll", +]) + +const SYSTEM_DLL_PREFIXES = [ + "api-ms-win-", // UCRT API-set forwarders (api-ms-win-crt-runtime-l1-1-0.dll etc.) + "ext-ms-", +] + +function is_system_dll(name: string): boolean { + const lc = name.toLowerCase() + if (SYSTEM_DLLS.has(lc)) return true + return SYSTEM_DLL_PREFIXES.some(p => lc.startsWith(p)) +} + +if (import.meta.main) { + if (host().platform != "windows") { + // fix-pe runs even on Linux/macOS hosts when we cross-compile to + // Windows. The Installation path will contain .exe / .dll files + // that we want to audit + sanitize. + } + const cellar = useCellar() + const [installation_path] = Deno.args + const installed = await cellar.resolve(new Path(installation_path)) + await fix_pe(installed) +} + +export default async function fix_pe(installation: Installation) { + console.info("auditing PE binaries…") + + for await (const path of pe_files(installation.path)) { + await strip_brewing_strings(path) + await audit_imports(path, installation) + } +} + +// Iterate every regular file under installation/{bin,lib,libexec,sbin} +// whose first two bytes are 'MZ' (PE/COFF DOS header). +async function* pe_files(root: Path): AsyncIterable { + for (const dir of ["bin", "lib", "libexec", "sbin"]) { + const d = root.join(dir) + if (!d.isDirectory()) continue + for await (const entry of d.walk()) { + if (!entry.isFile) continue + const f = await Deno.open(entry.path.string, { read: true }) + try { + const magic = new Uint8Array(2) + await f.read(magic) + if (magic[0] === 0x4d && magic[1] === 0x5a) { + yield entry.path + } + } finally { + f.close() + } + } + } +} + +// Replace any literal "+brewing" string in the binary with "" (just +// like the libc.so linker-script fix in pkgxdev/pantry#12968 but +// applied to binary files via stream rewrite, length-preserving). +// +// We use length-preserving substitution by replacing the +brewing +// bytes with NULs. That keeps file size + offsets stable, so PE +// section layout doesn't shift. The NULs are inert as path chars +// (Windows treats path-with-NUL as truncated). +async function strip_brewing_strings(path: Path) { + const data = await Deno.readFile(path.string) + const target = new TextEncoder().encode("+brewing") + let modified = false + outer: + for (let i = 0; i <= data.length - target.length; i++) { + for (let j = 0; j < target.length; j++) { + if (data[i + j] !== target[j]) continue outer + } + // Match found at offset i — zero out. + for (let j = 0; j < target.length; j++) data[i + j] = 0 + modified = true + } + if (modified) { + await Deno.writeFile(path.string, data) + console.info(`stripped +brewing from ${path}`) + } +} + +// llvm-readobj --coff-imports outputs each NEEDED DLL on a "Name: foo.dll" line. +// We audit (warn-only) that every non-system import is either in the bottle +// or somewhere brewkit can resolve via build.dependencies. +async function audit_imports(path: Path, installation: Installation) { + const cmd = new Deno.Command("llvm-readobj", { + args: ["--coff-imports", path.string], + stdout: "piped", stderr: "null", + }) + const { stdout } = await cmd.output() + const text = new TextDecoder().decode(stdout) + const imports = [...text.matchAll(/^\s*Name:\s+(\S+)/gm)].map(m => m[1]) + + const unresolved: string[] = [] + for (const dll of imports) { + if (is_system_dll(dll)) continue + // Co-located check: is this DLL also in the bottle? + const here = installation.path.join("bin").join(dll) + const libexec = installation.path.join("libexec").join(dll) + if (here.exists() || libexec.exists()) continue + unresolved.push(dll) + } + + if (unresolved.length) { + console.warn(`${path}: unresolved DLL imports: ${unresolved.join(", ")}`) + console.warn(` (not in bottle, not a known Windows-system DLL)`) + console.warn(` bottle may need to bundle these via bkwinvenv seal`) + } +} diff --git a/libexec/bkwinvenv b/libexec/bkwinvenv new file mode 100755 index 00000000..b221c5f9 --- /dev/null +++ b/libexec/bkwinvenv @@ -0,0 +1,144 @@ +#!/usr/bin/env -S pkgx +llvm-readobj +install bash +# +# bkwinvenv — hermetic Windows-binary bundler (analog of bkpyvenv). +# +# Sister of libexec/bkpyvenv, addressing the same need on Windows +# targets: a recipe-side helper that turns a freshly-installed +# bin/*.exe + dependent DLLs into a relocatable, hermetic bundle +# whose binaries always use the bottle's bundled DLLs regardless +# of the host's DLL search order. +# +# Phases: +# +# bkwinvenv stage +# Currently a no-op (placeholder for future hooks). Reserved for +# future CC/CXX/LD/LIB wiring if we end up needing per-recipe +# toolchain setup (analog of bkpyvenv's "create venv" phase). +# +# bkwinvenv seal +# For each ELF/PE executable in $prefix/bin/*.exe: +# 1. walk its PE import table (llvm-readobj --coff-imports) +# 2. for each transitive DLL dep that's NOT Windows-shipped +# (i.e. not kernel32, ntdll, ucrtbase, vcruntime140, etc.): +# - locate it in the brewkit env's lib paths +# - copy it into $prefix/libexec/ +# 3. move the exe to $prefix/libexec/$(basename $exe) +# 4. write $prefix/bin/$(basename $exe .exe).cmd: +# @echo off +# "%~dp0\..\libexec\" %* +# +# Why this works without explicit LoadLibrary calls: +# Windows' DLL search order checks the directory of the exe first. +# Co-locating bundled DLLs with the inner exe in libexec/ gets them +# resolved automatically — analogous to a Linux bottle using +# $ORIGIN-relative RPATH. +# +# Comparison vs bkpyvenv: +# bkpyvenv bkwinvenv +# ───────── ───────────── +# .venv/lib/ libexec/ +# Python stub .cmd wrapper +# pip install PE-import walk +# +# Refs: pkgxdev/brewkit#346 (Windows RFC), +# pkgxdev/brewkit#344 (bklibcvenv — same shape for Linux glibc). + +set -eo pipefail + +CMD=$1 +shift + +PREFIX=$1 +shift + +# Windows-resident DLLs we never bundle (kernel + standard runtime). +# Conservative list — expandable, basename-only match (case-insensitive +# since Windows filesystems usually are). +SYSTEM_DLLS=( + kernel32.dll ntdll.dll user32.dll gdi32.dll advapi32.dll + msvcrt.dll ucrtbase.dll vcruntime140.dll vcruntime140_1.dll + msvcp140.dll concrt140.dll + ws2_32.dll crypt32.dll bcrypt.dll secur32.dll + shell32.dll ole32.dll oleaut32.dll comctl32.dll + iphlpapi.dll dbghelp.dll psapi.dll version.dll + api-ms-win-crt-* # UCRT API-set forwarders +) + +is_system_dll() { + local name lc + name=$(basename "$1") + lc=$(echo "$name" | tr '[:upper:]' '[:lower:]') + for sys in "${SYSTEM_DLLS[@]}"; do + case "$lc" in $sys) return 0 ;; esac + done + return 1 +} + +# Walk PE imports recursively. Outputs unique DLL basenames (lowercased). +walk_imports() { + local exe=$1 seen=$2 + llvm-readobj --coff-imports "$exe" 2>/dev/null | \ + awk '/^ Name:/ { print tolower($2) }' | \ + while read dll; do + [ -z "$dll" ] && continue + grep -qFx "$dll" "$seen" 2>/dev/null && continue + echo "$dll" >> "$seen" + echo "$dll" + # Recurse: find DLL on host, walk its imports too. + for d in "$PREFIX/libexec" "$PREFIX/bin" $LD_LIBRARY_PATH; do + [ -f "$d/$dll" ] && walk_imports "$d/$dll" "$seen" && break + done + done +} + +set -x + +case $CMD in + stage) + # Placeholder. Future: wire CC/CXX/LD via the Windows SDK directive. + echo "bkwinvenv stage: no-op (reserved for future toolchain wiring)" + ;; + + seal) + mkdir -p "$PREFIX/libexec" + SEEN=$(mktemp) + trap "rm -f $SEEN" EXIT + + for exe in "$PREFIX/bin/"*.exe; do + [ -f "$exe" ] || continue + name=$(basename "$exe") + stem=${name%.exe} + + echo "::group::sealing $name" + + # 1. walk imports, copy non-system DLLs into libexec/ + walk_imports "$exe" "$SEEN" | while read dll; do + is_system_dll "$dll" && continue + # Find dll on host + for d in "$PREFIX/lib" "$PREFIX/bin" $LD_LIBRARY_PATH; do + if [ -f "$d/$dll" ]; then + cp -n "$d/$dll" "$PREFIX/libexec/$dll" + echo " bundled: $dll" + break + fi + done + done + + # 2. move the exe itself into libexec/ + mv "$exe" "$PREFIX/libexec/$name" + + # 3. write the .cmd wrapper + cat > "$PREFIX/bin/$stem.cmd" <&2 + exit 64 + ;; +esac