diff --git a/.github/workflows/release-vm-dev.yml b/.github/workflows/release-vm-dev.yml new file mode 100644 index 000000000..d08a208bd --- /dev/null +++ b/.github/workflows/release-vm-dev.yml @@ -0,0 +1,518 @@ +name: Release VM Dev + +# Build openshell-vm binaries for all supported platforms and upload them to +# the rolling "vm-dev" GitHub Release. Each binary is self-extracting: it +# embeds pre-built kernel runtime artifacts (from release-vm-kernel.yml) and a +# base rootfs tarball. +# +# Prerequisites: the vm-dev release must already contain kernel runtime +# tarballs. Run the "Release VM Kernel" workflow first if they are missing. + +on: + push: + branches: [main] + workflow_dispatch: + +permissions: + contents: write + packages: read + +# Serialize with release-vm-kernel.yml — both update the vm-dev release. +concurrency: + group: vm-dev-release + cancel-in-progress: false + +defaults: + run: + shell: bash + +jobs: + # --------------------------------------------------------------------------- + # Compute versions (reuse the same logic as release-dev.yml) + # --------------------------------------------------------------------------- + compute-versions: + name: Compute Versions + runs-on: build-amd64 + timeout-minutes: 5 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + outputs: + cargo_version: ${{ steps.v.outputs.cargo }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Fetch tags + run: git fetch --tags --force + + - name: Compute versions + id: v + run: | + set -euo pipefail + echo "cargo=$(uv run python tasks/scripts/release.py get-version --cargo)" >> "$GITHUB_OUTPUT" + + # --------------------------------------------------------------------------- + # Download kernel runtime tarballs from the vm-dev release + # 
--------------------------------------------------------------------------- + download-kernel-runtime: + name: Download Kernel Runtime + runs-on: build-amd64 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + + - name: Download all runtime tarballs + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + mkdir -p runtime-artifacts + + for platform in linux-aarch64 linux-x86_64 darwin-aarch64; do + echo "Downloading vm-runtime-${platform}.tar.zst..." + gh release download vm-dev \ + --repo "${GITHUB_REPOSITORY}" \ + --pattern "vm-runtime-${platform}.tar.zst" \ + --dir runtime-artifacts \ + --clobber + done + + echo "Downloaded runtime artifacts:" + ls -lah runtime-artifacts/ + + - name: Verify downloads + run: | + set -euo pipefail + for platform in linux-aarch64 linux-x86_64 darwin-aarch64; do + file="runtime-artifacts/vm-runtime-${platform}.tar.zst" + if [ ! -f "$file" ]; then + echo "ERROR: Missing ${file}" >&2 + echo "" >&2 + echo "The vm-dev release does not have kernel runtime artifacts." 
>&2 + echo "Run the 'Release VM Kernel' workflow first:" >&2 + echo " gh workflow run release-vm-kernel.yml" >&2 + exit 1 + fi + echo "OK: ${file} ($(du -sh "$file" | cut -f1))" + done + + - name: Upload as workflow artifact + uses: actions/upload-artifact@v4 + with: + name: kernel-runtime-tarballs + path: runtime-artifacts/vm-runtime-*.tar.zst + retention-days: 1 + + # --------------------------------------------------------------------------- + # Build base rootfs tarballs (architecture-specific) + # --------------------------------------------------------------------------- + build-rootfs: + name: Build Rootfs (${{ matrix.arch }}) + needs: [compute-versions] + strategy: + matrix: + include: + - arch: arm64 + runner: build-arm64 + guest_arch: aarch64 + - arch: amd64 + runner: build-amd64 + guest_arch: x86_64 + runs-on: ${{ matrix.runner }} + timeout-minutes: 30 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --privileged + volumes: + - /var/run/docker.sock:/var/run/docker.sock + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OPENSHELL_IMAGE_TAG: dev + steps: + - uses: actions/checkout@v4 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Log in to GHCR + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + + - name: Install tools + run: mise install + + - name: Build base rootfs tarball + run: | + set -euo pipefail + crates/openshell-vm/scripts/build-rootfs.sh \ + --base \ + --arch ${{ matrix.guest_arch }} \ + target/rootfs-build + + mkdir -p target/vm-runtime-compressed + tar -C target/rootfs-build -cf - . 
\ + | zstd -19 -T0 -o target/vm-runtime-compressed/rootfs.tar.zst + + echo "Rootfs tarball: $(du -sh target/vm-runtime-compressed/rootfs.tar.zst | cut -f1)" + + - name: Upload rootfs artifact + uses: actions/upload-artifact@v4 + with: + name: rootfs-${{ matrix.arch }} + path: target/vm-runtime-compressed/rootfs.tar.zst + retention-days: 1 + + # --------------------------------------------------------------------------- + # Build openshell-vm binary (Linux — native on each arch) + # --------------------------------------------------------------------------- + build-vm-linux: + name: Build VM (Linux ${{ matrix.arch }}) + needs: [compute-versions, download-kernel-runtime, build-rootfs] + strategy: + matrix: + include: + - arch: arm64 + runner: build-arm64 + target: aarch64-unknown-linux-gnu + platform: linux-aarch64 + guest_arch: aarch64 + - arch: amd64 + runner: build-amd64 + target: x86_64-unknown-linux-gnu + platform: linux-x86_64 + guest_arch: x86_64 + runs-on: ${{ matrix.runner }} + timeout-minutes: 30 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --privileged + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }} + OPENSHELL_IMAGE_TAG: dev + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Fetch tags + run: git fetch --tags --force + + - name: Install tools + run: mise install + + - name: Cache Rust target and registry + uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2 + with: + shared-key: vm-linux-${{ matrix.arch }} + cache-directories: .cache/sccache + cache-targets: "true" + + - name: Download kernel runtime tarball + uses: actions/download-artifact@v4 + with: + name: kernel-runtime-tarballs + path: runtime-download/ + + - name: 
Download rootfs tarball + uses: actions/download-artifact@v4 + with: + name: rootfs-${{ matrix.arch }} + path: rootfs-download/ + + - name: Stage compressed runtime for embedding + run: | + set -euo pipefail + COMPRESSED_DIR="${PWD}/target/vm-runtime-compressed" + mkdir -p "$COMPRESSED_DIR" + + # Extract kernel runtime tarball and re-compress individual files + EXTRACT_DIR=$(mktemp -d) + zstd -d "runtime-download/vm-runtime-${{ matrix.platform }}.tar.zst" --stdout \ + | tar -xf - -C "$EXTRACT_DIR" + + echo "Extracted runtime files:" + ls -lah "$EXTRACT_DIR" + + for file in "$EXTRACT_DIR"/*; do + [ -f "$file" ] || continue + name=$(basename "$file") + [ "$name" = "provenance.json" ] && continue + zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file" + done + + # Copy rootfs tarball (already zstd-compressed) + cp rootfs-download/rootfs.tar.zst "${COMPRESSED_DIR}/rootfs.tar.zst" + + echo "Staged compressed artifacts:" + ls -lah "$COMPRESSED_DIR" + + - name: Scope workspace to VM crates + run: | + set -euo pipefail + sed -i 's|members = \["crates/\*"\]|members = ["crates/openshell-vm", "crates/openshell-core", "crates/openshell-bootstrap", "crates/openshell-policy"]|' Cargo.toml + + - name: Patch workspace version + if: needs.compute-versions.outputs.cargo_version != '' + run: | + set -euo pipefail + sed -i -E '/^\[workspace\.package\]/,/^\[/{s/^version[[:space:]]*=[[:space:]]*".*"/version = "'"${{ needs.compute-versions.outputs.cargo_version }}"'"/}' Cargo.toml + + - name: Build openshell-vm + run: | + set -euo pipefail + OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${PWD}/target/vm-runtime-compressed" \ + mise x -- cargo build --release -p openshell-vm + + - name: sccache stats + if: always() + run: mise x -- sccache --show-stats + + - name: Package binary + run: | + set -euo pipefail + mkdir -p artifacts + tar -czf "artifacts/openshell-vm-${{ matrix.target }}.tar.gz" \ + -C target/release openshell-vm + ls -lh artifacts/ + + - name: Upload artifact + uses: 
actions/upload-artifact@v4 + with: + name: vm-linux-${{ matrix.arch }} + path: artifacts/*.tar.gz + retention-days: 5 + + # --------------------------------------------------------------------------- + # Build openshell-vm binary (macOS ARM64 via osxcross) + # --------------------------------------------------------------------------- + build-vm-macos: + name: Build VM (macOS) + needs: [compute-versions, download-kernel-runtime, build-rootfs] + runs-on: build-amd64 + timeout-minutes: 60 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --privileged + volumes: + - /var/run/docker.sock:/var/run/docker.sock + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Fetch tags + run: git fetch --tags --force + + - name: Log in to GHCR + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + + - name: Set up Docker Buildx + uses: ./.github/actions/setup-buildx + + - name: Download kernel runtime tarball + uses: actions/download-artifact@v4 + with: + name: kernel-runtime-tarballs + path: runtime-download/ + + - name: Download rootfs tarball (arm64) + uses: actions/download-artifact@v4 + with: + name: rootfs-arm64 + path: rootfs-download/ + + - name: Prepare compressed runtime directory + run: | + set -euo pipefail + COMPRESSED_DIR="${PWD}/target/vm-runtime-compressed-macos" + mkdir -p "$COMPRESSED_DIR" + + # Extract the darwin runtime tarball and re-compress for embedding. 
+ # The macOS embedded.rs expects: libkrun.dylib.zst, libkrunfw.5.dylib.zst, gvproxy.zst + EXTRACT_DIR=$(mktemp -d) + zstd -d "runtime-download/vm-runtime-darwin-aarch64.tar.zst" --stdout \ + | tar -xf - -C "$EXTRACT_DIR" + + echo "Extracted darwin runtime files:" + ls -lah "$EXTRACT_DIR" + + for file in "$EXTRACT_DIR"/*; do + [ -f "$file" ] || continue + name=$(basename "$file") + [ "$name" = "provenance.json" ] && continue + zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file" + done + + # The macOS VM guest is always Linux ARM64, so use the arm64 rootfs + cp rootfs-download/rootfs.tar.zst "${COMPRESSED_DIR}/rootfs.tar.zst" + + echo "Staged macOS compressed artifacts:" + ls -lah "$COMPRESSED_DIR" + + - name: Build macOS binary via Docker (osxcross) + run: | + set -euo pipefail + docker buildx build \ + --file deploy/docker/Dockerfile.vm-macos \ + --build-arg OPENSHELL_CARGO_VERSION="${{ needs.compute-versions.outputs.cargo_version }}" \ + --build-arg OPENSHELL_IMAGE_TAG=dev \ + --build-arg CARGO_TARGET_CACHE_SCOPE="${{ github.sha }}" \ + --build-context vm-runtime-compressed="${PWD}/target/vm-runtime-compressed-macos" \ + --target binary \ + --output type=local,dest=out/ \ + . 
+ + - name: Package binary + run: | + set -euo pipefail + mkdir -p artifacts + tar -czf artifacts/openshell-vm-aarch64-apple-darwin.tar.gz \ + -C out openshell-vm + ls -lh artifacts/ + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: vm-macos + path: artifacts/*.tar.gz + retention-days: 5 + + # --------------------------------------------------------------------------- + # Upload all VM binaries to the vm-dev rolling release + # --------------------------------------------------------------------------- + release-vm-dev: + name: Release VM Dev + needs: [build-vm-linux, build-vm-macos] + runs-on: build-amd64 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + + - name: Download all VM binary artifacts + uses: actions/download-artifact@v4 + with: + pattern: vm-* + path: release/ + merge-multiple: true + + - name: Filter to only binary tarballs + run: | + set -euo pipefail + mkdir -p release-final + # Only include the openshell-vm binary tarballs, not kernel runtime + cp release/openshell-vm-*.tar.gz release-final/ + count=$(ls release-final/openshell-vm-*.tar.gz 2>/dev/null | wc -l) + if [ "$count" -eq 0 ]; then + echo "ERROR: No VM binary tarballs found in release/" >&2 + exit 1 + fi + echo "Release artifacts (${count} binaries):" + ls -lh release-final/ + + - name: Generate checksums + run: | + set -euo pipefail + cd release-final + sha256sum openshell-vm-*.tar.gz > vm-binary-checksums-sha256.txt + cat vm-binary-checksums-sha256.txt + + - name: Ensure vm-dev tag exists + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git tag -fa vm-dev -m "VM Development Build" "${GITHUB_SHA}" + git push --force origin vm-dev + + - name: Prune stale VM binary assets from vm-dev release + uses: actions/github-script@v7 + with: + script: | + const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/'); + let release; + try { + release = await 
github.rest.repos.getReleaseByTag({ owner, repo, tag: 'vm-dev' }); + } catch (err) { + if (err.status === 404) { + core.info('No existing vm-dev release; will create fresh.'); + return; + } + throw err; + } + // Delete old VM binary assets (keep kernel runtime assets) + for (const asset of release.data.assets) { + if (asset.name.startsWith('openshell-vm-') || asset.name === 'vm-binary-checksums-sha256.txt') { + core.info(`Deleting stale asset: ${asset.name}`); + await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: asset.id }); + } + } + + - name: Upload to vm-dev GitHub Release + uses: softprops/action-gh-release@v2 + with: + name: OpenShell VM Development Build + prerelease: true + tag_name: vm-dev + target_commitish: ${{ github.sha }} + body: | + Rolling development build of **openshell-vm** — the MicroVM runtime for OpenShell. + + > **NOTE**: This is a development build, not a tagged release, and may be unstable. + + ### Kernel Runtime Artifacts + + Pre-built kernel runtime (libkrunfw + libkrun + gvproxy) for embedding into + the openshell-vm binary. These are rebuilt when the kernel config or pinned + dependency versions change. + + | Platform | Artifact | + |----------|----------| + | Linux ARM64 | `vm-runtime-linux-aarch64.tar.zst` | + | Linux x86_64 | `vm-runtime-linux-x86_64.tar.zst` | + | macOS ARM64 | `vm-runtime-darwin-aarch64.tar.zst` | + + ### VM Binaries + + Self-extracting openshell-vm binaries with embedded kernel runtime and base + rootfs. These are rebuilt on every push to main. 
+ + | Platform | Artifact | + |----------|----------| + | Linux ARM64 | `openshell-vm-aarch64-unknown-linux-gnu.tar.gz` | + | Linux x86_64 | `openshell-vm-x86_64-unknown-linux-gnu.tar.gz` | + | macOS ARM64 | `openshell-vm-aarch64-apple-darwin.tar.gz` | + + **macOS users:** The binary must be codesigned with the Hypervisor entitlement: + ```bash + codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - ./openshell-vm + ``` + + files: | + release-final/openshell-vm-aarch64-unknown-linux-gnu.tar.gz + release-final/openshell-vm-x86_64-unknown-linux-gnu.tar.gz + release-final/openshell-vm-aarch64-apple-darwin.tar.gz + release-final/vm-binary-checksums-sha256.txt diff --git a/.github/workflows/release-vm-kernel.yml b/.github/workflows/release-vm-kernel.yml new file mode 100644 index 000000000..d461cd166 --- /dev/null +++ b/.github/workflows/release-vm-kernel.yml @@ -0,0 +1,247 @@ +name: Release VM Kernel + +# Build custom libkrunfw (kernel firmware) + libkrun (VMM) + gvproxy for all +# supported openshell-vm platforms. Artifacts are uploaded to the rolling +# "vm-dev" GitHub Release and consumed by release-vm-dev.yml when building the +# openshell-vm binary. +# +# This workflow runs on-demand (or when kernel config / pins change). It is +# intentionally decoupled from the per-commit VM binary build because the +# kernel rarely changes and takes 15-45 minutes to compile. + +on: + workflow_dispatch: + +permissions: + contents: write + +# Serialize with release-vm-dev.yml — both update the vm-dev release. 
+concurrency: + group: vm-dev-release + cancel-in-progress: false + +defaults: + run: + shell: bash + +jobs: + # --------------------------------------------------------------------------- + # Linux ARM64 — native kernel + libkrun build + # --------------------------------------------------------------------------- + build-runtime-linux-arm64: + name: Build Runtime (Linux ARM64) + runs-on: build-arm64 + timeout-minutes: 60 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --privileged + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Build libkrunfw + libkrun from source + run: tasks/scripts/vm/build-libkrun.sh + + - name: Package runtime tarball + run: | + tasks/scripts/vm/package-vm-runtime.sh \ + --platform linux-aarch64 \ + --build-dir target/libkrun-build \ + --output artifacts/vm-runtime-linux-aarch64.tar.zst + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: vm-runtime-linux-arm64 + path: artifacts/vm-runtime-linux-aarch64.tar.zst + retention-days: 5 + + # --------------------------------------------------------------------------- + # Linux AMD64 — native kernel + libkrun build + # --------------------------------------------------------------------------- + build-runtime-linux-amd64: + name: Build Runtime (Linux AMD64) + runs-on: build-amd64 + timeout-minutes: 60 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --privileged + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Build libkrunfw + libkrun 
from source + run: tasks/scripts/vm/build-libkrun.sh + + - name: Package runtime tarball + run: | + tasks/scripts/vm/package-vm-runtime.sh \ + --platform linux-x86_64 \ + --build-dir target/libkrun-build \ + --output artifacts/vm-runtime-linux-x86_64.tar.zst + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: vm-runtime-linux-amd64 + path: artifacts/vm-runtime-linux-x86_64.tar.zst + retention-days: 5 + + # --------------------------------------------------------------------------- + # macOS ARM64 — kernel built via krunvm, libkrun built natively + # --------------------------------------------------------------------------- + build-runtime-macos-arm64: + name: Build Runtime (macOS ARM64) + runs-on: macos-latest-xlarge + timeout-minutes: 90 + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: | + set -euo pipefail + brew install rust lld dtc xz + # libkrunfw from Homebrew (used as a fallback/reference by build scripts) + brew install libkrunfw + # krunvm is needed to build the Linux kernel inside a Fedora VM + brew tap slp/krun + brew install krunvm + + - name: Build custom libkrunfw (kernel) + run: crates/openshell-vm/runtime/build-custom-libkrunfw.sh + + - name: Build portable libkrun + run: tasks/scripts/vm/build-libkrun-macos.sh + + - name: Package runtime tarball + env: + CUSTOM_PROVENANCE_DIR: target/custom-runtime + run: | + tasks/scripts/vm/package-vm-runtime.sh \ + --platform darwin-aarch64 \ + --build-dir target/libkrun-build \ + --output artifacts/vm-runtime-darwin-aarch64.tar.zst + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: vm-runtime-macos-arm64 + path: artifacts/vm-runtime-darwin-aarch64.tar.zst + retention-days: 5 + + # --------------------------------------------------------------------------- + # Upload all runtime tarballs to the vm-dev rolling release + # --------------------------------------------------------------------------- + release-kernel: + name: 
Release Kernel Runtime + needs: [build-runtime-linux-arm64, build-runtime-linux-amd64, build-runtime-macos-arm64] + runs-on: build-amd64 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + + - name: Download all runtime artifacts + uses: actions/download-artifact@v4 + with: + pattern: vm-runtime-* + path: release/ + merge-multiple: true + + - name: Generate checksums + run: | + set -euo pipefail + cd release + sha256sum vm-runtime-*.tar.zst > vm-runtime-checksums-sha256.txt + cat vm-runtime-checksums-sha256.txt + + - name: Ensure vm-dev tag exists + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git tag -fa vm-dev -m "VM Development Build" "${GITHUB_SHA}" + git push --force origin vm-dev + + - name: Prune stale runtime assets from vm-dev release + uses: actions/github-script@v7 + with: + script: | + const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/'); + let release; + try { + release = await github.rest.repos.getReleaseByTag({ owner, repo, tag: 'vm-dev' }); + } catch (err) { + if (err.status === 404) { + core.info('No existing vm-dev release; will create fresh.'); + return; + } + throw err; + } + // Delete old runtime tarballs and checksums (keep vm binary assets) + for (const asset of release.data.assets) { + if (asset.name.startsWith('vm-runtime-')) { + core.info(`Deleting stale asset: ${asset.name}`); + await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: asset.id }); + } + } + + - name: Create / update vm-dev GitHub Release + uses: softprops/action-gh-release@v2 + with: + name: OpenShell VM Development Build + prerelease: true + tag_name: vm-dev + target_commitish: ${{ github.sha }} + body: | + Rolling development build of **openshell-vm** — the MicroVM runtime for OpenShell. + + > **NOTE**: This is a development build, not a tagged release, and may be unstable. 
+ > The VM implementation itself is also experimental and may change or break without + > notice. + + ### Kernel Runtime Artifacts + + Pre-built kernel runtime (libkrunfw + libkrun + gvproxy) for embedding into + the openshell-vm binary. These are rebuilt when the kernel config or pinned + dependency versions change. + + | Platform | Artifact | + |----------|----------| + | Linux ARM64 | `vm-runtime-linux-aarch64.tar.zst` | + | Linux x86_64 | `vm-runtime-linux-x86_64.tar.zst` | + | macOS ARM64 | `vm-runtime-darwin-aarch64.tar.zst` | + + ### VM Binaries + + Self-extracting openshell-vm binaries with embedded kernel runtime and base + rootfs. These are rebuilt on every push to main. + + | Platform | Artifact | + |----------|----------| + | Linux ARM64 | `openshell-vm-aarch64-unknown-linux-gnu.tar.gz` | + | Linux x86_64 | `openshell-vm-x86_64-unknown-linux-gnu.tar.gz` | + | macOS ARM64 | `openshell-vm-aarch64-apple-darwin.tar.gz` | + + **macOS users:** The binary must be codesigned with the Hypervisor entitlement: + ```bash + codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - ./openshell-vm + ``` + + files: | + release/vm-runtime-linux-aarch64.tar.zst + release/vm-runtime-linux-x86_64.tar.zst + release/vm-runtime-darwin-aarch64.tar.zst + release/vm-runtime-checksums-sha256.txt diff --git a/.gitignore b/.gitignore index 32610f714..145c30695 100644 --- a/.gitignore +++ b/.gitignore @@ -181,6 +181,9 @@ kubeconfig # Documentation build output _build/ +# Gateway microVM rootfs build artifacts +rootfs/ + # Docker build artifacts (image tarballs, packaged helm charts) deploy/docker/.build/ diff --git a/AGENTS.md b/AGENTS.md index 79dc29d1b..0972d1d6a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -38,6 +38,7 @@ These pipelines connect skills into end-to-end workflows. 
Individual skill files | `crates/openshell-core/` | Shared core | Common types, configuration, error handling | | `crates/openshell-providers/` | Provider management | Credential provider backends | | `crates/openshell-tui/` | Terminal UI | Ratatui-based dashboard for monitoring | +| `crates/openshell-vm/` | MicroVM runtime | Experimental, work-in-progress libkrun-based VM execution | | `python/openshell/` | Python SDK | Python bindings and CLI packaging | | `proto/` | Protobuf definitions | gRPC service contracts | | `deploy/` | Docker, Helm, K8s | Dockerfiles, Helm chart, manifests | diff --git a/Cargo.lock b/Cargo.lock index 852d97a0c..dfc75fdc0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -106,9 +106,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" @@ -488,9 +488,9 @@ dependencies = [ [[package]] name = "bollard" -version = "0.20.1" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "227aa051deec8d16bd9c34605e7aaf153f240e35483dd42f6f78903847934738" +checksum = "ee04c4c84f1f811b017f2fbb7dd8815c976e7ca98593de9c1e2afad0f636bff4" dependencies = [ "base64 0.22.1", "bollard-stubs", @@ -584,9 +584,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.56" +version = "1.2.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", "jobserver", @@ -710,9 +710,9 @@ checksum = "5417da527aa9bf6a1e10a781231effd1edd3ee82f27d5f8529ac9b279babce96" [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "compact_str" @@ -1125,9 +1125,9 @@ dependencies = [ [[package]] name = "digest" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "285743a676ccb6b3e116bc14cc69319b957867930ae9c4822f8e0f54509d7243" +checksum = "4850db49bf08e663084f7fb5c87d202ef91a3907271aff24a94eb97ff039153c" dependencies = [ "block-buffer 0.12.0", "const-oid 0.10.2", @@ -2112,7 +2112,7 @@ checksum = "fe44f2bbd99fcb302e246e2d6bcf51aeda346d02a365f80296a07a8c711b6da6" dependencies = [ "argon2", "bcrypt-pbkdf", - "digest 0.11.1", + "digest 0.11.2", "ecdsa", "ed25519-dalek", "hex", @@ -2143,9 +2143,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb" dependencies = [ "memchr", "serde", @@ -2201,9 +2201,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jobserver" @@ -2468,6 +2468,16 @@ dependencies = [ "rand 0.9.2", ] +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libm" version = "0.2.16" @@ 
-2476,9 +2486,9 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.14" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" +checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08" dependencies = [ "bitflags", "libc", @@ -2734,9 +2744,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-integer" @@ -3060,6 +3070,33 @@ dependencies = [ "url", ] +[[package]] +name = "openshell-vm" +version = "0.0.0" +dependencies = [ + "base64 0.22.1", + "clap", + "indicatif", + "libc", + "libloading", + "miette", + "nix", + "openshell-bootstrap", + "openshell-core", + "rustls", + "rustls-pemfile", + "serde", + "serde_json", + "tar", + "thiserror 2.0.18", + "tokio", + "tokio-rustls", + "tonic", + "tracing", + "tracing-subscriber", + "zstd", +] + [[package]] name = "openssh" version = "0.11.6" @@ -3913,7 +3950,7 @@ dependencies = [ "const-oid 0.10.2", "crypto-bigint 0.7.0-rc.18", "crypto-primes", - "digest 0.11.1", + "digest 0.11.2", "pkcs1 0.8.0-rc.4", "pkcs8 0.11.0-rc.11", "rand_core 0.10.0-rc-3", @@ -4105,9 +4142,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.9" +version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "ring", "rustls-pki-types", @@ -4402,7 +4439,7 @@ checksum = "3b167252f3c126be0d8926639c4c4706950f01445900c4b3db0fd7e89fcb750a" dependencies = [ "cfg-if", 
"cpufeatures", - "digest 0.11.1", + "digest 0.11.2", ] [[package]] @@ -4424,7 +4461,7 @@ checksum = "7c5f3b1e2dc8aad28310d8410bd4d7e180eca65fca176c52ab00d364475d0024" dependencies = [ "cfg-if", "cpufeatures", - "digest 0.11.1", + "digest 0.11.2", ] [[package]] @@ -4502,7 +4539,7 @@ version = "3.0.0-rc.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a96996ccff7dfa16f052bd995b4cecc72af22c35138738dc029f0ead6608d" dependencies = [ - "digest 0.11.1", + "digest 0.11.2", "rand_core 0.10.0-rc-3", ] @@ -4986,12 +5023,12 @@ dependencies = [ [[package]] name = "terminal_size" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" +checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" dependencies = [ "rustix 1.1.4", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -5096,9 +5133,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -5422,9 +5459,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "matchers", "nu-ansi-term", @@ -5530,9 +5567,9 @@ checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "da36089a805484bcccfffe0739803392c8298778a2d2f09febf76fac5ad9025b" [[package]] name = "unicode-truncate" @@ -6360,18 +6397,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.42" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.42" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" dependencies = [ "proc-macro2", "quote", @@ -6457,3 +6494,31 @@ name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md new file mode 100644 index 000000000..c2e9b57b7 --- /dev/null +++ 
b/architecture/custom-vm-runtime.md @@ -0,0 +1,269 @@ +# Custom libkrunfw VM Runtime + +> Status: Experimental and work in progress (WIP). VM support is under active development and may change. + +## Overview + +The OpenShell gateway VM uses [libkrun](https://github.com/containers/libkrun) to boot a +lightweight microVM with Apple Hypervisor.framework (macOS) or KVM (Linux). The kernel +is embedded inside `libkrunfw`, a companion library that packages a pre-built Linux kernel. + +The stock `libkrunfw` from Homebrew ships a minimal kernel without bridge, netfilter, or +conntrack support. This is insufficient for Kubernetes pod networking. + +The custom libkrunfw runtime adds bridge CNI, iptables/nftables, and conntrack support to +the VM kernel, enabling standard Kubernetes networking. + +## Architecture + +```mermaid +graph TD + subgraph Host["Host (macOS / Linux)"] + BIN[openshell-vm binary] + EMB["Embedded runtime (zstd-compressed)\nlibkrun · libkrunfw · gvproxy"] + CACHE["~/.local/share/openshell/vm-runtime/{version}/"] + PROV[Runtime provenance logging] + GVP[gvproxy networking proxy] + + BIN --> EMB + BIN -->|extracts to| CACHE + BIN --> PROV + BIN -->|spawns| GVP + end + + subgraph Guest["Guest VM"] + INIT["openshell-vm-init.sh (PID 1)"] + VAL[Validates kernel capabilities] + CNI[Configures bridge CNI] + EXECA["Starts exec agent\nvsock port 10777"] + PKI[Generates mTLS PKI] + K3S[Execs k3s server] + EXECPY["openshell-vm-exec-agent.py"] + CHK["check-vm-capabilities.sh"] + + INIT --> VAL --> CNI --> EXECA --> PKI --> K3S + end + + BIN -- "fork + krun_start_enter" --> INIT + GVP -- "virtio-net" --> Guest +``` + +## Embedded Runtime + +The openshell-vm binary is fully self-contained, embedding both the VM runtime libraries +and a minimal rootfs as zstd-compressed byte arrays. 
On first use, the binary extracts +these to XDG cache directories with progress bars: + +``` +~/.local/share/openshell/vm-runtime/{version}/ +├── libkrun.{dylib,so} +├── libkrunfw.{5.dylib,so.5} +└── gvproxy + +~/.local/share/openshell/openshell-vm/{version}/instances/<name>/rootfs/ +├── usr/local/bin/k3s +├── opt/openshell/bin/openshell-sandbox +├── opt/openshell/manifests/ +└── ... +``` + +This eliminates the need for separate bundles or downloads - a single ~120MB binary +provides everything needed to run the VM. Old cache versions are automatically +cleaned up when a new version is extracted. + +### Hybrid Approach + +The embedded rootfs uses a "minimal" configuration: +- Includes: Base Ubuntu, k3s binary, supervisor binary, helm charts, manifests +- Excludes: Pre-loaded container images (~1GB savings) + +Container images are pulled on demand when sandboxes are created. First boot takes +~30-60s as k3s initializes; subsequent boots use cached state for ~3-5s startup. + +For fully air-gapped environments requiring pre-loaded images, build with: +```bash +mise run vm:rootfs # Full rootfs (~2GB, includes images) +mise run vm:build # Rebuild binary with full rootfs +``` + +## Network Profile + +The VM uses the bridge CNI profile, which requires a custom libkrunfw with bridge and +netfilter kernel support. The init script validates these capabilities at boot and fails +fast with an actionable error if they are missing.
+ +### Bridge Profile + +- CNI: bridge plugin with `cni0` interface +- IP masquerade: enabled (iptables-legacy via CNI bridge plugin) +- kube-proxy: enabled (nftables mode) +- Service VIPs: functional (ClusterIP, NodePort) +- hostNetwork workarounds: not required + +## Runtime Provenance + +At boot, the openshell-vm binary logs provenance metadata about the loaded runtime bundle: + +- Library paths and SHA-256 hashes +- Whether the runtime is custom-built or stock +- For custom runtimes: libkrunfw commit, kernel version, build timestamp + +This information is sourced from `provenance.json` (generated by the build script) +and makes it straightforward to correlate VM behavior with a specific runtime artifact. + +## Build Pipeline + +```mermaid +graph LR + subgraph Source["crates/openshell-vm/runtime/"] + BUILD["build-custom-libkrunfw.sh\nClones libkrunfw, applies config, builds"] + KCONF["kernel/openshell.kconfig\nKernel config fragment"] + README["README.md\nOperator documentation"] + end + + subgraph Output["target/custom-runtime/"] + LIB["libkrunfw.dylib\nCustom library"] + META["provenance.json\nBuild metadata"] + FRAG["openshell.kconfig\nConfig fragment used"] + FULL["kernel.config\nFull kernel .config"] + end + + KCONF --> BUILD + BUILD --> LIB + BUILD --> META + BUILD --> FRAG + BUILD --> FULL +``` + +## Kernel Config Fragment + +The `openshell.kconfig` fragment enables these kernel features on top of the stock +libkrunfw kernel: + +| Feature | Key Configs | Purpose | +|---------|-------------|---------| +| Network namespaces | `CONFIG_NET_NS`, `CONFIG_NAMESPACES` | Pod isolation | +| veth | `CONFIG_VETH` | Pod network namespace pairs | +| Bridge device | `CONFIG_BRIDGE`, `CONFIG_BRIDGE_NETFILTER` | cni0 bridge for pod networking, kube-proxy bridge traffic visibility | +| Netfilter framework | `CONFIG_NETFILTER`, `CONFIG_NETFILTER_ADVANCED`, `CONFIG_NETFILTER_XTABLES` | iptables/nftables framework | +| xtables match modules | 
`CONFIG_NETFILTER_XT_MATCH_CONNTRACK`, `_COMMENT`, `_MULTIPORT`, `_MARK`, `_STATISTIC`, `_ADDRTYPE`, `_RECENT`, `_LIMIT` | kube-proxy and kubelet iptables rules | +| Connection tracking | `CONFIG_NF_CONNTRACK`, `CONFIG_NF_CT_NETLINK` | NAT state tracking | +| NAT | `CONFIG_NF_NAT` | Service VIP DNAT/SNAT | +| iptables | `CONFIG_IP_NF_IPTABLES`, `CONFIG_IP_NF_FILTER`, `CONFIG_IP_NF_NAT`, `CONFIG_IP_NF_MANGLE` | CNI bridge masquerade and compat | +| nftables | `CONFIG_NF_TABLES`, `CONFIG_NFT_CT`, `CONFIG_NFT_NAT`, `CONFIG_NFT_MASQ`, `CONFIG_NFT_NUMGEN`, `CONFIG_NFT_FIB_IPV4` | kube-proxy nftables mode (primary) | +| IP forwarding | `CONFIG_IP_ADVANCED_ROUTER`, `CONFIG_IP_MULTIPLE_TABLES` | Pod-to-pod routing | +| IPVS | `CONFIG_IP_VS`, `CONFIG_IP_VS_RR`, `CONFIG_IP_VS_NFCT` | kube-proxy IPVS mode (optional) | +| Traffic control | `CONFIG_NET_SCH_HTB`, `CONFIG_NET_CLS_CGROUP` | Kubernetes QoS | +| Cgroups | `CONFIG_CGROUPS`, `CONFIG_CGROUP_DEVICE`, `CONFIG_MEMCG`, `CONFIG_CGROUP_PIDS` | Container resource limits | +| TUN/TAP | `CONFIG_TUN` | CNI plugin support | +| Dummy interface | `CONFIG_DUMMY` | Fallback networking | +| Landlock | `CONFIG_SECURITY_LANDLOCK` | Filesystem sandboxing support | +| Seccomp filter | `CONFIG_SECCOMP_FILTER` | Syscall filtering support | + +See `crates/openshell-vm/runtime/kernel/openshell.kconfig` for the full fragment with +inline comments explaining why each option is needed. + +## Verification + +One verification tool is provided: + +1. **Capability checker** (`check-vm-capabilities.sh`): Runs inside the VM to verify + kernel capabilities. Produces pass/fail results for each required feature. + +## Running Commands In A Live VM + +The standalone `openshell-vm` binary supports `openshell-vm exec -- ` for a running VM. 
+ +- Each VM instance stores local runtime state next to its instance rootfs +- libkrun maps a per-instance host Unix socket into the guest on vsock port `10777` +- `openshell-vm-init.sh` starts `openshell-vm-exec-agent.py` during boot +- `openshell-vm exec` connects to the host socket, which libkrun forwards into the guest exec agent +- The guest exec agent spawns the command, then streams stdout, stderr, and exit status back +- The host-side bootstrap also uses the exec agent to read PKI cert files from the guest + (via `cat /opt/openshell/pki/<file>`) instead of requiring a separate vsock server + +`openshell-vm exec` also injects `KUBECONFIG=/etc/rancher/k3s/k3s.yaml` by default so kubectl-style +commands work the same way they would inside the VM shell. + +## Build Commands + +```bash +# One-time setup: download pre-built runtime (~30s) +mise run vm:setup + +# Build and run +mise run vm + +# Build embedded binary with base rootfs (~120MB, recommended) +mise run vm:rootfs -- --base # Build base rootfs tarball +mise run vm:build # Build binary with embedded rootfs + +# Build with full rootfs (air-gapped, ~2GB+) +mise run vm:rootfs # Build full rootfs tarball +mise run vm:build # Rebuild binary + +# With custom kernel (optional, adds ~20 min) +FROM_SOURCE=1 mise run vm:setup # Build runtime from source +mise run vm:build # Then build embedded binary + +# Wipe everything and start over +mise run vm:clean +``` + +## CI/CD + +The openshell-vm build is split into two GitHub Actions workflows that publish to a +rolling `vm-dev` GitHub Release: + +### Kernel Runtime (`release-vm-kernel.yml`) + +Builds the custom libkrunfw (kernel firmware), libkrun (VMM), and gvproxy for all +supported platforms. Runs on-demand or when the kernel config / pinned versions change.
+ +| Platform | Runner | Build Method | +|----------|--------|-------------| +| Linux ARM64 | `build-arm64` (self-hosted) | Native `build-libkrun.sh` | +| Linux x86_64 | `build-amd64` (self-hosted) | Native `build-libkrun.sh` | +| macOS ARM64 | `macos-latest-xlarge` (GitHub-hosted) | `build-custom-libkrunfw.sh` (krunvm) + `build-libkrun-macos.sh` | + +Artifacts: `vm-runtime-{platform}.tar.zst` containing libkrun, libkrunfw, gvproxy, and +provenance metadata. + +The macOS kernel build requires a real macOS ARM64 runner because it uses `krunvm` to +compile the Linux kernel inside a Fedora VM (Hypervisor.framework). The kernel inside +libkrunfw is always Linux regardless of host platform. + +### VM Binary (`release-vm-dev.yml`) + +Builds the self-extracting openshell-vm binary for all platforms. Runs on every push +to `main` that touches VM-related crates. + +```mermaid +graph TD + CV[compute-versions] --> DL[download-kernel-runtime\nfrom vm-dev release] + DL --> RFS_ARM[build-rootfs arm64] + DL --> RFS_AMD[build-rootfs amd64] + RFS_ARM --> VM_ARM[build-vm linux-arm64] + RFS_AMD --> VM_AMD[build-vm linux-amd64] + RFS_ARM --> VM_MAC["build-vm-macos\n(osxcross, reuses arm64 rootfs)"] + VM_ARM --> REL[release-vm-dev\nupload to rolling release] + VM_AMD --> REL + VM_MAC --> REL +``` + +The macOS binary is cross-compiled via osxcross (no macOS runner needed for the binary +build — only for the kernel build). The macOS VM guest is always Linux ARM64, so it +reuses the arm64 rootfs. + +macOS binaries produced via osxcross are not codesigned. Users must self-sign: +```bash +codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - ./openshell-vm +``` + +## Rollout Strategy + +1. Custom runtime is embedded by default when building with `mise run vm:build`. +2. The init script validates kernel capabilities at boot and fails fast if missing. +3. For development, override with `OPENSHELL_VM_RUNTIME_DIR` to use a local directory. +4. 
In CI, kernel runtime is pre-built and cached in the `vm-dev` release. The binary + build downloads it via `download-kernel-runtime.sh`. diff --git a/crates/openshell-bootstrap/src/lib.rs b/crates/openshell-bootstrap/src/lib.rs index 8ce10703e..71d223d66 100644 --- a/crates/openshell-bootstrap/src/lib.rs +++ b/crates/openshell-bootstrap/src/lib.rs @@ -6,12 +6,12 @@ pub mod edge_token; pub mod errors; pub mod image; -mod constants; +pub mod constants; mod docker; mod metadata; -mod mtls; -mod paths; -mod pki; +pub mod mtls; +pub mod paths; +pub mod pki; pub(crate) mod push; mod runtime; diff --git a/crates/openshell-bootstrap/src/paths.rs b/crates/openshell-bootstrap/src/paths.rs index cd3cb7693..1c514f370 100644 --- a/crates/openshell-bootstrap/src/paths.rs +++ b/crates/openshell-bootstrap/src/paths.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use miette::Result; -use openshell_core::paths::xdg_config_dir; +use openshell_core::paths::{xdg_config_dir, xdg_data_dir}; use std::path::PathBuf; /// Path to the file that stores the active gateway name. @@ -26,6 +26,13 @@ pub fn last_sandbox_path(gateway: &str) -> Result { Ok(gateways_dir()?.join(gateway).join("last_sandbox")) } +/// Base directory for openshell-vm data (without version). +/// +/// Location: `$XDG_DATA_HOME/openshell/openshell-vm/` +pub fn openshell_vm_base_dir() -> Result { + Ok(xdg_data_dir()?.join("openshell").join("openshell-vm")) +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 87d377b39..1d9305ff6 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -62,9 +62,13 @@ fn resolve_gateway( gateway_endpoint: &Option, ) -> Result { if let Some(endpoint) = gateway_endpoint { + // When a gateway name is explicitly provided (via flag or env var), + // trust it directly — don't require metadata to exist yet. 
This + // avoids a race condition where mTLS certs are stored under the + // real gateway name but the CLI falls back to using the raw + // endpoint URL (producing a mangled path like `https___...`). let name = gateway_flag .clone() - .filter(|name| get_gateway_metadata(name).is_some()) .or_else(|| find_gateway_by_endpoint(endpoint)) .unwrap_or_else(|| endpoint.clone()); return Ok(GatewayContext { diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index c40640c30..218d04e99 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -2118,7 +2118,12 @@ pub async fn sandbox_create( // Track whether we have seen a non-Ready phase during the watch. let mut saw_non_ready = SandboxPhase::try_from(sandbox.phase) != Ok(SandboxPhase::Ready); let start_time = Instant::now(); - let provision_timeout = Duration::from_secs(300); + let provision_timeout = Duration::from_secs( + std::env::var("OPENSHELL_PROVISION_TIMEOUT") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(300), + ); // Track whether we saw the gateway become ready (from log messages). let mut saw_gateway_ready = false; diff --git a/crates/openshell-core/src/paths.rs b/crates/openshell-core/src/paths.rs index bd9ce23d4..fd0a141b3 100644 --- a/crates/openshell-core/src/paths.rs +++ b/crates/openshell-core/src/paths.rs @@ -29,6 +29,19 @@ pub fn openshell_config_dir() -> Result { Ok(xdg_config_dir()?.join("openshell")) } +/// Resolve the XDG data base directory. +/// +/// Returns `$XDG_DATA_HOME` if set, otherwise `$HOME/.local/share`. +pub fn xdg_data_dir() -> Result { + if let Ok(path) = std::env::var("XDG_DATA_HOME") { + return Ok(PathBuf::from(path)); + } + let home = std::env::var("HOME") + .into_diagnostic() + .wrap_err("HOME is not set")?; + Ok(PathBuf::from(home).join(".local").join("share")) +} + /// Create a directory (and parents) with owner-only permissions (`0o700`) on /// Unix. On non-Unix platforms, falls back to default permissions. 
/// diff --git a/crates/openshell-server/src/sandbox/mod.rs b/crates/openshell-server/src/sandbox/mod.rs index c5e9a8335..a5d7dc071 100644 --- a/crates/openshell-server/src/sandbox/mod.rs +++ b/crates/openshell-server/src/sandbox/mod.rs @@ -786,7 +786,11 @@ fn apply_supervisor_sideload(pod_template: &mut serde_json::Value) { /// The init container mounts the PVC at a temporary path so it can still see /// the image's `/sandbox` directory. It checks for a sentinel file and skips /// the copy if the PVC was already initialised. -fn apply_workspace_persistence(pod_template: &mut serde_json::Value, image: &str) { +fn apply_workspace_persistence( + pod_template: &mut serde_json::Value, + image: &str, + image_pull_policy: &str, +) { let Some(spec) = pod_template.get_mut("spec").and_then(|v| v.as_object_mut()) else { return; }; @@ -827,19 +831,24 @@ fn apply_workspace_persistence(pod_template: &mut serde_json::Value, image: &str // read the image's original /sandbox contents. It copies them into // the PVC only when the sentinel file is absent. // + // Prefer a tar stream over `cp -a`: some sandbox images contain + // self-referential symlinks under `/sandbox/.uv`, and GNU cp can + // fail while seeding the PVC even though preserving the symlink as-is + // is valid. `tar` copies the tree without dereferencing those links. + // // The inner `[ -d ... ]` guard handles custom images that don't have // a /sandbox directory — the copy is skipped but the sentinel is // still written so subsequent starts are instant. let copy_cmd = format!( "if [ ! -f {WORKSPACE_INIT_MOUNT_PATH}/{WORKSPACE_SENTINEL} ]; then \ if [ -d {WORKSPACE_MOUNT_PATH} ]; then \ - cp -a {WORKSPACE_MOUNT_PATH}/. {WORKSPACE_INIT_MOUNT_PATH}/; \ + tar -C {WORKSPACE_MOUNT_PATH} -cf - . 
| tar -C {WORKSPACE_INIT_MOUNT_PATH} -xpf -; \ fi && \ touch {WORKSPACE_INIT_MOUNT_PATH}/{WORKSPACE_SENTINEL}; \ fi" ); - init_containers.push(serde_json::json!({ + let mut init_spec = serde_json::json!({ "name": WORKSPACE_INIT_CONTAINER_NAME, "image": image, "command": ["sh", "-c", copy_cmd], @@ -848,7 +857,11 @@ fn apply_workspace_persistence(pod_template: &mut serde_json::Value, image: &str "name": WORKSPACE_VOLUME_NAME, "mountPath": WORKSPACE_INIT_MOUNT_PATH }] - })); + }); + if !image_pull_policy.is_empty() { + init_spec["imagePullPolicy"] = serde_json::json!(image_pull_policy); + } + init_containers.push(init_spec); } } @@ -1126,7 +1139,7 @@ fn sandbox_template_to_k8s( // that /sandbox data survives pod rescheduling. Skipped when the user // provides custom volumeClaimTemplates to avoid conflicts. if inject_workspace { - apply_workspace_persistence(&mut result, image); + apply_workspace_persistence(&mut result, image, image_pull_policy); } result @@ -2024,7 +2037,11 @@ mod tests { } }); - apply_workspace_persistence(&mut pod_template, "openshell/sandbox:latest"); + apply_workspace_persistence( + &mut pod_template, + "openshell/sandbox:latest", + "IfNotPresent", + ); // Init container let init_containers = pod_template["spec"]["initContainers"] @@ -2033,6 +2050,7 @@ mod tests { assert_eq!(init_containers.len(), 1); assert_eq!(init_containers[0]["name"], WORKSPACE_INIT_CONTAINER_NAME); assert_eq!(init_containers[0]["image"], "openshell/sandbox:latest"); + assert_eq!(init_containers[0]["imagePullPolicy"], "IfNotPresent"); assert_eq!(init_containers[0]["securityContext"]["runAsUser"], 0); // Init container mounts PVC at temp path, not /sandbox @@ -2078,7 +2096,7 @@ mod tests { } }); - apply_workspace_persistence(&mut pod_template, "my-custom-image:v2"); + apply_workspace_persistence(&mut pod_template, "my-custom-image:v2", "IfNotPresent"); let init_image = pod_template["spec"]["initContainers"][0]["image"] .as_str() @@ -2100,7 +2118,7 @@ mod tests { } }); - 
apply_workspace_persistence(&mut pod_template, "img:latest"); + apply_workspace_persistence(&mut pod_template, "img:latest", "Always"); let cmd = pod_template["spec"]["initContainers"][0]["command"] .as_array() @@ -2111,8 +2129,8 @@ mod tests { "init script must check for sentinel file" ); assert!( - script.contains("cp -a"), - "init script must copy image contents" + script.contains("tar -C"), + "init script must seed image contents with a tar stream" ); } diff --git a/crates/openshell-vm/Cargo.toml b/crates/openshell-vm/Cargo.toml new file mode 100644 index 000000000..7d74b3139 --- /dev/null +++ b/crates/openshell-vm/Cargo.toml @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "openshell-vm" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +repository.workspace = true +description = "MicroVM runtime using libkrun for hardware-isolated execution" + +[lib] +name = "openshell_vm" +path = "src/lib.rs" + +[[bin]] +name = "openshell-vm" +path = "src/main.rs" + +[dependencies] +base64 = "0.22" +clap = { workspace = true } +indicatif = "0.17" +libc = "0.2" +libloading = "0.8" +miette = { workspace = true } +nix = { workspace = true } +openshell-bootstrap = { path = "../openshell-bootstrap" } +openshell-core = { path = "../openshell-core" } +serde = { workspace = true } +serde_json = "1" +tar = "0.4" +thiserror = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +zstd = "0.13" + +# Async runtime and gRPC for health check +tokio = { workspace = true } +tonic = { workspace = true, features = ["tls", "tls-native-roots"] } +rustls = { workspace = true } +rustls-pemfile = { workspace = true } +tokio-rustls = { workspace = true } + +[build-dependencies] +zstd = "0.13" + +[lints] +workspace = true diff --git a/crates/openshell-vm/README.md 
b/crates/openshell-vm/README.md new file mode 100644 index 000000000..fcca20d5b --- /dev/null +++ b/crates/openshell-vm/README.md @@ -0,0 +1,244 @@ +# openshell-vm + +> Status: Experimental and work in progress (WIP). VM support is under active development and may change. + +MicroVM runtime for OpenShell, powered by [libkrun](https://github.com/containers/libkrun). Boots a lightweight ARM64 Linux VM on macOS (Apple Hypervisor.framework) or Linux (KVM) running a single-node k3s cluster with the OpenShell control plane. + +## Quick Start + +```bash +# One-time setup: download pre-built runtime (~30s) +mise run vm:setup + +# Build and run the VM +mise run vm +``` + +## Prerequisites + +- **macOS (Apple Silicon)** or **Linux (aarch64 or x86_64 with KVM)** +- Rust toolchain +- [mise](https://mise.jdx.dev/) task runner +- Docker (for rootfs builds) +- `gh` CLI (for downloading pre-built runtime) + +### macOS-Specific + +The binary must be codesigned with the Hypervisor.framework entitlement. The `mise run vm` flow handles this automatically. To codesign manually: + +```bash +codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - target/debug/openshell-vm +``` + +## Setup + +### Download Pre-Built Runtime (Default) + +Downloads libkrun, libkrunfw, and gvproxy from the `vm-dev` GitHub Release: + +```bash +mise run vm:setup +``` + +### Build from Source + +Compiles the runtime from source (15-45 minutes, needed for custom kernel work): + +```bash +FROM_SOURCE=1 mise run vm:setup +``` + +On macOS this builds a custom libkrunfw (kernel firmware with bridge/netfilter support) via `krunvm`, then builds a portable libkrun. On Linux it builds both natively. + +## Build + +Build the openshell-vm binary with embedded runtime: + +```bash +mise run vm:build +``` + +This compresses runtime artifacts, compiles the Rust binary with `include_bytes!()` embedding, codesigns it (macOS), and stages the sidecar runtime bundle. 
+ +## Rootfs + +The rootfs is an Ubuntu filesystem containing k3s, pre-loaded container images, and the OpenShell binaries. Build it with: + +```bash +# Base rootfs (~200-300MB, cold starts in ~30-60s) +mise run vm:rootfs -- --base + +# Full rootfs (~2GB+, pre-initialized, boots in ~3-5s) +mise run vm:rootfs +``` + +## Run + +### Default (Gateway Mode) + +Boots the full OpenShell gateway -- k3s + openshell-server + openshell-sandbox: + +```bash +mise run vm +``` + +Or run the binary directly: + +```bash +./target/debug/openshell-vm +``` + +### Custom Process + +Run an arbitrary process inside a fresh VM instead of k3s: + +```bash +./target/debug/openshell-vm --exec /bin/sh --vcpus 2 --mem 2048 +``` + +### Execute in a Running VM + +Attach to a running VM and run a command: + +```bash +./target/debug/openshell-vm exec -- ls / +./target/debug/openshell-vm exec -- sh # interactive shell +``` + +### Named Instances + +Run multiple isolated VM instances side-by-side: + +```bash +./target/debug/openshell-vm --name dev +./target/debug/openshell-vm --name staging +``` + +Each instance gets its own extracted rootfs under `~/.local/share/openshell/openshell-vm/<version>/instances/<name>/rootfs`. + +## CLI Reference + +``` +openshell-vm [OPTIONS] [COMMAND] + +Options: + --rootfs <path> Path to aarch64 Linux rootfs directory + --name <name> Named VM instance (auto-clones rootfs) + --exec <path> Run a custom process instead of k3s + --args <arg>... Arguments to the executable + --env <key=val>... Environment variables + --workdir <dir> Working directory inside the VM [default: /] + -p, --port <host:guest>... 
Port mappings (host_port:guest_port) + --vcpus Virtual CPUs [default: 4 gateway, 2 exec] + --mem RAM in MiB [default: 8192 gateway, 2048 exec] + --krun-log-level <0-5> libkrun log level [default: 1] + --net Networking: gvproxy, tsi, none [default: gvproxy] + --reset Wipe runtime state before booting + +Subcommands: + prepare-rootfs Ensure the target rootfs exists + exec Execute a command inside a running VM +``` + +## mise Tasks Reference + +| Task | Description | +|------|-------------| +| `vm` | Build and run the VM | +| `vm:build` | Build openshell-vm binary with embedded runtime | +| `vm:setup` | One-time setup: download (or build) the VM runtime | +| `vm:rootfs` | Build the VM rootfs tarball (`-- --base` for lightweight) | +| `vm:clean` | Remove all VM cached artifacts | +| `e2e:vm` | Boot VM and run smoke e2e tests | + +### Common Workflows + +```bash +# First time setup +mise run vm:setup # download pre-built runtime (~30s) +mise run vm # build + run + +# Day-to-day iteration +mise run vm # incremental build + run + +# Need fresh rootfs +mise run vm:rootfs -- --base # rebuild base rootfs +mise run vm:build # rebuild binary with new rootfs + +# Something broken, start over +mise run vm:clean # wipe everything +mise run vm:setup # re-download runtime +mise run vm # full rebuild + run + +# Custom kernel work (rare) +FROM_SOURCE=1 mise run vm:setup +``` + +## Architecture + +``` +Host (macOS / Linux) + openshell-vm binary + |-- Embedded runtime (libkrun, libkrunfw, gvproxy, rootfs.tar.zst) + |-- FFI: loads libkrun at runtime via dlopen + |-- gvproxy: virtio-net networking (real eth0 + DHCP) + |-- virtio-fs: shares rootfs with guest + \-- vsock: host-to-guest command execution (port 10777) + +Guest VM (aarch64 Linux) + PID 1: openshell-vm-init.sh + |-- Mounts filesystems, configures networking + |-- Sets up bridge CNI, generates PKI + \-- Execs k3s server + |-- openshell-server (gateway control plane) + \-- openshell-sandbox (pod supervisor) +``` + +## 
Environment Variables + +| Variable | When | Purpose | +|----------|------|---------| +| `OPENSHELL_VM_RUNTIME_COMPRESSED_DIR` | Build time | Path to compressed runtime artifacts | +| `OPENSHELL_VM_RUNTIME_DIR` | Runtime | Override the runtime bundle directory | +| `OPENSHELL_VM_DIAG=1` | Runtime | Enable diagnostic output inside the VM | +| `FROM_SOURCE=1` | `vm:setup` | Build runtime from source instead of downloading | + +## Custom Kernel (libkrunfw) + +The stock libkrunfw (e.g. from Homebrew) lacks bridge, netfilter, and conntrack support needed for pod networking. OpenShell builds a custom libkrunfw with these enabled. + +Build it via the setup command: + +```bash +FROM_SOURCE=1 mise run vm:setup +``` + +See [`runtime/README.md`](runtime/README.md) for details on the kernel config and troubleshooting. + +## Testing + +Integration tests require a built rootfs and macOS ARM64 with libkrun: + +```bash +cargo test -p openshell-vm -- --ignored +``` + +Individual tests: + +```bash +# Full gateway boot test (boots VM, waits for gRPC on port 30051) +cargo test -p openshell-vm gateway_boots -- --ignored + +# Run a command inside the VM +cargo test -p openshell-vm gateway_exec_runs -- --ignored + +# Exec into a running VM +cargo test -p openshell-vm gateway_exec_attaches -- --ignored +``` + +Verify kernel capabilities inside a running VM: + +```bash +./target/debug/openshell-vm exec -- /srv/check-vm-capabilities.sh +./target/debug/openshell-vm exec -- /srv/check-vm-capabilities.sh --json +``` diff --git a/crates/openshell-vm/build.rs b/crates/openshell-vm/build.rs new file mode 100644 index 000000000..33fab9a78 --- /dev/null +++ b/crates/openshell-vm/build.rs @@ -0,0 +1,142 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Build script for openshell-vm. +//! +//! This script copies pre-compressed VM runtime artifacts (libkrun, libkrunfw, +//! 
gvproxy) to `OUT_DIR` for embedding via `include_bytes!()`. +//! +//! The compressed artifacts are expected to be prepared by: +//! `mise run vm:setup` (one-time) then `mise run vm:build` +//! +//! Environment: +//! `OPENSHELL_VM_RUNTIME_COMPRESSED_DIR` - Path to compressed artifacts + +use std::path::PathBuf; +use std::{env, fs}; + +fn main() { + println!("cargo:rerun-if-env-changed=OPENSHELL_VM_RUNTIME_COMPRESSED_DIR"); + + // Re-run if any compressed artifact changes. + if let Ok(dir) = env::var("OPENSHELL_VM_RUNTIME_COMPRESSED_DIR") { + println!("cargo:rerun-if-changed={dir}"); + for name in &[ + "libkrun.so.zst", + "libkrunfw.so.5.zst", + "libkrun.dylib.zst", + "libkrunfw.5.dylib.zst", + "gvproxy.zst", + "rootfs.tar.zst", + ] { + println!("cargo:rerun-if-changed={dir}/{name}"); + } + } + + let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set")); + let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); + + // Determine platform-specific file names + let (libkrun_name, libkrunfw_name) = match target_os.as_str() { + "macos" => ("libkrun.dylib", "libkrunfw.5.dylib"), + "linux" => ("libkrun.so", "libkrunfw.so.5"), + _ => { + println!("cargo:warning=VM runtime not available for {target_os}-{target_arch}"); + generate_stub_resources(&out_dir); + return; + } + }; + + // Check for pre-compressed artifacts from mise task + let compressed_dir = if let Ok(dir) = env::var("OPENSHELL_VM_RUNTIME_COMPRESSED_DIR") { + PathBuf::from(dir) + } else { + println!("cargo:warning=OPENSHELL_VM_RUNTIME_COMPRESSED_DIR not set"); + println!("cargo:warning=Run: mise run vm:setup"); + generate_stub_resources(&out_dir); + return; + }; + + if !compressed_dir.is_dir() { + println!( + "cargo:warning=Compressed runtime dir not found: {}", + compressed_dir.display() + ); + println!("cargo:warning=Run: mise run vm:setup"); + generate_stub_resources(&out_dir); + return; + } + + // Copy 
compressed files to OUT_DIR + let files = [ + (format!("{libkrun_name}.zst"), format!("{libkrun_name}.zst")), + ( + format!("{libkrunfw_name}.zst"), + format!("{libkrunfw_name}.zst"), + ), + ("gvproxy.zst".to_string(), "gvproxy.zst".to_string()), + ("rootfs.tar.zst".to_string(), "rootfs.tar.zst".to_string()), + ]; + + let mut all_found = true; + for (src_name, dst_name) in &files { + let src_path = compressed_dir.join(src_name); + let dst_path = out_dir.join(dst_name); + + if src_path.exists() { + // Remove existing file first (may be read-only from previous build) + if dst_path.exists() { + let _ = fs::remove_file(&dst_path); + } + fs::copy(&src_path, &dst_path).unwrap_or_else(|e| { + panic!( + "Failed to copy {} to {}: {}", + src_path.display(), + dst_path.display(), + e + ) + }); + let size = fs::metadata(&dst_path).map(|m| m.len()).unwrap_or(0); + println!("cargo:warning=Embedded {src_name}: {size} bytes"); + } else { + println!( + "cargo:warning=Missing compressed artifact: {}", + src_path.display() + ); + all_found = false; + } + } + + if !all_found { + println!("cargo:warning=Some artifacts missing. Run: mise run vm:setup"); + generate_stub_resources(&out_dir); + } +} + +/// Generate stub (empty) resource files so the build can complete. +/// The embedded module will fail at runtime if these stubs are used. 
+fn generate_stub_resources(out_dir: &PathBuf) { + let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); + + let (libkrun_name, libkrunfw_name) = match target_os.as_str() { + "macos" => ("libkrun.dylib", "libkrunfw.5.dylib"), + _ => ("libkrun.so", "libkrunfw.so.5"), + }; + + let stubs = [ + format!("{libkrun_name}.zst"), + format!("{libkrunfw_name}.zst"), + "gvproxy.zst".to_string(), + "rootfs.tar.zst".to_string(), + ]; + + for name in &stubs { + let path = out_dir.join(name); + if !path.exists() { + // Write an empty file as a stub + fs::write(&path, b"") + .unwrap_or_else(|e| panic!("Failed to write stub {}: {}", path.display(), e)); + } + } +} diff --git a/crates/openshell-vm/entitlements.plist b/crates/openshell-vm/entitlements.plist new file mode 100644 index 000000000..154f3308e --- /dev/null +++ b/crates/openshell-vm/entitlements.plist @@ -0,0 +1,8 @@ + + + + + com.apple.security.hypervisor + + + diff --git a/crates/openshell-vm/pins.env b/crates/openshell-vm/pins.env new file mode 100644 index 000000000..3c34a4af2 --- /dev/null +++ b/crates/openshell-vm/pins.env @@ -0,0 +1,44 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Pinned dependency versions for openshell-vm builds. +# +# This file is sourced by build-rootfs.sh and +# build-custom-libkrunfw.sh. It centralises version pins and content-addressed +# digests so that builds are reproducible and auditable. +# +# Environment variables override these defaults — CI and local dev workflows +# can still set IMAGE_TAG, K3S_VERSION, etc. as before. +# +# To update a dependency: +# 1. Change the version/digest below. +# 2. Run the relevant build script to verify. +# 3. Commit pins.env alongside any script changes. 
+ +# ── k3s binary ───────────────────────────────────────────────────────── +K3S_VERSION="${K3S_VERSION:-v1.35.2+k3s1}" +K3S_ARM64_SHA256="${K3S_ARM64_SHA256:-228809a7ef47d25c1bdbe746944931ec2fd2edf842b9cf50f1dd4f9ec2505b0e}" +K3S_AMD64_SHA256="${K3S_AMD64_SHA256:-3ae8e35a62ac83e8e197c117858a564134057a7b8703cf73e67ce60d19f4a22b}" + +# ── Base Docker image (digest-pinned) ────────────────────────────────── +# Tag: nvcr.io/nvidia/base/ubuntu:noble-20251013 +VM_BASE_IMAGE="${VM_BASE_IMAGE:-nvcr.io/nvidia/base/ubuntu@sha256:43fa5063e80fbbc533892af3ccca190868ce48db5a8928b19d7815c40436af8e}" + +# ── Container images for rootfs pre-loading (digest-pinned) ──────────── +# Tag: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0 +AGENT_SANDBOX_IMAGE="${AGENT_SANDBOX_IMAGE:-registry.k8s.io/agent-sandbox/agent-sandbox-controller@sha256:ba71ea40ae0872791197badf2ab84f3f482df3902f1fce7ca9e076b1de9b57f6}" +# Tag: ghcr.io/nvidia/openshell-community/sandboxes/base:latest +COMMUNITY_SANDBOX_IMAGE="${COMMUNITY_SANDBOX_IMAGE:-ghcr.io/nvidia/openshell-community/sandboxes/base@sha256:d446c17105e7448e602238a8a5a4ddd0233c071082406522f81c31f8b1309525}" + +# SERVER_IMAGE is intentionally NOT pinned here — it changes frequently +# during local development. Override via IMAGE_REPO_BASE and IMAGE_TAG +# environment variables (defaults: openshell/gateway:dev). 
+ +# ── gvproxy (networking proxy) ────────────────────────────────────────── +# Repo: https://github.com/containers/gvisor-tap-vsock +GVPROXY_VERSION="${GVPROXY_VERSION:-v0.8.8}" + +# ── libkrunfw upstream (commit-pinned) ───────────────────────────────── +# Repo: https://github.com/containers/libkrunfw +# Pinned: 2026-03-27 (main branch HEAD at time of pinning) +LIBKRUNFW_REF="${LIBKRUNFW_REF:-463f717bbdd916e1352a025b6fb2456e882b0b39}" diff --git a/crates/openshell-vm/runtime/README.md b/crates/openshell-vm/runtime/README.md new file mode 100644 index 000000000..c30308e3a --- /dev/null +++ b/crates/openshell-vm/runtime/README.md @@ -0,0 +1,172 @@ +# Custom libkrunfw Runtime + +> Status: Experimental and work in progress (WIP). VM support is under active development and may change. + +This directory contains the build infrastructure for a custom `libkrunfw` runtime +that enables bridge CNI and netfilter support in the OpenShell gateway VM. + +## Why + +The stock `libkrunfw` (from Homebrew) ships a kernel without bridge, netfilter, +or conntrack support. This means the VM cannot: + +- Create `cni0` bridge interfaces (required by the bridge CNI plugin) +- Run kube-proxy (requires nftables) +- Route service VIP traffic (requires NAT/conntrack) + +The custom runtime builds libkrunfw with an additional kernel config fragment +that enables these networking and sandboxing features. 
+
+## Directory Structure
+
+```
+runtime/
+  build-custom-libkrunfw.sh    # Build script for custom libkrunfw
+  kernel/
+    openshell.kconfig          # Kernel config fragment (networking + sandboxing)
+```
+
+## Building
+
+### Prerequisites
+
+- Rust toolchain
+- make, git, curl
+- On macOS: Xcode command line tools and cross-compilation tools for aarch64
+
+### Quick Build
+
+```bash
+# Build custom libkrunfw (clones libkrunfw repo, applies config, builds)
+./crates/openshell-vm/runtime/build-custom-libkrunfw.sh
+
+# Or build the full runtime from source via mise:
+FROM_SOURCE=1 mise run vm:setup
+```
+
+### Output
+
+Build artifacts are placed in `target/custom-runtime/`:
+
+```
+target/custom-runtime/
+  libkrunfw.dylib       # The custom library
+  libkrunfw.5.dylib     # ABI-version-suffixed copy
+  provenance.json       # Build metadata (commit, hash, timestamp)
+  openshell.kconfig     # The config fragment used
+  kernel.config         # Full kernel .config (for debugging)
+```
+
+### Using the Custom Runtime
+
+```bash
+# Point the bundle script at the custom build and rebuild:
+export OPENSHELL_VM_RUNTIME_SOURCE_DIR=target/custom-runtime
+mise run vm:build
+
+# Then boot the VM as usual:
+mise run vm
+```
+
+## Networking
+
+The VM uses bridge CNI for pod networking with nftables-mode kube-proxy for
+service VIP / ClusterIP support. The kernel config fragment enables both
+iptables (for CNI bridge masquerade) and nftables (for kube-proxy).
+
+k3s is started with `--kube-proxy-arg=proxy-mode=nftables` because the
+bundled iptables binaries in k3s have revision-negotiation issues with the
+libkrun kernel's xt_MARK module. nftables mode uses the kernel's nf_tables
+subsystem directly and avoids this entirely.
+
+## Runtime Provenance
+
+At VM boot, the openshell-vm binary logs provenance information about the loaded
+runtime:
+
+```
+runtime: /path/to/openshell-vm.runtime
+  libkrunfw: libkrunfw.dylib
+  sha256: a1b2c3d4e5f6...
+ type: custom (OpenShell-built) + libkrunfw-commit: abc1234 + kernel-version: 6.6.30 + build-timestamp: 2026-03-23T10:00:00Z +``` + +For stock runtimes: +``` +runtime: /path/to/openshell-vm.runtime + libkrunfw: libkrunfw.dylib + sha256: f6e5d4c3b2a1... + type: stock (system/homebrew) +``` + +## Verification + +### Capability Check (inside VM) + +```bash +# Run inside the VM to verify kernel capabilities: +/srv/check-vm-capabilities.sh + +# JSON output for CI: +/srv/check-vm-capabilities.sh --json +``` + +### Rollback + +To revert to the stock runtime: + +```bash +# Unset the custom runtime source: +unset OPENSHELL_VM_RUNTIME_SOURCE_DIR + +# Re-download pre-built runtime and rebuild: +mise run vm:setup +mise run vm:build + +# Boot: +mise run vm +``` + +## Troubleshooting + +### "FailedCreatePodSandBox" bridge errors + +The kernel does not have bridge support. Verify: +```bash +# Inside VM: +ip link add test0 type bridge && echo "bridge OK" && ip link del test0 +``` + +If this fails, you are running the stock runtime. Build and use the custom one. + +### kube-proxy CrashLoopBackOff + +kube-proxy runs in nftables mode. If it crashes, verify nftables support: +```bash +# Inside VM: +nft list ruleset +``` + +If this fails, the kernel may lack `CONFIG_NF_TABLES`. Use the custom runtime. + +Common errors: +- `unknown option "--xor-mark"`: kube-proxy is running in iptables mode instead + of nftables. Verify `--kube-proxy-arg=proxy-mode=nftables` is in the k3s args. + +### Runtime mismatch after upgrade + +If libkrunfw is updated (e.g., via `brew upgrade`), the stock runtime may +change. 
Check provenance: +```bash +# Look for provenance info in VM boot output +grep "runtime:" ~/.local/share/openshell/openshell-vm/console.log +``` + +Re-build the custom runtime if needed: +```bash +FROM_SOURCE=1 mise run vm:setup +mise run vm:build +``` diff --git a/crates/openshell-vm/runtime/build-custom-libkrunfw.sh b/crates/openshell-vm/runtime/build-custom-libkrunfw.sh new file mode 100755 index 000000000..5d50c9153 --- /dev/null +++ b/crates/openshell-vm/runtime/build-custom-libkrunfw.sh @@ -0,0 +1,401 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build a custom libkrunfw with bridge/netfilter kernel support. +# +# This script clones libkrunfw, applies the OpenShell kernel config +# fragment (bridge CNI, iptables, conntrack), builds the library, and +# stages the artifact with provenance metadata. +# +# Prerequisites: +# - Rust toolchain (cargo) +# - make, git, curl +# - Cross-compilation toolchain for aarch64 (if building on x86_64) +# - On macOS: Xcode command line tools +# +# Usage: +# ./build-custom-libkrunfw.sh [--output-dir DIR] [--libkrunfw-ref REF] +# +# Environment: +# LIBKRUNFW_REF - git ref to check out (default: main) +# LIBKRUNFW_REPO - git repo URL (default: github.com/containers/libkrunfw) +# OPENSHELL_RUNTIME_OUTPUT_DIR - output directory for built artifacts + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" +KERNEL_CONFIG_FRAGMENT="${SCRIPT_DIR}/kernel/openshell.kconfig" + +# Source pinned dependency versions (digests, checksums, commit SHAs). +# Environment variables override pins — see pins.env for details. 
+PINS_FILE="${SCRIPT_DIR}/../pins.env" +if [ -f "$PINS_FILE" ]; then + # shellcheck source=../pins.env + source "$PINS_FILE" +fi + +# Defaults (LIBKRUNFW_REF is commit-pinned in pins.env; falls back to main +# only if pins.env is missing and no env var is set). +LIBKRUNFW_REPO="${LIBKRUNFW_REPO:-https://github.com/containers/libkrunfw.git}" +LIBKRUNFW_REF="${LIBKRUNFW_REF:-main}" +OUTPUT_DIR="${OPENSHELL_RUNTIME_OUTPUT_DIR:-${PROJECT_ROOT}/target/custom-runtime}" +BUILD_DIR="${PROJECT_ROOT}/target/libkrunfw-build" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --output-dir) + OUTPUT_DIR="$2"; shift 2 ;; + --libkrunfw-ref) + LIBKRUNFW_REF="$2"; shift 2 ;; + --help|-h) + echo "Usage: $0 [--output-dir DIR] [--libkrunfw-ref REF]" + echo "" + echo "Build a custom libkrunfw with bridge/netfilter kernel support." + echo "" + echo "Options:" + echo " --output-dir DIR Output directory for built artifacts" + echo " --libkrunfw-ref REF Git ref to check out (default: main)" + echo "" + echo "Environment:" + echo " LIBKRUNFW_REPO Git repo URL" + echo " LIBKRUNFW_REF Git ref (branch/tag/commit)" + echo " OPENSHELL_RUNTIME_OUTPUT_DIR Output directory" + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2; exit 1 ;; + esac +done + +echo "==> Building custom libkrunfw" +echo " Repo: ${LIBKRUNFW_REPO}" +echo " Ref: ${LIBKRUNFW_REF}" +echo " Config fragment: ${KERNEL_CONFIG_FRAGMENT}" +echo " Output: ${OUTPUT_DIR}" +echo "" + +# ── Clone / update libkrunfw ──────────────────────────────────────────── + +if [ -d "${BUILD_DIR}/libkrunfw/.git" ]; then + echo "==> Updating existing libkrunfw checkout..." + git -C "${BUILD_DIR}/libkrunfw" fetch origin + git -C "${BUILD_DIR}/libkrunfw" checkout "${LIBKRUNFW_REF}" + git -C "${BUILD_DIR}/libkrunfw" pull --ff-only 2>/dev/null || true +else + echo "==> Cloning libkrunfw..." 
+ mkdir -p "${BUILD_DIR}" + git clone "${LIBKRUNFW_REPO}" "${BUILD_DIR}/libkrunfw" + git -C "${BUILD_DIR}/libkrunfw" checkout "${LIBKRUNFW_REF}" +fi + +LIBKRUNFW_DIR="${BUILD_DIR}/libkrunfw" +LIBKRUNFW_COMMIT=$(git -C "${LIBKRUNFW_DIR}" rev-parse HEAD) +LIBKRUNFW_SHORT=$(git -C "${LIBKRUNFW_DIR}" rev-parse --short HEAD) + +echo " Commit: ${LIBKRUNFW_COMMIT}" + +# ── Detect the kernel version libkrunfw targets ──────────────────────── + +# libkrunfw's Makefile typically sets KERNEL_VERSION or has it in a +# config file. Try to detect it. +KERNEL_VERSION="" +if [ -f "${LIBKRUNFW_DIR}/Makefile" ]; then + KERNEL_VERSION=$(grep -oE 'KERNEL_VERSION\s*=\s*linux-[^\s]+' "${LIBKRUNFW_DIR}/Makefile" 2>/dev/null | head -1 | sed 's/.*= *//' || true) +fi +if [ -z "$KERNEL_VERSION" ] && [ -f "${LIBKRUNFW_DIR}/kernel_version" ]; then + KERNEL_VERSION=$(cat "${LIBKRUNFW_DIR}/kernel_version") +fi +echo " Kernel version: ${KERNEL_VERSION:-unknown}" + +# ── Apply kernel config fragment ──────────────────────────────────────── + +echo "==> Applying OpenShell kernel config fragment..." + +# libkrunfw builds the kernel with a config generated from its own +# sources. The config merge happens after `make olddefconfig` runs +# on the base config. We use the kernel's scripts/kconfig/merge_config.sh +# when available, otherwise do a simple append+olddefconfig. + +MERGE_HOOK="${LIBKRUNFW_DIR}/openshell-kconfig-hook.sh" +cat > "${MERGE_HOOK}" << 'HOOKEOF' +#!/usr/bin/env bash +# Hook called by the libkrunfw build after extracting the kernel source. +# Merges the OpenShell kernel config fragment into .config. +set -euo pipefail + +KERNEL_DIR="$1" +FRAGMENT="$2" + +if [ ! -d "$KERNEL_DIR" ]; then + echo "ERROR: kernel source dir not found: $KERNEL_DIR" >&2 + exit 1 +fi + +if [ ! 
-f "$FRAGMENT" ]; then + echo "ERROR: config fragment not found: $FRAGMENT" >&2 + exit 1 +fi + +cd "$KERNEL_DIR" + +if [ -f scripts/kconfig/merge_config.sh ]; then + echo " Using kernel merge_config.sh" + KCONFIG_CONFIG=.config ./scripts/kconfig/merge_config.sh -m .config "$FRAGMENT" +else + echo " Appending fragment and running olddefconfig" + cat "$FRAGMENT" >> .config +fi + +# Detect the kernel ARCH value from the host (or krunvm guest) architecture. +case "$(uname -m)" in + aarch64) KARCH="arm64" ;; + x86_64) KARCH="x86_64" ;; + *) KARCH="$(uname -m)" ;; +esac +echo " Kernel ARCH: ${KARCH}" +make ARCH="${KARCH}" olddefconfig + +# Verify critical configs are set +REQUIRED=( + CONFIG_BRIDGE + CONFIG_BRIDGE_NETFILTER + CONFIG_NETFILTER + CONFIG_NF_CONNTRACK + CONFIG_NF_NAT + CONFIG_IP_NF_IPTABLES + CONFIG_IP_NF_FILTER + CONFIG_IP_NF_NAT + CONFIG_NF_TABLES + CONFIG_NFT_NUMGEN + CONFIG_NFT_FIB_IPV4 + CONFIG_NFT_FIB_IPV6 + CONFIG_NFT_CT + CONFIG_NFT_NAT + CONFIG_NFT_MASQ + CONFIG_VETH + CONFIG_NET_NS +) + +MISSING=() +for cfg in "${REQUIRED[@]}"; do + if ! grep -q "^${cfg}=[ym]" .config; then + MISSING+=("$cfg") + fi +done + +if [ ${#MISSING[@]} -gt 0 ]; then + echo "ERROR: Required kernel configs not set after merge:" >&2 + printf " %s\n" "${MISSING[@]}" >&2 + exit 1 +fi + +echo " All required kernel configs verified." +HOOKEOF +chmod +x "${MERGE_HOOK}" + +# ── Build libkrunfw ──────────────────────────────────────────────────── + +echo "==> Building libkrunfw (this may take 10-30 minutes)..." 
+ +cd "${LIBKRUNFW_DIR}" + +# Detect macOS vs Linux and pick the right library extension / target +if [ "$(uname -s)" = "Darwin" ]; then + LIB_EXT="dylib" +else + LIB_EXT="so" +fi + +# Detect the kernel source directory name from the Makefile +KERNEL_DIR_NAME=$(grep -oE 'KERNEL_VERSION\s*=\s*linux-[^\s]+' Makefile | head -1 | sed 's/KERNEL_VERSION *= *//') +if [ -z "$KERNEL_DIR_NAME" ]; then + echo "ERROR: Could not detect KERNEL_VERSION from Makefile" >&2 + exit 1 +fi +echo " Kernel source dir: ${KERNEL_DIR_NAME}" + +if [ "$(uname -s)" = "Darwin" ]; then + # On macOS, use krunvm to build the kernel inside a lightweight Linux VM. + # This matches the upstream libkrunfw build approach and avoids all the + # issues with Docker emulation and APFS filesystem limitations. + # + # Prerequisites: brew tap slp/krun && brew install krunvm + + if ! command -v krunvm &>/dev/null; then + echo "ERROR: krunvm is required to build the kernel on macOS" >&2 + echo " Install with: brew tap slp/krun && brew install krunvm" >&2 + exit 1 + fi + + echo "==> Building kernel inside krunvm (macOS detected)..." + + VM_NAME="libkrunfw-openshell" + + # Clean up any leftover VM from a previous failed run + krunvm delete "${VM_NAME}" 2>/dev/null || true + + # Copy the config fragment into the libkrunfw tree so the VM can see it. + # The merge hook (MERGE_HOOK) is already written there by the cat above. + cp -f "${KERNEL_CONFIG_FRAGMENT}" "${LIBKRUNFW_DIR}/openshell.kconfig" + + echo " Creating VM..." + # krunvm may print "The volume has been configured" on first use of a + # volume path and exit non-zero. Retry once if that happens. + if ! krunvm create fedora \ + --name "${VM_NAME}" \ + --cpus 4 \ + --mem 4096 \ + -v "${LIBKRUNFW_DIR}:/work" \ + -w /work; then + echo " Retrying VM creation..." + krunvm create fedora \ + --name "${VM_NAME}" \ + --cpus 4 \ + --mem 4096 \ + -v "${LIBKRUNFW_DIR}:/work" \ + -w /work + fi + + echo " Installing build dependencies..." 
+ krunvm start "${VM_NAME}" /usr/bin/dnf -- install -y \ + 'dnf-command(builddep)' python3-pyelftools + + krunvm start "${VM_NAME}" /usr/bin/dnf -- builddep -y kernel + + # Step 1: prepare kernel sources (download, extract, patch, base config) + echo " Preparing kernel sources..." + krunvm start "${VM_NAME}" /usr/bin/make -- "${KERNEL_DIR_NAME}" + + # Step 2: merge the OpenShell config fragment + echo " Merging OpenShell kernel config fragment..." + krunvm start "${VM_NAME}" /usr/bin/bash -- \ + /work/openshell-kconfig-hook.sh "/work/${KERNEL_DIR_NAME}" /work/openshell.kconfig + + # Step 3: build the kernel and generate the C bundle + echo " Building kernel (this is the slow part)..." + krunvm start "${VM_NAME}" /usr/bin/make -- -j4 + + echo " Cleaning up VM..." + krunvm delete "${VM_NAME}" + + # Clean up temp files from the libkrunfw tree + rm -f "${LIBKRUNFW_DIR}/openshell.kconfig" + + if [ ! -f "${LIBKRUNFW_DIR}/kernel.c" ]; then + echo "ERROR: kernel.c was not produced — build failed" >&2 + exit 1 + fi + + # Compile the shared library on the host (uses host cc for a .dylib) + echo "==> Compiling libkrunfw.dylib on host..." + ABI_VERSION=$(grep -oE 'ABI_VERSION\s*=\s*[0-9]+' Makefile | head -1 | sed 's/[^0-9]//g') + cc -fPIC -DABI_VERSION="${ABI_VERSION}" -shared -o "libkrunfw.${ABI_VERSION}.dylib" kernel.c +else + # On Linux, we can do everything natively in three steps: + + # Step 1: prepare kernel sources + echo " Preparing kernel sources..." + make "${KERNEL_DIR_NAME}" + + # Step 2: merge config fragment + echo "==> Merging OpenShell kernel config fragment..." + bash "${MERGE_HOOK}" "${LIBKRUNFW_DIR}/${KERNEL_DIR_NAME}" "${KERNEL_CONFIG_FRAGMENT}" + + # Step 3: build the kernel and shared library + make -j"$(nproc)" "$(grep -oE 'KRUNFW_BINARY_Linux\s*=\s*\S+' Makefile | head -1 | sed 's/[^=]*= *//')" || \ + make -j"$(nproc)" libkrunfw.so +fi + +# ── Stage output artifacts ────────────────────────────────────────────── + +echo "==> Staging artifacts..." 
+mkdir -p "${OUTPUT_DIR}"
+
+# Find the built library — check versioned names first. On macOS the
+# versioned name is libkrunfw.5.dylib (matched by libkrunfw*.dylib), but on
+# Linux it is libkrunfw.so.5 — that name does NOT end in ".so", so it needs
+# its own pattern (libkrunfw.so.*) or staging silently fails on Linux when
+# only the versioned library was produced.
+BUILT_LIB=""
+for candidate in \
+  "${LIBKRUNFW_DIR}"/libkrunfw*.${LIB_EXT} \
+  "${LIBKRUNFW_DIR}"/libkrunfw.${LIB_EXT}.* \
+  "${LIBKRUNFW_DIR}/libkrunfw.${LIB_EXT}" \
+  "${LIBKRUNFW_DIR}/target/release/libkrunfw.${LIB_EXT}" \
+  "${LIBKRUNFW_DIR}/build/libkrunfw.${LIB_EXT}"; do
+  if [ -f "$candidate" ]; then
+    BUILT_LIB="$candidate"
+    break
+  fi
+done
+
+if [ -z "$BUILT_LIB" ]; then
+  # Both lines go to stderr so the full error survives stdout redirection.
+  echo "ERROR: Could not find built libkrunfw.${LIB_EXT}" >&2
+  echo "       Searched in ${LIBKRUNFW_DIR}/ for libkrunfw*.${LIB_EXT} and libkrunfw.${LIB_EXT}.*" >&2
+  exit 1
+fi
+
+echo "   Found library: ${BUILT_LIB}"
+
+# Compute SHA-256 (sha256sum on Linux, shasum fallback on macOS)
+if command -v sha256sum &>/dev/null; then
+  ARTIFACT_HASH=$(sha256sum "${BUILT_LIB}" | cut -d' ' -f1)
+else
+  ARTIFACT_HASH=$(shasum -a 256 "${BUILT_LIB}" | cut -d' ' -f1)
+fi
+ARTIFACT_HASH_SHORT="${ARTIFACT_HASH:0:12}"
+
+# Copy the library — always stage as libkrunfw.dylib / libkrunfw.so
+# (the base name the runtime loader expects) plus the original name
+cp "${BUILT_LIB}" "${OUTPUT_DIR}/libkrunfw.${LIB_EXT}"
+BUILT_BASENAME="$(basename "${BUILT_LIB}")"
+if [ "${BUILT_BASENAME}" != "libkrunfw.${LIB_EXT}" ]; then
+  cp "${BUILT_LIB}" "${OUTPUT_DIR}/${BUILT_BASENAME}"
+fi
+
+# Copy the kernel config that was actually used (for reproducibility)
+KERNEL_SRC_DIR=""
+for candidate in \
+  "${LIBKRUNFW_DIR}/linux-"* \
+  "${LIBKRUNFW_DIR}/build/linux-"* \
+  "${LIBKRUNFW_DIR}/kernel/linux-"*; do
+  if [ -d "$candidate" ] && [ -f "${candidate}/.config" ]; then
+    KERNEL_SRC_DIR="$candidate"
+    break
+  fi
+done
+
+if [ -n "$KERNEL_SRC_DIR" ] && [ -f "${KERNEL_SRC_DIR}/.config" ]; then
+  cp "${KERNEL_SRC_DIR}/.config" "${OUTPUT_DIR}/kernel.config"
+fi
+
+# Copy our fragment for reference
+cp "${KERNEL_CONFIG_FRAGMENT}" "${OUTPUT_DIR}/openshell.kconfig"
+
+# ── Write provenance metadata ──────────────────────────────────────────
+
+cat > "${OUTPUT_DIR}/provenance.json" << EOF
+{
+  "artifact": 
"libkrunfw-custom", + "version": "0.1.0-openshell", + "build_timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)", + "libkrunfw_repo": "${LIBKRUNFW_REPO}", + "libkrunfw_ref": "${LIBKRUNFW_REF}", + "libkrunfw_commit": "${LIBKRUNFW_COMMIT}", + "kernel_version": "${KERNEL_VERSION:-unknown}", + "kernel_config_fragment": "openshell.kconfig", + "artifact_sha256": "${ARTIFACT_HASH}", + "host_os": "$(uname -s)", + "host_arch": "$(uname -m)", + "builder": "build-custom-libkrunfw.sh" +} +EOF + +echo "" +echo "==> Build complete" +echo " Library: ${OUTPUT_DIR}/libkrunfw.${LIB_EXT}" +echo " SHA256: ${ARTIFACT_HASH_SHORT}..." +echo " Provenance: ${OUTPUT_DIR}/provenance.json" +echo " Commit: ${LIBKRUNFW_SHORT}" +echo "" +echo "To use this runtime:" +echo " export OPENSHELL_VM_RUNTIME_SOURCE_DIR=${OUTPUT_DIR}" +echo " mise run vm:build" diff --git a/crates/openshell-vm/runtime/kernel/openshell.kconfig b/crates/openshell-vm/runtime/kernel/openshell.kconfig new file mode 100644 index 000000000..cc219f50d --- /dev/null +++ b/crates/openshell-vm/runtime/kernel/openshell.kconfig @@ -0,0 +1,121 @@ +# Custom kernel config fragment for libkrunfw (OpenShell VM) +# +# This fragment is applied on top of libkrunfw's base kernel config +# to enable bridge CNI, netfilter/iptables, and conntrack support +# required for Kubernetes pod networking in the VM. +# +# Apply with: scripts/merge-kconfig.sh +# +# See also: check-vm-capabilities.sh for runtime verification. 
+
+# ── Network Namespaces (required for pod isolation) ─────────────────────
+CONFIG_NET_NS=y
+CONFIG_NAMESPACES=y
+
+# ── Virtual Ethernet (veth pairs for pod networking) ────────────────────
+CONFIG_VETH=y
+
+# ── Linux Bridge (required for bridge CNI plugin) ──────────────────────
+CONFIG_BRIDGE=y
+CONFIG_BRIDGE_NETFILTER=y
+CONFIG_BRIDGE_IGMP_SNOOPING=y
+
+# ── Netfilter framework ────────────────────────────────────────────────
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_INGRESS=y
+CONFIG_NETFILTER_NETLINK=y
+CONFIG_NETFILTER_NETLINK_QUEUE=y
+CONFIG_NETFILTER_NETLINK_LOG=y
+
+# ── Connection tracking (required for NAT and kube-proxy) ──────────────
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+
+# ── Netfilter xtables match modules (required by kube-proxy & kubelet) ─
+# kube-proxy uses xt_conntrack for stateful rules and xt_comment for
+# labeling chains. Without these, iptables fails with:
+#   "Couldn't load match 'conntrack': No such file or directory"
+CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y
+CONFIG_NETFILTER_XT_MATCH_RECENT=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_TARGET_LOG=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+
+# ── NAT (required for service VIP / DNAT / SNAT) ──────────────────────
+CONFIG_NF_NAT=y
+# NOTE(review): CONFIG_NF_NAT_MASQUERADE_IPV4 was folded into CONFIG_NF_NAT
+# in kernel 5.1; on a 6.x kernel this symbol no longer exists and will be
+# silently dropped by olddefconfig (the merge hook's REQUIRED check does not
+# include it, so the build still passes). Confirm against the pinned kernel
+# version and drop the line if obsolete.
+CONFIG_NF_NAT_MASQUERADE_IPV4=y
+
+# ── iptables (CNI bridge masquerade + compat) ──────────────────────────
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_REJECT=y
+
+# ── nftables (kube-proxy nftables mode — primary proxy backend)
───────── +# kube-proxy nftables proxier requires: numgen (random LB), fib (local +# address detection), counter, ct, nat, masq, reject, limit, redir. +CONFIG_NF_TABLES=y +CONFIG_NF_TABLES_INET=y +CONFIG_NFT_CT=y +CONFIG_NFT_NAT=y +CONFIG_NFT_MASQ=y +CONFIG_NFT_REJECT=y +CONFIG_NFT_COMPAT=y +CONFIG_NFT_NUMGEN=y +CONFIG_NFT_FIB_IPV4=y +CONFIG_NFT_FIB_IPV6=y +CONFIG_NFT_LIMIT=y +CONFIG_NFT_REDIR=y +CONFIG_NFT_TPROXY=y + +# ── IP forwarding and routing (required for pod-to-pod) ──────────────── +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_NET_IP_TUNNEL=y + +# ── IPVS (optional: kube-proxy IPVS mode) ───────────────────────────── +CONFIG_IP_VS=y +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_RR=y +CONFIG_IP_VS_WRR=y +CONFIG_IP_VS_SH=y +CONFIG_IP_VS_NFCT=y + +# ── Misc networking required by Kubernetes ───────────────────────────── +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_CGROUP=y +CONFIG_CGROUP_NET_PRIO=y +CONFIG_CGROUP_NET_CLASSID=y + +# ── Dummy interface (fallback networking) ────────────────────────────── +CONFIG_DUMMY=y + +# ── TUN/TAP (used by some CNI plugins) ──────────────────────────────── +CONFIG_TUN=y + +# ── Cgroups (already in base, ensure v2 is available) ────────────────── +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PIDS=y +CONFIG_MEMCG=y + +# ── Security features required by the sandbox runtime ─────────────────── +CONFIG_SECURITY_LANDLOCK=y +CONFIG_SECCOMP_FILTER=y diff --git a/crates/openshell-vm/scripts/build-rootfs.sh b/crates/openshell-vm/scripts/build-rootfs.sh new file mode 100755 index 000000000..16a0a23de --- /dev/null +++ b/crates/openshell-vm/scripts/build-rootfs.sh @@ -0,0 +1,763 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build a Ubuntu rootfs for the openshell-vm microVM. 
+# +# By default, produces a fully pre-initialized rootfs with k3s pre-installed, +# the OpenShell helm chart and manifests baked in, container images pre-loaded, +# AND a fully initialized k3s cluster state (database, TLS, images imported, +# all services deployed). On first VM boot, k3s resumes from this pre-baked +# state instead of cold-starting, achieving ~3-5s startup times. +# +# With --base, produces a lightweight rootfs (~200-300MB) with: +# - Base Ubuntu with k3s binary +# - OpenShell supervisor binary +# - Helm charts and Kubernetes manifests +# - NO pre-loaded container images (pulled on demand) +# - NO pre-initialized k3s state (cold start on first boot) +# First boot will be slower (~30-60s) as k3s initializes and pulls images. +# +# Supports aarch64 and x86_64 guest architectures. The target architecture +# is auto-detected from the host but can be overridden with --arch. +# +# Usage: +# ./build-rootfs.sh [--base] [--arch aarch64|x86_64] [output_dir] +# +# If output_dir is omitted, the rootfs is built under target/rootfs-build. +# +# Requires: Docker (or compatible container runtime), curl, helm +# Full mode (default) also requires: zstd, sqlite3, a built openshell-vm binary + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source pinned dependency versions (digests, checksums, commit SHAs). +# Environment variables override pins — see pins.env for details. 
+PINS_FILE="${SCRIPT_DIR}/../pins.env" +if [ -f "$PINS_FILE" ]; then + # shellcheck source=../pins.env + source "$PINS_FILE" +fi + +# ── Argument parsing ─────────────────────────────────────────────────── +BASE_ONLY=false +GUEST_ARCH="" +POSITIONAL_ARGS=() +while [[ $# -gt 0 ]]; do + case "$1" in + --base) + BASE_ONLY=true; shift ;; + --arch) + GUEST_ARCH="$2"; shift 2 ;; + *) + POSITIONAL_ARGS+=("$1"); shift ;; + esac +done + +# ── Architecture detection ───────────────────────────────────────────── +# Allow override via --arch flag; default to host architecture. +if [ -z "$GUEST_ARCH" ]; then + case "$(uname -m)" in + aarch64|arm64) GUEST_ARCH="aarch64" ;; + x86_64) GUEST_ARCH="x86_64" ;; + *) + echo "ERROR: Unsupported host architecture: $(uname -m)" >&2 + echo " Use --arch aarch64 or --arch x86_64 to override." >&2 + exit 1 + ;; + esac +fi + +case "$GUEST_ARCH" in + aarch64) + DOCKER_PLATFORM="linux/arm64" + K3S_BINARY_SUFFIX="-arm64" + K3S_CHECKSUM_VAR="K3S_ARM64_SHA256" + RUST_TARGET="aarch64-unknown-linux-gnu" + ;; + x86_64) + DOCKER_PLATFORM="linux/amd64" + K3S_BINARY_SUFFIX="" # x86_64 binary has no suffix + K3S_CHECKSUM_VAR="K3S_AMD64_SHA256" + RUST_TARGET="x86_64-unknown-linux-gnu" + ;; + *) + echo "ERROR: Unsupported guest architecture: ${GUEST_ARCH}" >&2 + echo " Supported: aarch64, x86_64" >&2 + exit 1 + ;; +esac + +# Project root (two levels up from crates/openshell-vm/scripts/) +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" +DEFAULT_ROOTFS="${PROJECT_ROOT}/target/rootfs-build" +ROOTFS_DIR="${POSITIONAL_ARGS[0]:-${DEFAULT_ROOTFS}}" +CONTAINER_NAME="krun-rootfs-builder" +BASE_IMAGE_TAG="krun-rootfs:openshell-vm" +# K3S_VERSION uses the semver "+" form for GitHub releases. +# The mise env may provide the Docker-tag form with "-" instead of "+"; +# normalise to "+" so the GitHub download URL works. +K3S_VERSION="${K3S_VERSION:-v1.35.2+k3s1}" +K3S_VERSION="${K3S_VERSION//-k3s/+k3s}" + +# Container images to pre-load into k3s (full mode only). 
+# AGENT_SANDBOX_IMAGE and COMMUNITY_SANDBOX_IMAGE are digest-pinned in pins.env. +# SERVER_IMAGE is intentionally unpinned (local dev artifact). +IMAGE_REPO_BASE="${IMAGE_REPO_BASE:-openshell}" +IMAGE_TAG="${IMAGE_TAG:-dev}" +SERVER_IMAGE="${IMAGE_REPO_BASE}/gateway:${IMAGE_TAG}" + +# Cross-platform checksum helper +verify_checksum() { + local expected="$1" file="$2" + if command -v sha256sum &>/dev/null; then + echo "${expected} ${file}" | sha256sum -c - + else + echo "${expected} ${file}" | shasum -a 256 -c - + fi +} + +if [ "$BASE_ONLY" = true ]; then + echo "==> Building base openshell-vm rootfs" + echo " Guest arch: ${GUEST_ARCH}" + echo " k3s version: ${K3S_VERSION}" + echo " Output: ${ROOTFS_DIR}" + echo " Mode: base (no pre-loaded images, cold start)" +else + echo "==> Building openshell-vm rootfs" + echo " Guest arch: ${GUEST_ARCH}" + echo " k3s version: ${K3S_VERSION}" + echo " Images: ${SERVER_IMAGE}, ${COMMUNITY_SANDBOX_IMAGE}" + echo " Output: ${ROOTFS_DIR}" + echo " Mode: full (pre-loaded images, pre-initialized)" +fi +echo "" + +# ── Check for running VM ──────────────────────────────────────────────── +# If an openshell-vm is using this rootfs via virtio-fs, wiping the rootfs +# corrupts the VM's filesystem (e.g. /var disappears) causing cascading +# k3s failures. We use two checks: +# +# 1. flock: The Rust openshell-vm process holds an exclusive flock on the lock +# file for its entire lifetime. This is the primary guard — it works +# even if the state file was deleted, and the OS releases the lock +# automatically when the process dies (including SIGKILL). +# +# 2. State file: Fallback check for the PID in the state file. This +# catches VMs launched before the flock guard was added. + +VM_LOCK_FILE="$(dirname "${ROOTFS_DIR}")/$(basename "${ROOTFS_DIR}")-vm.lock" +if [ -f "${VM_LOCK_FILE}" ]; then + # Try to acquire the lock non-blocking. Use Python's fcntl.flock() + # because the `flock` CLI tool is not available on macOS. + if ! 
python3 -c " +import fcntl, os, sys +fd = os.open(sys.argv[1], os.O_RDONLY) +try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + fcntl.flock(fd, fcntl.LOCK_UN) +except BlockingIOError: + sys.exit(1) +finally: + os.close(fd) +" "${VM_LOCK_FILE}" 2>/dev/null; then + HOLDER_PID=$(cat "${VM_LOCK_FILE}" 2>/dev/null | tr -d '[:space:]') + echo "" + echo "ERROR: An openshell-vm (pid ${HOLDER_PID:-unknown}) holds a lock on this rootfs." + echo " Wiping the rootfs while the VM is running will corrupt its" + echo " filesystem and cause k3s failures." + echo "" + echo " Stop the VM first: kill ${HOLDER_PID:-}" + echo " Then re-run this script." + echo "" + exit 1 + fi +fi + +VM_STATE_FILE="$(dirname "${ROOTFS_DIR}")/$(basename "${ROOTFS_DIR}")-vm-state.json" +if [ -f "${VM_STATE_FILE}" ]; then + VM_PID=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1]))['pid'])" "${VM_STATE_FILE}" 2>/dev/null || echo "") + if [ -n "${VM_PID}" ] && kill -0 "${VM_PID}" 2>/dev/null; then + echo "" + echo "ERROR: An openshell-vm is running (pid ${VM_PID}) using this rootfs." + echo " Wiping the rootfs while the VM is running will corrupt its" + echo " filesystem and cause k3s failures." + echo "" + echo " Stop the VM first: kill ${VM_PID}" + echo " Then re-run this script." + echo "" + exit 1 + else + # Stale state file — VM is no longer running. Clean it up. + rm -f "${VM_STATE_FILE}" + fi +fi + +# ── Download k3s binary (outside Docker — much faster) ───────────────── + +K3S_BIN="/tmp/k3s-${GUEST_ARCH}-${K3S_VERSION}" +if [ -f "${K3S_BIN}" ]; then + echo "==> Using cached k3s binary: ${K3S_BIN}" +else + echo "==> Downloading k3s ${K3S_VERSION} for ${GUEST_ARCH}..." + curl -fSL "https://github.com/k3s-io/k3s/releases/download/${K3S_VERSION}/k3s${K3S_BINARY_SUFFIX}" \ + -o "${K3S_BIN}" + chmod +x "${K3S_BIN}" +fi + +# Verify k3s binary integrity. +K3S_CHECKSUM="${!K3S_CHECKSUM_VAR:-}" +if [ -n "${K3S_CHECKSUM}" ]; then + echo "==> Verifying k3s binary checksum..." 
+ verify_checksum "${K3S_CHECKSUM}" "${K3S_BIN}" +else + echo "WARNING: ${K3S_CHECKSUM_VAR} not set, skipping checksum verification" +fi + +# ── Build base image with dependencies ───────────────────────────────── + +# Clean up any previous run +docker rm -f "${CONTAINER_NAME}" 2>/dev/null || true + +echo "==> Building base image..." +docker build --platform "${DOCKER_PLATFORM}" -t "${BASE_IMAGE_TAG}" \ + --build-arg "BASE_IMAGE=${VM_BASE_IMAGE}" -f - . <<'DOCKERFILE' +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + e2fsprogs \ + iptables \ + iproute2 \ + python3 \ + busybox-static \ + sqlite3 \ + util-linux \ + zstd \ + && rm -rf /var/lib/apt/lists/* +# busybox-static provides udhcpc for DHCP inside the VM. +RUN mkdir -p /usr/share/udhcpc && \ + ln -sf /bin/busybox /sbin/udhcpc +RUN mkdir -p /var/lib/rancher/k3s /etc/rancher/k3s +DOCKERFILE + +# Create a container and export the filesystem +echo "==> Creating container..." +docker create --platform "${DOCKER_PLATFORM}" --name "${CONTAINER_NAME}" "${BASE_IMAGE_TAG}" /bin/true + +echo "==> Exporting filesystem..." +# Previous builds may leave overlayfs work/ dirs with permissions that +# prevent rm on macOS. Force-fix permissions before removing. +if [ -d "${ROOTFS_DIR}" ]; then + chmod -R u+rwx "${ROOTFS_DIR}" 2>/dev/null || true + rm -rf "${ROOTFS_DIR}" +fi +mkdir -p "${ROOTFS_DIR}" +docker export "${CONTAINER_NAME}" | tar -C "${ROOTFS_DIR}" -xf - + +docker rm "${CONTAINER_NAME}" + +# ── Inject k3s binary ──────────────────────────────────────────────── + +echo "==> Injecting k3s binary..." +cp "${K3S_BIN}" "${ROOTFS_DIR}/usr/local/bin/k3s" +chmod +x "${ROOTFS_DIR}/usr/local/bin/k3s" +ln -sf /usr/local/bin/k3s "${ROOTFS_DIR}/usr/local/bin/kubectl" + +# k3s self-extracts runtime binaries (containerd, runc, CNI plugins, +# coreutils, etc.) into a versioned data directory the first time it +# runs. 
On the pre-initialized rootfs these were extracted during the +# Docker build or VM pre-init phase. docker export and macOS virtio-fs +# can strip execute bits from Linux ELF binaries, so fix them here. +echo " Fixing execute permissions on k3s data binaries..." +chmod +x "${ROOTFS_DIR}"/var/lib/rancher/k3s/data/*/bin/* 2>/dev/null || true +chmod +x "${ROOTFS_DIR}"/var/lib/rancher/k3s/data/*/bin/aux/* 2>/dev/null || true + +# ── Inject scripts ──────────────────────────────────────────────────── + +echo "==> Injecting scripts..." +mkdir -p "${ROOTFS_DIR}/srv" +cp "${SCRIPT_DIR}/openshell-vm-init.sh" "${ROOTFS_DIR}/srv/openshell-vm-init.sh" +chmod +x "${ROOTFS_DIR}/srv/openshell-vm-init.sh" + +# Inject VM capability checker for runtime diagnostics. +cp "${SCRIPT_DIR}/check-vm-capabilities.sh" "${ROOTFS_DIR}/srv/check-vm-capabilities.sh" +chmod +x "${ROOTFS_DIR}/srv/check-vm-capabilities.sh" + +# Inject the openshell-vm exec agent used by `openshell-vm exec`. +cp "${SCRIPT_DIR}/openshell-vm-exec-agent.py" "${ROOTFS_DIR}/srv/openshell-vm-exec-agent.py" +chmod +x "${ROOTFS_DIR}/srv/openshell-vm-exec-agent.py" + +# ── Build and inject openshell-sandbox supervisor binary ───────────── +# The supervisor binary runs inside every sandbox pod. It is side-loaded +# from the node filesystem via a read-only hostPath volume mount at +# /opt/openshell/bin. In the Docker-based gateway this is built in the +# Dockerfile.cluster supervisor-builder stage; here we cross-compile +# from the host using cargo-zigbuild. + +SUPERVISOR_TARGET="${RUST_TARGET}" +SUPERVISOR_BIN="${PROJECT_ROOT}/target/${SUPERVISOR_TARGET}/release/openshell-sandbox" + +echo "==> Building openshell-sandbox supervisor binary (${SUPERVISOR_TARGET})..." +if ! command -v cargo-zigbuild >/dev/null 2>&1; then + echo "ERROR: cargo-zigbuild is not installed." 
+ echo " Install it with: cargo install cargo-zigbuild" + echo " Also requires: zig (brew install zig)" + exit 1 +fi + +cargo zigbuild --release -p openshell-sandbox --target "${SUPERVISOR_TARGET}" \ + --manifest-path "${PROJECT_ROOT}/Cargo.toml" 2>&1 | tail -5 + +if [ ! -f "${SUPERVISOR_BIN}" ]; then + echo "ERROR: supervisor binary not found at ${SUPERVISOR_BIN}" + exit 1 +fi + +echo " Injecting supervisor binary into rootfs..." +mkdir -p "${ROOTFS_DIR}/opt/openshell/bin" +cp "${SUPERVISOR_BIN}" "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" +chmod +x "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" +echo " Size: $(du -h "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" | cut -f1)" + +# ── Package and inject helm chart ──────────────────────────────────── + +HELM_CHART_DIR="${PROJECT_ROOT}/deploy/helm/openshell" +CHART_DEST="${ROOTFS_DIR}/var/lib/rancher/k3s/server/static/charts" + +if [ -d "${HELM_CHART_DIR}" ]; then + echo "==> Packaging helm chart..." + mkdir -p "${CHART_DEST}" + helm package "${HELM_CHART_DIR}" -d "${CHART_DEST}" + echo " $(ls "${CHART_DEST}"/*.tgz 2>/dev/null | xargs -I{} basename {})" + # Also stage to /opt/openshell/charts/ so the init script can + # restore them after a --reset wipes server/static/charts/. + mkdir -p "${ROOTFS_DIR}/opt/openshell/charts" + cp "${CHART_DEST}"/*.tgz "${ROOTFS_DIR}/opt/openshell/charts/" +else + echo "WARNING: Helm chart not found at ${HELM_CHART_DIR}, skipping" +fi + +# ── Inject Kubernetes manifests ────────────────────────────────────── +# These are copied to /opt/openshell/manifests/ (staging). openshell-vm-init.sh +# moves them to /var/lib/rancher/k3s/server/manifests/ at boot so the +# k3s Helm Controller auto-deploys them. + +MANIFEST_SRC="${PROJECT_ROOT}/deploy/kube/manifests" +MANIFEST_DEST="${ROOTFS_DIR}/opt/openshell/manifests" + +echo "==> Injecting Kubernetes manifests..." 
+mkdir -p "${MANIFEST_DEST}" + +for manifest in openshell-helmchart.yaml agent-sandbox.yaml; do + if [ -f "${MANIFEST_SRC}/${manifest}" ]; then + cp "${MANIFEST_SRC}/${manifest}" "${MANIFEST_DEST}/" + echo " ${manifest}" + else + echo "WARNING: ${manifest} not found in ${MANIFEST_SRC}" + fi +done + +# ── Base mode: mark rootfs type and skip pre-loading ─────────────────── + +if [ "$BASE_ONLY" = true ]; then + # k3s expects this directory to exist for airgap image loading. + mkdir -p "${ROOTFS_DIR}/var/lib/rancher/k3s/agent/images" + + # Mark as base (not pre-initialized). The init script checks for + # this file to determine if cold start is expected. + echo "base" > "${ROOTFS_DIR}/opt/openshell/.rootfs-type" + + # ── Verify ───────────────────────────────────────────────────────── + if [ ! -f "${ROOTFS_DIR}/usr/local/bin/k3s" ]; then + echo "ERROR: k3s binary not found in rootfs." + exit 1 + fi + + if [ ! -x "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" ]; then + echo "ERROR: openshell-sandbox supervisor binary not found in rootfs." + exit 1 + fi + + echo "" + echo "==> Base rootfs ready at: ${ROOTFS_DIR}" + echo " Size: $(du -sh "${ROOTFS_DIR}" | cut -f1)" + echo " Type: base (cold start, images pulled on demand)" + echo "" + echo "Note: First boot will take ~30-60s as k3s initializes." + echo " Container images will be pulled from registries on first use." + exit 0 +fi + +# ══════════════════════════════════════════════════════════════════════════ +# Full mode: pre-load images and pre-initialize k3s cluster state +# ══════════════════════════════════════════════════════════════════════════ + +# ── Pre-load container images ──────────────────────────────────────── +# Pull images for the target architecture and save as tarballs in the +# k3s airgap images directory. k3s auto-imports from +# /var/lib/rancher/k3s/agent/images/ on startup, so no internet access +# is needed at boot time. 
+# +# Tarballs are cached in a persistent directory outside the rootfs so +# they survive rebuilds. This avoids re-pulling and re-saving ~1 GiB +# of images each time. + +IMAGES_DIR="${ROOTFS_DIR}/var/lib/rancher/k3s/agent/images" +IMAGE_CACHE_DIR="${XDG_CACHE_HOME:-${HOME}/.cache}/openshell/openshell-vm/images" +mkdir -p "${IMAGES_DIR}" "${IMAGE_CACHE_DIR}" + +echo "==> Pre-loading container images (${GUEST_ARCH})..." + +pull_and_save() { + local image="$1" + local output="$2" + local cache="${IMAGE_CACHE_DIR}/$(basename "${output}")" + + # Use cached tarball if available. + if [ -f "${cache}" ]; then + echo " cached: $(basename "${output}")" + cp "${cache}" "${output}" + return 0 + fi + + # Try to pull; if the registry is unavailable, fall back to the + # local Docker image cache (image may exist from a previous pull). + echo " pulling: ${image}..." + if ! docker pull --platform "${DOCKER_PLATFORM}" "${image}" --quiet 2>/dev/null; then + echo " pull failed, checking local Docker cache..." + if ! docker image inspect "${image}" >/dev/null 2>&1; then + echo "ERROR: image ${image} not available locally or from registry" + exit 1 + fi + echo " using locally cached image" + fi + + echo " saving: $(basename "${output}")..." + # Pipe through zstd for faster decompression and smaller tarballs. + # k3s auto-imports .tar.zst files from the airgap images directory. + # -T0 uses all CPU cores; -3 is a good speed/ratio tradeoff. + docker save "${image}" | zstd -T0 -3 -o "${output}" + # Cache for next rebuild. + cp "${output}" "${cache}" +} + +pull_and_save "${SERVER_IMAGE}" "${IMAGES_DIR}/openshell-server.tar.zst" +pull_and_save "${AGENT_SANDBOX_IMAGE}" "${IMAGES_DIR}/agent-sandbox-controller.tar.zst" +pull_and_save "${COMMUNITY_SANDBOX_IMAGE}" "${IMAGES_DIR}/community-sandbox-base.tar.zst" + +# ── Pre-initialize k3s cluster state ───────────────────────────────── +# Boot k3s inside a Docker container using the rootfs we just built. 
+# Wait for it to fully initialize (import images, deploy manifests, +# create database), then capture the state back into the rootfs. +# +# This eliminates cold-start latency: on VM boot, k3s finds existing +# state and resumes in ~3-5 seconds instead of 30-60s. + +echo "" +echo "==> Pre-initializing k3s cluster state..." +echo " This boots k3s in a container, waits for full readiness," +echo " then captures the initialized state into the rootfs." + +# Patch the HelmChart manifest for the init container (same patches +# openshell-vm-init.sh applies at runtime). +INIT_MANIFESTS="${ROOTFS_DIR}/var/lib/rancher/k3s/server/manifests" +mkdir -p "${INIT_MANIFESTS}" + +# Copy manifests from staging to the k3s manifest directory. +for manifest in "${MANIFEST_DEST}"/*.yaml; do + [ -f "$manifest" ] || continue + cp "$manifest" "${INIT_MANIFESTS}/" +done + +# Patch HelmChart for local images and VM settings. +HELMCHART="${INIT_MANIFESTS}/openshell-helmchart.yaml" +if [ -f "$HELMCHART" ]; then + # Use local images — explicitly imported into containerd. + sed -i '' 's|__IMAGE_PULL_POLICY__|IfNotPresent|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__IMAGE_PULL_POLICY__|IfNotPresent|g' "$HELMCHART" + sed -i '' 's|__SANDBOX_IMAGE_PULL_POLICY__|"IfNotPresent"|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__SANDBOX_IMAGE_PULL_POLICY__|"IfNotPresent"|g' "$HELMCHART" + sed -i '' 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g' "$HELMCHART" + # Use the locally imported image references. 
+ sed -i '' -E "s|repository:[[:space:]]*[^[:space:]]+|repository: ${SERVER_IMAGE%:*}|" "$HELMCHART" 2>/dev/null \ + || sed -i -E "s|repository:[[:space:]]*[^[:space:]]+|repository: ${SERVER_IMAGE%:*}|" "$HELMCHART" + sed -i '' -E "s|tag:[[:space:]]*\"?[^\"[:space:]]+\"?|tag: \"${IMAGE_TAG}\"|" "$HELMCHART" 2>/dev/null \ + || sed -i -E "s|tag:[[:space:]]*\"?[^\"[:space:]]+\"?|tag: \"${IMAGE_TAG}\"|" "$HELMCHART" + # Clear SSH gateway placeholders. + sed -i '' 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" + sed -i '' 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" + sed -i '' 's|__DISABLE_GATEWAY_AUTH__|false|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__DISABLE_GATEWAY_AUTH__|false|g' "$HELMCHART" + sed -i '' 's|__DISABLE_TLS__|false|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__DISABLE_TLS__|false|g' "$HELMCHART" + sed -i '' 's|hostGatewayIP: __HOST_GATEWAY_IP__|hostGatewayIP: ""|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|hostGatewayIP: __HOST_GATEWAY_IP__|hostGatewayIP: ""|g' "$HELMCHART" + sed -i '' '/__CHART_CHECKSUM__/d' "$HELMCHART" 2>/dev/null \ + || sed -i '/__CHART_CHECKSUM__/d' "$HELMCHART" +fi + +# Patch agent-sandbox manifest for VM networking constraints. +AGENT_MANIFEST="${INIT_MANIFESTS}/agent-sandbox.yaml" +if [ -f "$AGENT_MANIFEST" ]; then + # Keep agent-sandbox on pod networking to avoid host port clashes. + # Point in-cluster client traffic at the API server node IP because + # kube-proxy is disabled in VM mode. 
+ sed -i '' '/hostNetwork: true/d' "$AGENT_MANIFEST" 2>/dev/null \ + || sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST" + sed -i '' '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" 2>/dev/null \ + || sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" + sed -i '' 's|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0\ + args:\ + - -metrics-bind-address=:8082\ + env:\ + - name: KUBERNETES_SERVICE_HOST\ + value: 192.168.127.2\ + - name: KUBERNETES_SERVICE_PORT\ + value: "6443"|g' "$AGENT_MANIFEST" 2>/dev/null \ + || sed -i 's|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0\ + args:\ + - -metrics-bind-address=:8082\ + env:\ + - name: KUBERNETES_SERVICE_HOST\ + value: 192.168.127.2\ + - name: KUBERNETES_SERVICE_PORT\ + value: "6443"|g' "$AGENT_MANIFEST" + if grep -q 'hostNetwork: true' "$AGENT_MANIFEST" \ + || grep -q 'ClusterFirstWithHostNet' "$AGENT_MANIFEST" \ + || ! grep -q 'KUBERNETES_SERVICE_HOST' "$AGENT_MANIFEST" \ + || ! grep -q 'metrics-bind-address=:8082' "$AGENT_MANIFEST"; then + echo "ERROR: failed to patch agent-sandbox manifest for VM networking constraints: $AGENT_MANIFEST" >&2 + exit 1 + fi +fi + +# local-path-provisioner (deployed by k3s from local-storage.yaml) provides +# PVC storage for sandbox workspace volumes. It requires CNI bridge +# networking, which is now available in the VM kernel. + +# ── Pre-initialize using the actual libkrun VM ────────────────────────── +# Boot the real VM with the rootfs we just built. This uses the same +# kernel, networking, and kube-proxy config as production — eliminating +# Docker IP mismatches, snapshotter mismatches, and the Docker volume +# copy-back dance. The VM writes state directly into the rootfs via +# virtio-fs. +# +# Requirements: the openshell-vm binary must be built and codesigned. 
+# mise run vm:build handles this. + +GATEWAY_BIN="${PROJECT_ROOT}/target/debug/openshell-vm" +RUNTIME_DIR="${PROJECT_ROOT}/target/debug/openshell-vm.runtime" + +if [ ! -x "${GATEWAY_BIN}" ]; then + echo "ERROR: openshell-vm binary not found at ${GATEWAY_BIN}" + echo " Run: mise run vm:build" + exit 1 +fi + +if [ ! -d "${RUNTIME_DIR}" ]; then + echo "ERROR: VM runtime bundle not found at ${RUNTIME_DIR}" + echo " Run: mise run vm:build" + exit 1 +fi + +# Helper: run a command inside the VM via the exec agent. +vm_exec() { + if [ "$(uname -s)" = "Darwin" ]; then + DYLD_FALLBACK_LIBRARY_PATH="${RUNTIME_DIR}${DYLD_FALLBACK_LIBRARY_PATH:+:${DYLD_FALLBACK_LIBRARY_PATH}}" \ + "${GATEWAY_BIN}" --rootfs "${ROOTFS_DIR}" exec -- "$@" 2>&1 + else + LD_LIBRARY_PATH="${RUNTIME_DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" \ + "${GATEWAY_BIN}" --rootfs "${ROOTFS_DIR}" exec -- "$@" 2>&1 + fi +} + +# Ensure no stale VM is using this rootfs. +echo " Starting VM for pre-initialization..." +if [ "$(uname -s)" = "Darwin" ]; then + export DYLD_FALLBACK_LIBRARY_PATH="${RUNTIME_DIR}${DYLD_FALLBACK_LIBRARY_PATH:+:${DYLD_FALLBACK_LIBRARY_PATH}}" +else + export LD_LIBRARY_PATH="${RUNTIME_DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" +fi +# Pre-initialize directly on virtio-fs. Runtime boots attach a separate +# block-backed state disk and seed it from the rootfs on first launch. +OPENSHELL_VM_DISABLE_STATE_DISK=1 "${GATEWAY_BIN}" --rootfs "${ROOTFS_DIR}" --reset & +VM_PID=$! + +# Ensure the VM is cleaned up on script exit. +cleanup_vm() { + if kill -0 "${VM_PID}" 2>/dev/null; then + echo " Stopping VM (pid ${VM_PID})..." + kill "${VM_PID}" 2>/dev/null || true + wait "${VM_PID}" 2>/dev/null || true + fi +} +trap cleanup_vm EXIT + +# Wait for the exec agent to become reachable. +echo " Waiting for VM exec agent..." 
+for i in $(seq 1 120); do + if vm_exec true >/dev/null 2>&1; then + echo " Exec agent ready (${i}s)" + break + fi + if [ "$i" -eq 120 ]; then + echo "ERROR: VM exec agent did not become reachable in 120s" + exit 1 + fi + sleep 1 +done + +# Wait for containerd to be ready. +echo " Waiting for containerd..." +for i in $(seq 1 60); do + if vm_exec k3s ctr version >/dev/null 2>&1; then + echo " Containerd ready (${i}s)" + break + fi + if [ "$i" -eq 60 ]; then + echo "ERROR: containerd did not become ready in 60s" + exit 1 + fi + sleep 1 +done + +# Wait for the openshell namespace (Helm controller creates it). +echo " Waiting for openshell namespace..." +for i in $(seq 1 180); do + if vm_exec kubectl get namespace openshell -o name 2>/dev/null | grep -q openshell; then + echo " Namespace ready (${i}s)" + break + fi + if [ "$i" -eq 180 ]; then + echo "ERROR: openshell namespace did not appear in 180s" + exit 1 + fi + sleep 1 +done + +# Wait for the openshell StatefulSet to have a ready replica. +# The VM init script generates PKI and writes TLS secrets manifests +# automatically — no host-side PKI generation needed. +echo " Waiting for openshell pod to be ready..." +for i in $(seq 1 180); do + ready=$(vm_exec kubectl -n openshell get statefulset openshell \ + -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") + if [ "$ready" = "1" ]; then + echo " OpenShell pod ready (${i}s)" + break + fi + if [ "$i" -eq 180 ]; then + echo "WARNING: openshell pod not ready after 180s, continuing anyway" + vm_exec kubectl -n openshell get pods 2>/dev/null | sed 's/^/ /' || true + break + fi + sleep 1 +done + +# Pre-unpack container images so the overlayfs snapshotter has ready-to-use +# snapshots on first boot. The snapshotter now runs directly on virtio-fs, +# so these unpacked layers persist across VM restarts — eliminating the +# per-boot layer extraction that previously added ~3-5s per container. +echo " Pre-unpacking container images..." 
+for img in \
+  "${SERVER_IMAGE}" \
+  "${COMMUNITY_SANDBOX_IMAGE}"; do
+  if vm_exec k3s ctr -n k8s.io images ls -q 2>/dev/null | grep -qF "$img"; then
+    echo "   unpacking: $img"
+    vm_exec k3s ctr -n k8s.io run --rm "$img" "pre-unpack-$(date +%s)" true 2>/dev/null || true
+  fi
+done  # unpack the refs pre-loaded via pull_and_save above, not hard-coded :latest names
+echo "   Image pre-unpack complete."
+
+# Stop the VM so the kine SQLite DB is flushed.
+echo "   Stopping VM..."
+kill "${VM_PID}" 2>/dev/null || true
+wait "${VM_PID}" 2>/dev/null || true
+
+# Surgically clean the kine SQLite DB. Runtime objects (pods, events,
+# leases) created during pre-initialization would cause the VM's kubelet
+# to reconcile against an empty containerd on first real boot.
+#
+# NOTE: This is build-time cleanup only — it produces a clean rootfs
+# image. At runtime, state.db is preserved across VM restarts so that
+# pods and other cluster objects persist. The init script
+# (openshell-vm-init.sh) handles stale bootstrap lock cleanup via
+# sqlite3, and the host-side Rust code (exec.rs) handles actual DB
+# corruption by removing the file.
+echo "   Cleaning runtime objects from kine DB..."
+DB="${ROOTFS_DIR}/var/lib/rancher/k3s/server/db/state.db"
+if [ -f "$DB" ]; then
+  echo "   Before: $(sqlite3 "$DB" "SELECT COUNT(*) FROM kine;") kine records"
+  sqlite3 "$DB" <<'EOSQL'
+DELETE FROM kine WHERE name LIKE '/registry/pods/%';
+DELETE FROM kine WHERE name LIKE '/registry/events/%';
+DELETE FROM kine WHERE name LIKE '/registry/leases/%';
+DELETE FROM kine WHERE name LIKE '/registry/endpointslices/%';
+DELETE FROM kine WHERE name LIKE '/registry/masterleases/%';
+PRAGMA wal_checkpoint(TRUNCATE);
+VACUUM;
+EOSQL
+  echo "   After: $(sqlite3 "$DB" "SELECT COUNT(*) FROM kine;") kine records"
+else
+  echo "WARNING: state.db not found at ${DB}"
+fi
+
+# Clean up runtime artifacts that shouldn't persist.
+echo "   Cleaning runtime artifacts..."
+rm -rf "${ROOTFS_DIR}/var/lib/rancher/k3s/server/tls/temporary-certs" 2>/dev/null || true +rm -f "${ROOTFS_DIR}/var/lib/rancher/k3s/server/kine.sock" 2>/dev/null || true +find "${ROOTFS_DIR}/var/lib/rancher/k3s" -name '*.sock' -delete 2>/dev/null || true +find "${ROOTFS_DIR}/run" -name '*.sock' -delete 2>/dev/null || true + +# Write sentinel file so openshell-vm-init.sh and the host-side bootstrap +# know this rootfs has pre-initialized state. +echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "${ROOTFS_DIR}/opt/openshell/.initialized" + +echo " Pre-initialization complete." + +# ── Verify ──────────────────────────────────────────────────────────── + +if [ ! -f "${ROOTFS_DIR}/usr/local/bin/k3s" ]; then + echo "ERROR: k3s binary not found in rootfs. Something went wrong." + exit 1 +fi + +if [ ! -f "${ROOTFS_DIR}/opt/openshell/.initialized" ]; then + echo "WARNING: Pre-initialization sentinel not found. Cold starts will be slow." +fi + +if [ ! -x "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" ]; then + echo "ERROR: openshell-sandbox supervisor binary not found in rootfs." + echo " Sandbox pods will fail with CreateContainerError." + exit 1 +fi + +echo "" +echo "==> Rootfs ready at: ${ROOTFS_DIR}" +echo " Size: $(du -sh "${ROOTFS_DIR}" | cut -f1)" +echo " Pre-initialized: $(cat "${ROOTFS_DIR}/opt/openshell/.initialized" 2>/dev/null || echo 'no')" + +# Show k3s data size +K3S_DATA="${ROOTFS_DIR}/var/lib/rancher/k3s" +if [ -d "${K3S_DATA}" ]; then + echo " k3s state: $(du -sh "${K3S_DATA}" | cut -f1)" +fi + +# PKI is generated at first VM boot by the init script — not baked. + +# Show supervisor binary +if [ -x "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" ]; then + echo " Supervisor: $(du -h "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" | cut -f1)" +fi + +echo "" +echo "Next steps:" +echo " 1. 
Run: openshell-vm" +echo " Expected startup time: ~3-5 seconds (pre-initialized)" diff --git a/crates/openshell-vm/scripts/check-vm-capabilities.sh b/crates/openshell-vm/scripts/check-vm-capabilities.sh new file mode 100755 index 000000000..2e758f5e0 --- /dev/null +++ b/crates/openshell-vm/scripts/check-vm-capabilities.sh @@ -0,0 +1,234 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# VM Kernel Capability Checker +# +# Runs inside the guest VM (or a container with the same rootfs) to +# verify that the kernel has the capabilities required for bridge CNI +# networking, kube-proxy, and Kubernetes pod networking. +# +# Usage: +# ./check-vm-capabilities.sh [--json] +# +# Exit codes: +# 0 = all required capabilities present +# 1 = one or more required capabilities missing +# 2 = script error + +set -euo pipefail + +JSON_OUTPUT=false +if [ "${1:-}" = "--json" ]; then + JSON_OUTPUT=true +fi + +PASS=0 +FAIL=0 +WARN=0 +RESULTS=() + +# ── Helpers ───────────────────────────────────────────────────────────── + +check() { + local name="$1" + local category="$2" + local required="$3" # "required" or "optional" + local description="$4" + shift 4 + local cmd=("$@") + + if eval "${cmd[@]}" >/dev/null 2>&1; then + RESULTS+=("{\"name\":\"$name\",\"category\":\"$category\",\"status\":\"pass\",\"required\":\"$required\",\"description\":\"$description\"}") + PASS=$((PASS + 1)) + if [ "$JSON_OUTPUT" = false ]; then + printf " ✓ %-40s %s\n" "$name" "$description" + fi + else + if [ "$required" = "required" ]; then + RESULTS+=("{\"name\":\"$name\",\"category\":\"$category\",\"status\":\"fail\",\"required\":\"$required\",\"description\":\"$description\"}") + FAIL=$((FAIL + 1)) + if [ "$JSON_OUTPUT" = false ]; then + printf " ✗ %-40s %s (REQUIRED)\n" "$name" "$description" + fi + else + 
RESULTS+=("{\"name\":\"$name\",\"category\":\"$category\",\"status\":\"warn\",\"required\":\"$required\",\"description\":\"$description\"}") + WARN=$((WARN + 1)) + if [ "$JSON_OUTPUT" = false ]; then + printf " ~ %-40s %s (optional)\n" "$name" "$description" + fi + fi + fi +} + +check_module() { + local module="$1" + # Check /proc/modules (loaded), /proc/config.gz (builtin), or /sys/module + if [ -d "/sys/module/$module" ]; then + return 0 + fi + if grep -q "^${module} " /proc/modules 2>/dev/null; then + return 0 + fi + # Check if compiled in via /proc/config.gz or /boot/config + local config_key + config_key="CONFIG_$(echo "$module" | tr '[:lower:]-' '[:upper:]_')" + if [ -f /proc/config.gz ]; then + if zcat /proc/config.gz 2>/dev/null | grep -q "^${config_key}=[ym]"; then + return 0 + fi + fi + return 1 +} + +# ── Capability Checks ────────────────────────────────────────────────── + +if [ "$JSON_OUTPUT" = false ]; then + echo "VM Kernel Capability Check" + echo "==========================" + echo "" + echo "Kernel: $(uname -r)" + echo "" +fi + +# --- Network Namespaces --- +if [ "$JSON_OUTPUT" = false ]; then echo "[Network Namespaces]"; fi + +check "net_namespace" "netns" "required" \ + "network namespace support (CONFIG_NET_NS)" \ + "test -d /proc/self/ns && ls /proc/self/ns/net" + +check "veth_pair" "netns" "required" \ + "veth pair creation (CONFIG_VETH)" \ + "ip link add _chk0 type veth peer name _chk1 && ip link del _chk0" + +# --- Linux Bridge --- +if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[Linux Bridge]"; fi + +check "bridge_module" "bridge" "required" \ + "bridge device support (CONFIG_BRIDGE)" \ + "ip link add _chkbr0 type bridge && ip link del _chkbr0" + +check "bridge_nf_call" "bridge" "required" \ + "bridge netfilter (CONFIG_BRIDGE_NETFILTER)" \ + "check_module bridge && test -f /proc/sys/net/bridge/bridge-nf-call-iptables 2>/dev/null || check_module br_netfilter" + +# --- Netfilter / iptables --- +if [ "$JSON_OUTPUT" = false ]; then echo 
""; echo "[Netfilter / iptables]"; fi + +check "netfilter" "netfilter" "required" \ + "netfilter framework (CONFIG_NETFILTER)" \ + "check_module nf_conntrack || check_module ip_tables || test -d /proc/sys/net/netfilter" + +check "nf_conntrack" "netfilter" "required" \ + "connection tracking (CONFIG_NF_CONNTRACK)" \ + "check_module nf_conntrack" + +check "nf_nat" "netfilter" "required" \ + "NAT support (CONFIG_NF_NAT)" \ + "check_module nf_nat" + +check "iptables_filter" "netfilter" "required" \ + "iptables filter (CONFIG_IP_NF_FILTER)" \ + "check_module ip_tables || iptables -L -n >/dev/null 2>&1" + +check "iptables_nat" "netfilter" "required" \ + "iptables NAT (CONFIG_IP_NF_NAT)" \ + "check_module iptable_nat || iptables -t nat -L -n >/dev/null 2>&1" + +check "iptables_mangle" "netfilter" "optional" \ + "iptables mangle (CONFIG_IP_NF_MANGLE)" \ + "check_module iptable_mangle || iptables -t mangle -L -n >/dev/null 2>&1" + +check "nf_conntrack_netlink" "netfilter" "optional" \ + "conntrack netlink (CONFIG_NF_CT_NETLINK)" \ + "check_module nf_conntrack_netlink" + +check "nftables" "netfilter" "optional" \ + "nftables (CONFIG_NF_TABLES)" \ + "check_module nf_tables || nft list ruleset >/dev/null 2>&1" + +# --- IP Forwarding / Routing --- +if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[IP Forwarding]"; fi + +check "ip_forward" "routing" "required" \ + "IP forwarding (sysctl)" \ + "test -f /proc/sys/net/ipv4/ip_forward" + +check "ip_route" "routing" "required" \ + "IP routing" \ + "ip route show >/dev/null 2>&1" + +# --- CNI Plugin Dependencies --- +if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[CNI Plugins]"; fi + +check "cni_bridge_bin" "cni" "required" \ + "bridge CNI plugin binary" \ + "test -x /opt/cni/bin/bridge || find /var/lib/rancher/k3s/data -name bridge -type f 2>/dev/null | head -1 | grep -q ." 
+
+check "cni_host_local_bin" "cni" "required" \
+  "host-local IPAM plugin binary" \
+  "test -x /opt/cni/bin/host-local || find /var/lib/rancher/k3s/data -name host-local -type f 2>/dev/null | head -1 | grep -q ."
+
+check "cni_loopback_bin" "cni" "required" \
+  "loopback CNI plugin binary" \
+  "test -x /opt/cni/bin/loopback || find /var/lib/rancher/k3s/data -name loopback -type f 2>/dev/null | head -1 | grep -q ."
+
+check "cni_portmap_bin" "cni" "optional" \
+  "portmap CNI plugin binary (needs iptables)" \
+  "test -x /opt/cni/bin/portmap || find /var/lib/rancher/k3s/data -name portmap -type f 2>/dev/null | head -1 | grep -q ."
+
+# --- Userspace Tools ---
+if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[Userspace Tools]"; fi
+
+check "iptables_bin" "userspace" "required" \
+  "iptables binary" \
+  "command -v iptables"
+
+check "conntrack_bin" "userspace" "optional" \
+  "conntrack binary" \
+  "command -v conntrack"
+
+check "ip_bin" "userspace" "required" \
+  "iproute2 (ip command)" \
+  "command -v ip"
+
+# ── Summary ──────────────────────────────────────────────────────────
+
+if [ "$JSON_OUTPUT" = true ]; then
+  echo "{"
+  echo "  \"kernel\": \"$(uname -r)\","
+  echo "  \"timestamp\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\","
+  echo "  \"pass\": $PASS,"
+  echo "  \"fail\": $FAIL,"
+  echo "  \"warn\": $WARN,"
+  echo "  \"results\": ["
+  local_first=true  # JSON wants commas between elements, none after the last
+  for r in "${RESULTS[@]}"; do
+    if [ "$local_first" = true ]; then
+      local_first=false
+    else
+      echo ","
+    fi
+    printf "    %s" "$r"
+  done
+  echo ""
+  echo "  ]"
+  echo "}"; exit $((FAIL > 0 ? 1 : 0))  # honor documented exit codes (1 = required missing) in --json mode too
+else
+  echo ""
+  echo "─────────────────────────────────────────"
+  printf "Results: %d passed, %d failed, %d warnings\n" "$PASS" "$FAIL" "$WARN"
+
+  if [ "$FAIL" -gt 0 ]; then
+    echo ""
+    echo "FAIL: $FAIL required capabilities missing."
+    echo "The VM kernel needs to be rebuilt with the missing features."
+    echo "See: crates/openshell-vm/runtime/kernel/README.md"
+    exit 1
+  else
+    echo ""
+    echo "PASS: All required capabilities present."
+ exit 0 + fi +fi diff --git a/crates/openshell-vm/scripts/openshell-vm-exec-agent.py b/crates/openshell-vm/scripts/openshell-vm-exec-agent.py new file mode 100644 index 000000000..d7ffd81df --- /dev/null +++ b/crates/openshell-vm/scripts/openshell-vm-exec-agent.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import base64 +import json +import os +import socket +import subprocess +import sys +import threading + + +PORT = 10777 + + +def recv_line(sock_file): + line = sock_file.readline() + if not line: + return None + return json.loads(line.decode("utf-8")) + + +def send_frame(sock_file, lock, frame): + data = (json.dumps(frame, separators=(",", ":")) + "\n").encode("utf-8") + with lock: + sock_file.write(data) + sock_file.flush() + + +def validate_env(env_items): + env = {} + for item in env_items: + if "=" not in item: + raise ValueError(f"invalid env item: {item}") + key, value = item.split("=", 1) + if not key or not (key[0] == "_" or key[0].isalpha()): + raise ValueError(f"invalid env key: {key}") + if not all(ch == "_" or ch.isalnum() for ch in key): + raise ValueError(f"invalid env key: {key}") + env[key] = value + return env + + +def stream_reader(pipe, frame_type, sock_file, lock): + try: + while True: + chunk = pipe.read(8192) + if not chunk: + break + send_frame( + sock_file, + lock, + {"type": frame_type, "data": base64.b64encode(chunk).decode("ascii")}, + ) + finally: + pipe.close() + + +def stdin_writer(proc, sock_file, sock, lock): + """Forward stdin frames from the client to the subprocess. + + When the client sends ``stdin_close`` (or the connection drops), we + close the subprocess's stdin pipe so it sees EOF. 
We must NOT + terminate the subprocess or shut down the socket here — the main + thread needs the process to finish naturally and the stdout/stderr + reader threads still need to flush their data back to the client. + """ + try: + while True: + frame = recv_line(sock_file) + if frame is None: + break + kind = frame.get("type") + if kind == "stdin": + payload = base64.b64decode(frame.get("data", "")) + if proc.stdin is not None: + proc.stdin.write(payload) + proc.stdin.flush() + elif kind == "stdin_close": + break + else: + send_frame( + sock_file, + lock, + {"type": "error", "message": f"unknown frame type: {kind}"}, + ) + break + except BrokenPipeError: + pass + finally: + try: + if proc.stdin is not None: + proc.stdin.close() + except OSError: + pass + + +def handle_client(conn): + sock_file = conn.makefile("rwb", buffering=0) + lock = threading.Lock() + try: + request = recv_line(sock_file) + if request is None: + return + + argv = request.get("argv") or ["sh"] + cwd = request.get("cwd") + env = os.environ.copy() + env.update(validate_env(request.get("env") or [])) + + proc = subprocess.Popen( + argv, + cwd=cwd or "/", + env=env, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + stdout_thread = threading.Thread( + target=stream_reader, + args=(proc.stdout, "stdout", sock_file, lock), + daemon=True, + ) + stderr_thread = threading.Thread( + target=stream_reader, + args=(proc.stderr, "stderr", sock_file, lock), + daemon=True, + ) + stdin_thread = threading.Thread( + target=stdin_writer, args=(proc, sock_file, conn, lock), daemon=True + ) + + stdout_thread.start() + stderr_thread.start() + stdin_thread.start() + + code = proc.wait() + stdout_thread.join() + stderr_thread.join() + send_frame(sock_file, lock, {"type": "exit", "code": code}) + except Exception as exc: + try: + send_frame(sock_file, lock, {"type": "error", "message": str(exc)}) + except Exception: + pass + finally: + try: + sock_file.close() + except Exception: + pass 
+            conn.close()
+
+
+def main():
+    """Accept vsock connections forever, one handler thread per client.
+
+    Returns 1 (used as the process exit status via SystemExit below) when
+    the interpreter was built without AF_VSOCK support; otherwise loops
+    forever and never returns.
+    """
+    # AF_VSOCK is only present on Linux builds of CPython with vsock
+    # support; bail out cleanly elsewhere.
+    if not hasattr(socket, "AF_VSOCK"):
+        print("AF_VSOCK is not available", file=sys.stderr)
+        return 1
+
+    server = socket.socket(socket.AF_VSOCK, socket.SOCK_STREAM)
+    server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+    # VMADDR_CID_ANY: accept from any context ID.  PORT is a module-level
+    # constant defined earlier in this file.
+    server.bind((socket.VMADDR_CID_ANY, PORT))
+    server.listen(16)
+
+    while True:
+        conn, _addr = server.accept()
+        # Daemon thread: in-flight clients do not block interpreter exit.
+        thread = threading.Thread(target=handle_client, args=(conn,), daemon=True)
+        thread.start()
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/crates/openshell-vm/scripts/openshell-vm-init.sh b/crates/openshell-vm/scripts/openshell-vm-init.sh
new file mode 100755
index 000000000..1cb686a31
--- /dev/null
+++ b/crates/openshell-vm/scripts/openshell-vm-init.sh
@@ -0,0 +1,833 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Init script for the openshell-vm microVM. Runs as PID 1 inside the libkrun VM.
+#
+# Mounts essential virtual filesystems, configures networking, then execs
+# k3s server. If the rootfs was pre-initialized by build-rootfs.sh (sentinel
+# at /opt/openshell/.initialized), the full manifest setup is skipped and
+# k3s resumes from its persisted state (~3-5s startup).
+
+set -euo pipefail
+
+# Millisecond epoch where %N is supported, seconds otherwise.
+# NOTE(review): some date implementations (e.g. busybox) may print "%3N"
+# literally instead of failing, which would break the arithmetic in ts()
+# below — confirm against the rootfs's date binary.
+BOOT_START=$(date +%s%3N 2>/dev/null || date +%s)
+
+# Print "[S.mmm s] message" relative to BOOT_START, for boot profiling.
+ts() {
+  local now
+  now=$(date +%s%3N 2>/dev/null || date +%s)
+  local elapsed=$(( (now - BOOT_START) ))
+  printf "[%d.%03ds] %s\n" $((elapsed / 1000)) $((elapsed % 1000)) "$*"
+}
+
+# Fast-path sentinel written by build-rootfs.sh at image build time.
+PRE_INITIALIZED=false
+if [ -f /opt/openshell/.initialized ]; then
+  PRE_INITIALIZED=true
+  ts "pre-initialized rootfs detected (fast path)"
+fi
+
+# ── Mount essential filesystems (parallel) ──────────────────────────────
+# These are independent; mount them concurrently.
+ +mount -t proc proc /proc 2>/dev/null & +mount -t sysfs sysfs /sys 2>/dev/null & +mount -t tmpfs tmpfs /tmp 2>/dev/null & +mount -t tmpfs tmpfs /run 2>/dev/null & +mount -t devtmpfs devtmpfs /dev 2>/dev/null & +wait + +# These depend on /dev being mounted. +mkdir -p /dev/pts /dev/shm +mount -t devpts devpts /dev/pts 2>/dev/null & +mount -t tmpfs tmpfs /dev/shm 2>/dev/null & + +# cgroup2 (unified hierarchy) — required by k3s/containerd. +mkdir -p /sys/fs/cgroup +mount -t cgroup2 cgroup2 /sys/fs/cgroup 2>/dev/null & +wait + +ts "filesystems mounted" + +# ── Networking ────────────────────────────────────────────────────────── + +# Non-critical: hostname is cosmetic. +hostname openshell-vm 2>/dev/null || true + +# Ensure loopback is up (k3s binds to 127.0.0.1). +ip link set lo up 2>/dev/null || true + +# Detect whether we have a real network interface (gvproxy) or need a +# dummy interface (TSI / no networking). +if ip link show eth0 >/dev/null 2>&1; then + # gvproxy networking — bring up eth0 and get an IP via DHCP. + # gvproxy has a built-in DHCP server that assigns 192.168.127.2/24 + # with gateway 192.168.127.1 and configures ARP properly. + ts "detected eth0 (gvproxy networking)" + ip link set eth0 up 2>/dev/null || true + + # Use DHCP to get IP and configure routes. gvproxy's DHCP server + # handles ARP resolution which static config does not. + if command -v udhcpc >/dev/null 2>&1; then + # udhcpc needs a script to apply the lease. Use the busybox + # default script if available, otherwise write a minimal one. + UDHCPC_SCRIPT="/usr/share/udhcpc/default.script" + if [ ! 
-f "$UDHCPC_SCRIPT" ]; then + mkdir -p /usr/share/udhcpc + cat > "$UDHCPC_SCRIPT" << 'DHCP_SCRIPT' +#!/bin/sh +case "$1" in + bound|renew) + ip addr flush dev "$interface" + ip addr add "$ip/$mask" dev "$interface" + if [ -n "$router" ]; then + ip route add default via $router dev "$interface" + fi + if [ -n "$dns" ]; then + echo -n > /etc/resolv.conf + for d in $dns; do + echo "nameserver $d" >> /etc/resolv.conf + done + fi + ;; +esac +DHCP_SCRIPT + chmod +x "$UDHCPC_SCRIPT" + fi + # -f: stay in foreground, -q: quit after obtaining lease, + # -n: exit if no lease, -T 1: 1s between retries, -t 3: 3 retries + # -A 1: wait 1s before first retry (aggressive for local gvproxy) + if ! udhcpc -i eth0 -f -q -n -T 1 -t 3 -A 1 -s "$UDHCPC_SCRIPT" 2>&1; then + ts "WARNING: DHCP failed, falling back to static config" + ip addr add 192.168.127.2/24 dev eth0 2>/dev/null || true + ip route add default via 192.168.127.1 2>/dev/null || true + fi + else + # Fallback to static config if no DHCP client available. + ts "no DHCP client, using static config" + ip addr add 192.168.127.2/24 dev eth0 2>/dev/null || true + ip route add default via 192.168.127.1 2>/dev/null || true + fi + + # Ensure DNS is configured. DHCP should have set /etc/resolv.conf, + # but if it didn't (or static fallback was used), provide a default. + if [ ! -s /etc/resolv.conf ]; then + echo "nameserver 8.8.8.8" > /etc/resolv.conf + echo "nameserver 8.8.4.4" >> /etc/resolv.conf + fi + + # Read back the IP we got (from DHCP or static). + NODE_IP=$(ip -4 addr show eth0 2>/dev/null | awk '/inet / {split($2,a,"/"); print a[1]; exit}') + NODE_IP="${NODE_IP:-192.168.127.2}" + ts "eth0 IP: $NODE_IP" +else + # TSI or no networking — create a dummy interface for k3s. 
+ ts "no eth0 found, using dummy interface (TSI mode)" + ip link add dummy0 type dummy 2>/dev/null || true + ip addr add 10.0.2.15/24 dev dummy0 2>/dev/null || true + ip link set dummy0 up 2>/dev/null || true + ip route add default dev dummy0 2>/dev/null || true + + NODE_IP="10.0.2.15" +fi + +# ── k3s data directories ─────────────────────────────────────────────── + +mkdir -p /var/lib/rancher/k3s +mkdir -p /etc/rancher/k3s + +ROOTFS_CONTAINERD_DIR="/var/lib/rancher/k3s/agent/containerd" +CONTAINERD_DIR="$ROOTFS_CONTAINERD_DIR" + +# ── State disk: mount ALL mutable runtime state on the block device ──── +# +# The virtio-fs share is the immutable OS image (read-only at runtime). +# All state that changes after first boot lives on an ext4 virtio-blk +# disk (/dev/vda). This gives full filesystem semantics (chown, hard +# links, fsync) and keeps every writable path off the host filesystem. +# +# Directories on the state disk: +# containerd/ → k3s/agent/containerd (overlayfs snapshotter) +# k3s-agent/ → k3s/agent (kubelet certs, kubeconfigs) +# k3s-server-db/ → k3s/server/db (kine SQLite) +# k3s-server-tls/ → k3s/server/tls (cluster TLS certs) +# k3s-server-cred/ → k3s/server/cred (bootstrap credentials) +# k3s-server-etc/ → k3s/server/etc (k3s-generated config) +# local-path-storage/ → k3s/storage (PVC data) +# pki/ → opt/openshell/pki (mTLS CA + server/client certs) +# +# Directories that stay on virtio-fs (read-only seeds from build-rootfs.sh): +# k3s/server/manifests (k3s auto-deploy manifests, written by init script) +# k3s/server/static (k3s bundled charts) +# k3s/agent/images (airgap image tarballs, seeded once then on disk) + +STATE_DISK_DEVICE="${OPENSHELL_VM_STATE_DISK_DEVICE:-/dev/vda}" +STATE_MOUNT_DIR="/mnt/openshell-state" +STATE_DISK_ACTIVE=false +mkdir -p "$STATE_MOUNT_DIR" + +if [ -b "$STATE_DISK_DEVICE" ]; then + ts "configuring block-backed runtime state on ${STATE_DISK_DEVICE}" + if ! 
blkid "$STATE_DISK_DEVICE" >/dev/null 2>&1; then + mkfs.ext4 -F -L openshell-state "$STATE_DISK_DEVICE" >/dev/null 2>&1 + ts "formatted state disk" + fi + mount -t ext4 -o noatime "$STATE_DISK_DEVICE" "$STATE_MOUNT_DIR" + + # ── k3s agent: seed images once, then bind entire agent dir ────────── + # agent/images contains airgap image tarballs baked into the rootfs. + # Seed them to the block device on first use so containerd can import + # them; after that they live on the block device alongside everything else. + STATE_K3S_AGENT_DIR="${STATE_MOUNT_DIR}/k3s-agent" + mkdir -p "$STATE_K3S_AGENT_DIR" + if [ ! -f "${STATE_MOUNT_DIR}/.seeded-agent-images" ]; then + VIRTIOFS_AGENT_IMAGES="/var/lib/rancher/k3s/agent/images" + if [ -d "$VIRTIOFS_AGENT_IMAGES" ] && [ -n "$(ls -A "$VIRTIOFS_AGENT_IMAGES" 2>/dev/null)" ]; then + ts "seeding agent images to block device" + mkdir -p "${STATE_K3S_AGENT_DIR}/images" + tar -C "$VIRTIOFS_AGENT_IMAGES" -cf - . | tar -C "${STATE_K3S_AGENT_DIR}/images" -xf - + fi + date -u +%Y-%m-%dT%H:%M:%SZ > "${STATE_MOUNT_DIR}/.seeded-agent-images" + fi + mkdir -p /var/lib/rancher/k3s/agent + mount --bind "$STATE_K3S_AGENT_DIR" /var/lib/rancher/k3s/agent + + # ── containerd: bind on top of agent ───────────────────────────────── + # Seeded from the virtiofs rootfs on first use (overlayfs snapshots, + # content store, meta.db pre-populated by build-rootfs.sh). + STATE_CONTAINERD_DIR="${STATE_MOUNT_DIR}/containerd" + mkdir -p "$STATE_CONTAINERD_DIR" + if [ ! -f "${STATE_MOUNT_DIR}/.seeded-containerd" ]; then + if [ -d "$ROOTFS_CONTAINERD_DIR" ] && [ -n "$(ls -A "$ROOTFS_CONTAINERD_DIR" 2>/dev/null)" ]; then + ts "seeding containerd state to block device" + tar -C "$ROOTFS_CONTAINERD_DIR" -cf - . 
| tar -C "$STATE_CONTAINERD_DIR" -xf - + else + ts "containerd state is empty; starting fresh" + fi + date -u +%Y-%m-%dT%H:%M:%SZ > "${STATE_MOUNT_DIR}/.seeded-containerd" + fi + mkdir -p "$ROOTFS_CONTAINERD_DIR" + mount --bind "$STATE_CONTAINERD_DIR" "$ROOTFS_CONTAINERD_DIR" + + # ── k3s server runtime state ────────────────────────────────────────── + # server/manifests and server/static stay on virtiofs (written by init + # script each boot from /opt/openshell/manifests; read-only after that). + for pair in \ + "k3s-server-db:/var/lib/rancher/k3s/server/db" \ + "k3s-server-tls:/var/lib/rancher/k3s/server/tls" \ + "k3s-server-cred:/var/lib/rancher/k3s/server/cred" \ + "k3s-server-etc:/var/lib/rancher/k3s/server/etc" + do + src="${STATE_MOUNT_DIR}/${pair%%:*}" + dst="${pair#*:}" + mkdir -p "$src" "$dst" + mount --bind "$src" "$dst" + done + + # ── local-path PVC storage ───────────────────────────────────────────── + mkdir -p "${STATE_MOUNT_DIR}/local-path-storage" /var/lib/rancher/k3s/storage + mount --bind "${STATE_MOUNT_DIR}/local-path-storage" /var/lib/rancher/k3s/storage + + # ── PKI ──────────────────────────────────────────────────────────────── + # Certs live on the block device; the host reads them via the exec + # agent (vsock port 10777) instead of polling the virtiofs rootfs path. + mkdir -p "${STATE_MOUNT_DIR}/pki" /opt/openshell/pki + mount --bind "${STATE_MOUNT_DIR}/pki" /opt/openshell/pki + + STATE_DISK_ACTIVE=true + ts "all runtime state mounted from block device" +else + ts "no block device found; using virtiofs-backed runtime state" +fi + +# Clean stale sockets from previous boots. Sockets live in /run (tmpfs) +# and /var/lib/rancher/k3s — they're stale on every boot regardless of +# whether state is on virtiofs or the block device. 
+find /var/lib/rancher/k3s -name '*.sock' -delete 2>/dev/null || true +find /run -name '*.sock' -delete 2>/dev/null || true +# On the block-device path, node-passwd is regenerated by k3s on each +# start; clear it so k3s doesn't fail node re-registration validation. +rm -f /var/lib/rancher/k3s/server/cred/node-passwd 2>/dev/null || true + +# Clean stale containerd runtime state from previous boots. +# +# The rootfs persists across VM restarts via virtio-fs. The overlayfs +# snapshotter now lives on the host-backed state disk when present, so +# snapshot data and meta.db persist across boots. We only clean runtime +# state (shim PIDs, sockets) that becomes stale when the VM restarts. +if [ -d "$CONTAINERD_DIR" ]; then + # Remove runtime task state (stale shim PIDs, sockets from dead processes). + rm -rf "${CONTAINERD_DIR}/io.containerd.runtime.v2.task" 2>/dev/null || true + # Remove sandbox controller shim state. Stale sandbox records cause + # containerd to reuse network namespaces from previous boots, which + # already have routes configured. The CNI bridge plugin then fails + # with "file exists" when adding the default route on retry. + rm -rf "${CONTAINERD_DIR}/io.containerd.sandbox.controller.v1.shim" 2>/dev/null || true + # Clean stale ingest temp files from the content store. + rm -rf "${CONTAINERD_DIR}/io.containerd.content.v1.content/ingest" 2>/dev/null || true + mkdir -p "${CONTAINERD_DIR}/io.containerd.content.v1.content/ingest" + # meta.db and overlayfs snapshots persist across boots on virtio-fs. + # No need to delete meta.db — snapshot metadata remains valid since + # the snapshotter directory is no longer backed by volatile tmpfs. + ts "cleaned containerd runtime state (meta.db + snapshots preserved)" +fi +rm -rf /run/k3s 2>/dev/null || true + +# Ensure the overlayfs snapshotter directory exists. The snapshotter +# runs directly on virtio-fs, so layer data and snapshot metadata +# persist across VM restarts. 
This eliminates the need to re-import +# image tarballs and re-extract layers on every boot, significantly +# reducing sandbox creation time. +OVERLAYFS_DIR="${CONTAINERD_DIR}/io.containerd.snapshotter.v1.overlayfs" +mkdir -p "$OVERLAYFS_DIR" +if [ "$STATE_DISK_ACTIVE" = true ]; then + ts "overlayfs snapshotter on block-backed containerd state" +else + ts "overlayfs snapshotter on virtio-fs (persistent)" +fi + +ts "stale artifacts cleaned" + +# ── Clean stale CNI / pod networking state ────────────────────────────── +# The rootfs persists across VM restarts via virtio-fs. Previous pod +# sandboxes leave behind veth pairs, bridge routes, host-local IPAM +# allocations, and network namespaces. If not cleaned, the bridge CNI +# plugin fails with: +# "failed to add route ... file exists" +# because the default route via cni0 already exists from the prior boot, +# or a stale network namespace already has the route configured. + +# Tear down the CNI bridge and its associated routes. +if ip link show cni0 >/dev/null 2>&1; then + ip link set cni0 down 2>/dev/null || true + ip link delete cni0 2>/dev/null || true + ts "deleted stale cni0 bridge" +fi + +# Remove any leftover veth pairs (CNI bridge plugin creates vethXXXX). +veths=$(ip -o link show type veth 2>/dev/null | awk -F': ' '{print $2}' | cut -d'@' -f1 || true) +for veth in $veths; do + ip link delete "$veth" 2>/dev/null || true +done + +# Flush host-local IPAM allocations so IPs can be reassigned cleanly. +rm -rf /var/lib/cni/networks 2>/dev/null || true +rm -rf /var/lib/cni/results 2>/dev/null || true + +# Flush any stale CNI-added routes for the pod CIDR. These can conflict +# with routes the bridge plugin tries to add on the next boot. +ip route flush 10.42.0.0/24 2>/dev/null || true + +# Clean up stale pod network namespaces from previous boots. Containerd +# creates named netns under /var/run/netns/ for each pod sandbox. 
If +# these persist across VM restarts, the CNI bridge plugin fails when +# adding routes because the stale netns already has the default route +# configured from the prior boot. Removing all named network namespaces +# forces containerd to create fresh ones. +if [ -d /var/run/netns ]; then + netns_list=$(ip netns list 2>/dev/null | awk '{print $1}' || true) + for ns in $netns_list; do + ip netns delete "$ns" 2>/dev/null || true + done +fi +# Also clean the netns bind-mount directory used by containerd/CRI. +# Containerd may use /run/netns/ or /var/run/netns/ (same via tmpfs). +rm -rf /run/netns/* 2>/dev/null || true +rm -rf /var/run/netns/* 2>/dev/null || true + +ts "stale CNI networking state cleaned" + +# ── Network profile detection ─────────────────────────────────────────── +# Detect early so manifest patching and k3s flags both use the same value. +# +# "bridge" is the only supported profile. It requires a custom libkrunfw +# with CONFIG_BRIDGE, CONFIG_NETFILTER, CONFIG_NF_NAT built in. If the +# kernel lacks these capabilities the VM cannot run pod networking and we +# fail fast with an actionable error. + +NET_PROFILE="bridge" + +ts "network profile: ${NET_PROFILE}" + +# Validate that the kernel actually has the required capabilities. +_caps_ok=true +if ! ip link add _cap_br0 type bridge 2>/dev/null; then + echo "ERROR: kernel lacks bridge support (CONFIG_BRIDGE). Use a custom libkrunfw." >&2 + _caps_ok=false +else + ip link del _cap_br0 2>/dev/null || true +fi +if [ ! -d /proc/sys/net/netfilter ] && [ ! -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + echo "ERROR: kernel lacks netfilter support (CONFIG_NETFILTER). Use a custom libkrunfw." >&2 + _caps_ok=false +fi +if [ "$_caps_ok" = false ]; then + echo "FATAL: required kernel capabilities missing — cannot configure pod networking." >&2 + echo "See: architecture/custom-vm-runtime.md for build instructions." 
>&2 + exit 1 +fi + +# ── Deploy bundled manifests (cold boot only) ─────────────────────────── +# On pre-initialized rootfs, manifests are already in place from the +# build-time k3s boot. Skip this entirely for fast startup. + +K3S_MANIFESTS="/var/lib/rancher/k3s/server/manifests" +BUNDLED_MANIFESTS="/opt/openshell/manifests" + +if [ "$PRE_INITIALIZED" = false ]; then + + mkdir -p "$K3S_MANIFESTS" + + if [ -d "$BUNDLED_MANIFESTS" ]; then + ts "deploying bundled manifests (cold boot)..." + for manifest in "$BUNDLED_MANIFESTS"/*.yaml; do + [ ! -f "$manifest" ] && continue + cp "$manifest" "$K3S_MANIFESTS/" + done + + # Remove stale OpenShell-managed manifests from previous boots. + for existing in "$K3S_MANIFESTS"/openshell-*.yaml \ + "$K3S_MANIFESTS"/agent-*.yaml; do + [ ! -f "$existing" ] && continue + basename=$(basename "$existing") + if [ ! -f "$BUNDLED_MANIFESTS/$basename" ]; then + rm -f "$existing" + fi + done + fi + + # Restore helm chart tarballs from staging. A --reset wipes + # server/static/charts/ but the bundled charts survive in + # /opt/openshell/charts/. + BUNDLED_CHARTS="/opt/openshell/charts" + K3S_CHARTS="/var/lib/rancher/k3s/server/static/charts" + if [ -d "$BUNDLED_CHARTS" ]; then + mkdir -p "$K3S_CHARTS" + cp "$BUNDLED_CHARTS"/*.tgz "$K3S_CHARTS/" 2>/dev/null || true + ts "helm charts restored from staging" + fi + + ts "manifests deployed" +else + ts "skipping manifest deploy (pre-initialized)" +fi + +# Patch manifests for VM deployment constraints. +HELMCHART="$K3S_MANIFESTS/openshell-helmchart.yaml" +if [ -f "$HELMCHART" ]; then + # Use pre-loaded images and a tmp-backed database in the VM. + sed -i 's|__IMAGE_PULL_POLICY__|IfNotPresent|g' "$HELMCHART" + sed -i 's|__SANDBOX_IMAGE_PULL_POLICY__|"IfNotPresent"|g' "$HELMCHART" + sed -i 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g' "$HELMCHART" + # Clear SSH gateway placeholders (default 127.0.0.1 is correct for local VM). 
+ sed -i 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" + sed -i 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" + sed -i 's|__DISABLE_GATEWAY_AUTH__|false|g' "$HELMCHART" + sed -i 's|__DISABLE_TLS__|false|g' "$HELMCHART" + sed -i 's|hostGatewayIP: __HOST_GATEWAY_IP__|hostGatewayIP: ""|g' "$HELMCHART" + sed -i '/__CHART_CHECKSUM__/d' "$HELMCHART" +fi + +AGENT_MANIFEST="$K3S_MANIFESTS/agent-sandbox.yaml" +if [ -f "$AGENT_MANIFEST" ]; then + # Bridge CNI: agent-sandbox uses normal pod networking. + # kube-proxy is enabled so kubernetes.default.svc is reachable + # via ClusterIP — no need for KUBERNETES_SERVICE_HOST override. + sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST" + sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" + ts "agent-sandbox: using pod networking (bridge profile)" +fi + +# ── CNI configuration (bridge) ────────────────────────────────────────── +# Uses the bridge CNI plugin with iptables masquerade. Requires +# CONFIG_BRIDGE, CONFIG_NETFILTER, CONFIG_NF_NAT in the VM kernel +# (validated above at boot). kube-proxy uses nftables mode for service +# VIP routing. + +CNI_CONF_DIR="/etc/cni/net.d" +CNI_BIN_DIR="/opt/cni/bin" +mkdir -p "$CNI_CONF_DIR" "$CNI_BIN_DIR" + +# Enable IP forwarding (required for masquerade). +if ! echo 1 > /proc/sys/net/ipv4/ip_forward 2>/dev/null; then + echo "FATAL: failed to enable IP forwarding — pod networking will not work" >&2 + exit 1 +fi + +# Enable bridge netfilter call (required for CNI bridge masquerade to +# see bridged traffic). +if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + if ! 
echo 1 > /proc/sys/net/bridge/bridge-nf-call-iptables 2>/dev/null; then + ts "WARNING: failed to enable bridge-nf-call-iptables — CNI masquerade may not work" + fi +fi + +cat > "$CNI_CONF_DIR/10-bridge.conflist" << 'CNICFG' +{ + "cniVersion": "1.0.0", + "name": "bridge", + "plugins": [ + { + "type": "bridge", + "bridge": "cni0", + "isGateway": true, + "isDefaultGateway": true, + "ipMasq": true, + "hairpinMode": true, + "ipam": { + "type": "host-local", + "ranges": [[{ "subnet": "10.42.0.0/24" }]] + } + }, + { + "type": "portmap", + "capabilities": { "portMappings": true }, + "snat": true + }, + { + "type": "loopback" + } + ] +} +CNICFG + +# Remove any stale legacy ptp config. +rm -f "$CNI_CONF_DIR/10-ptp.conflist" 2>/dev/null || true + +ts "bridge CNI configured (cni0 + iptables masquerade)" + +# Start the local exec agent before k3s so `openshell-vm exec` works as soon as +# the VM has booted. It only listens on vsock, not on the guest network. +if command -v python3 >/dev/null 2>&1; then + ts "starting openshell-vm exec agent" + mkdir -p /run/openshell + setsid python3 /srv/openshell-vm-exec-agent.py >/run/openshell/openshell-vm-exec-agent.log 2>&1 & +else + ts "WARNING: python3 missing, openshell-vm exec agent disabled" +fi + +# Symlink k3s-bundled CNI binaries to the default containerd bin path. +# k3s extracts its tools to /var/lib/rancher/k3s/data//bin/ at startup. +# On cold boot this directory doesn't exist yet (k3s hasn't run), so we +# first try synchronously, then fall back to a background watcher that +# polls until k3s extracts the binaries and creates the symlinks before +# any pods can schedule. +link_cni_binaries() { + local data_bin="$1" + # Ensure execute permissions on all binaries. The rootfs may have + # been built on macOS where virtio-fs or docker export can strip + # execute bits from Linux ELF binaries. 
+ chmod +x "$data_bin"/* 2>/dev/null || true + if [ -d "$data_bin/aux" ]; then + chmod +x "$data_bin/aux"/* 2>/dev/null || true + fi + for plugin in bridge host-local loopback bandwidth portmap; do + [ -e "$data_bin/$plugin" ] && ln -sf "$data_bin/$plugin" "$CNI_BIN_DIR/$plugin" + done +} + +# Find the k3s data bin dir, excluding temporary extraction directories +# (k3s extracts to -tmp/ then renames to /). +find_k3s_data_bin() { + find /var/lib/rancher/k3s/data -maxdepth 2 -name bin -type d 2>/dev/null \ + | grep -v '\-tmp/' | head -1 || true +} + +K3S_DATA_BIN=$(find_k3s_data_bin) +if [ -n "$K3S_DATA_BIN" ]; then + link_cni_binaries "$K3S_DATA_BIN" + ts "CNI binaries linked from $K3S_DATA_BIN" +else + # Cold boot: k3s hasn't extracted binaries yet. Launch a background + # watcher that polls until the data dir appears (k3s creates it in + # the first ~2s of startup) and then symlinks the CNI plugins. + # We exclude -tmp directories to avoid symlinking to the transient + # extraction path that k3s renames once extraction completes. + ts "CNI binaries not yet available, starting background watcher" + setsid sh -c ' + CNI_BIN_DIR="/opt/cni/bin" + for i in $(seq 1 60); do + K3S_DATA_BIN=$(find /var/lib/rancher/k3s/data -maxdepth 2 -name bin -type d 2>/dev/null \ + | grep -v "\-tmp/" | head -1) + if [ -n "$K3S_DATA_BIN" ]; then + chmod +x "$K3S_DATA_BIN"/* 2>/dev/null || true + if [ -d "$K3S_DATA_BIN/aux" ]; then + chmod +x "$K3S_DATA_BIN/aux"/* 2>/dev/null || true + fi + for plugin in bridge host-local loopback bandwidth portmap; do + [ -e "$K3S_DATA_BIN/$plugin" ] && ln -sf "$K3S_DATA_BIN/$plugin" "$CNI_BIN_DIR/$plugin" + done + echo "[cni-watcher] CNI binaries linked from $K3S_DATA_BIN after ${i}s" + exit 0 + fi + sleep 1 + done + echo "[cni-watcher] ERROR: k3s data bin dir not found after 60s" + ' & +fi + +# Also clean up any flannel config from the k3s-specific CNI directory +# (pre-baked state from the Docker build used host-gw flannel). 
+rm -f "/var/lib/rancher/k3s/agent/etc/cni/net.d/10-flannel.conflist" 2>/dev/null || true + +# ── PKI: generate once, read via exec agent ─────────────────────────── +# Certs are generated on first boot and stored at /opt/openshell/pki/. +# With the block-device layout this path is on the state disk, fully +# isolated from the virtiofs host filesystem. +# The host-side bootstrap reads certs via the exec agent (vsock port +# 10777) by running `cat` on each PEM file. + +PKI_DIR="/opt/openshell/pki" +if [ ! -f "$PKI_DIR/ca.crt" ]; then + ts "generating PKI (first boot)..." + mkdir -p "$PKI_DIR" + + # CA + openssl req -x509 -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \ + -keyout "$PKI_DIR/ca.key" -out "$PKI_DIR/ca.crt" \ + -days 3650 -nodes -subj "/O=openshell/CN=openshell-ca" 2>/dev/null + + # Server cert with SANs + cat > "$PKI_DIR/server.cnf" </dev/null + openssl x509 -req -in "$PKI_DIR/server.csr" \ + -CA "$PKI_DIR/ca.crt" -CAkey "$PKI_DIR/ca.key" -CAcreateserial \ + -out "$PKI_DIR/server.crt" -days 3650 \ + -extensions v3_req -extfile "$PKI_DIR/server.cnf" 2>/dev/null + + # Client cert (must be v3 — rustls rejects v1) + cat > "$PKI_DIR/client.cnf" </dev/null + openssl x509 -req -in "$PKI_DIR/client.csr" \ + -CA "$PKI_DIR/ca.crt" -CAkey "$PKI_DIR/ca.key" -CAcreateserial \ + -out "$PKI_DIR/client.crt" -days 3650 \ + -extensions v3_client -extfile "$PKI_DIR/client.cnf" 2>/dev/null + + # Clean up CSRs + rm -f "$PKI_DIR"/*.csr "$PKI_DIR"/*.cnf "$PKI_DIR"/*.srl + + ts "PKI generated" +else + ts "existing PKI found, skipping generation" +fi + +SSH_HANDSHAKE_SECRET_FILE="${PKI_DIR}/ssh-handshake-secret" +if [ ! -f "$SSH_HANDSHAKE_SECRET_FILE" ]; then + ts "generating SSH handshake secret (first boot)..." + head -c 32 /dev/urandom | od -A n -t x1 | tr -d ' \n' > "$SSH_HANDSHAKE_SECRET_FILE" + chmod 600 "$SSH_HANDSHAKE_SECRET_FILE" +else + ts "existing SSH handshake secret found, reusing" +fi + +# Write TLS secrets as a k3s auto-deploy manifest. 
k3s applies any YAML +# in server/manifests/ on startup. We write this on every boot so that: +# - A --reset (which wipes the kine DB and server/ tree) gets secrets re-applied. +# - A corrupt kine DB (removed by the host-side corruption check) gets secrets +# re-applied on the fresh database. +# This is idempotent — k3s checksums manifests and only re-applies on change. +ts "writing TLS secrets manifest..." +mkdir -p "$K3S_MANIFESTS" +CA_CRT_B64=$(base64 -w0 < "$PKI_DIR/ca.crt") +SERVER_CRT_B64=$(base64 -w0 < "$PKI_DIR/server.crt") +SERVER_KEY_B64=$(base64 -w0 < "$PKI_DIR/server.key") +CLIENT_CRT_B64=$(base64 -w0 < "$PKI_DIR/client.crt") +CLIENT_KEY_B64=$(base64 -w0 < "$PKI_DIR/client.key") +SSH_HANDSHAKE_SECRET_B64=$(base64 -w0 < "$SSH_HANDSHAKE_SECRET_FILE") + +cat > "$K3S_MANIFESTS/openshell-tls-secrets.yaml" < "$DIAG" + exit 1 + fi + { + echo "=== [DIAG $(date +%s)] nft binary: $NFT ===" + echo "=== [DIAG] nft list tables ===" + "$NFT" list tables 2>&1 + echo "=== [DIAG] nft list ruleset (kube-proxy) ===" + "$NFT" list ruleset 2>&1 + echo "=== [DIAG] ss -tlnp ===" + ss -tlnp 2>&1 || busybox netstat -tlnp 2>&1 || echo "ss/netstat not available" + echo "=== [DIAG] ip addr ===" + ip addr 2>&1 + echo "=== [DIAG] ip route ===" + ip route 2>&1 + echo "=== [DIAG] iptables -t nat -L -n -v ===" + iptables -t nat -L -n -v 2>&1 + echo "=== [DIAG] kube-proxy healthz ===" + wget -q -O - http://127.0.0.1:10256/healthz 2>&1 || echo "healthz failed" + echo "=== [DIAG] conntrack -L ===" + conntrack -L 2>&1 || echo "conntrack not available" + echo "=== [DIAG] done ===" + } > "$DIAG" 2>&1 +' & +fi + +# ── Clear stale kine bootstrap lock ───────────────────────────────────── +# k3s uses kine with a SQLite backend at state.db. 
When k3s starts, kine +# sets a bootstrap lock row; if k3s is killed before completing bootstrap +# (SIGKILL, host crash, power loss), the lock persists and the next k3s +# instance hangs forever on: +# "Bootstrap key already locked — waiting for data to be populated by +# another server" +# +# We clear the lock row before starting k3s so that a warm boot with +# persistent state.db succeeds. If state.db doesn't exist (first boot or +# --reset), this is a harmless no-op. If state.db is corrupt, sqlite3 +# fails silently (|| true) and the host-side corruption check in exec.rs +# will have already removed the file. +KINE_DB="/var/lib/rancher/k3s/server/db/state.db" +if [ -f "$KINE_DB" ]; then + ts "clearing stale kine bootstrap lock (if any)" + # If sqlite3 fails (corrupt DB, missing binary), log the failure. + # The host-side corruption check in exec.rs handles the corrupt case, + # but we should still know about it. + if ! sqlite3 "$KINE_DB" "DELETE FROM kine WHERE name LIKE '/bootstrap/%';" 2>/dev/null; then + ts "WARNING: failed to clear kine bootstrap lock — k3s may hang if DB is corrupt" + fi + if ! sqlite3 "$KINE_DB" "PRAGMA wal_checkpoint(TRUNCATE);" 2>/dev/null; then + ts "WARNING: failed to checkpoint kine WAL" + fi +fi + +exec /usr/local/bin/k3s server "${K3S_ARGS[@]}" diff --git a/crates/openshell-vm/src/embedded.rs b/crates/openshell-vm/src/embedded.rs new file mode 100644 index 000000000..15eaf4bee --- /dev/null +++ b/crates/openshell-vm/src/embedded.rs @@ -0,0 +1,442 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Embedded VM runtime resources. +//! +//! Native libraries (libkrun, libkrunfw, gvproxy) and the rootfs are embedded as +//! zstd-compressed byte arrays and extracted to XDG cache directories on first use. +//! +//! Cache locations: +//! - Runtime: `~/.local/share/openshell/vm-runtime/{version}/` +//! 
- Rootfs: `~/.local/share/openshell/openshell-vm/{version}/instances//rootfs/` + +use std::fs; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; + +use indicatif::{ProgressBar, ProgressStyle}; + +use crate::VmError; + +// ── Platform-specific embedded resources ─────────────────────────────────── + +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +mod resources { + pub const LIBKRUN: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrun.dylib.zst")); + pub const LIBKRUNFW: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrunfw.5.dylib.zst")); + pub const GVPROXY: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/gvproxy.zst")); + pub const ROOTFS: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/rootfs.tar.zst")); + pub const LIBKRUN_NAME: &str = "libkrun.dylib"; + pub const LIBKRUNFW_NAME: &str = "libkrunfw.5.dylib"; +} + +#[cfg(all(target_os = "linux", target_arch = "aarch64"))] +mod resources { + pub const LIBKRUN: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrun.so.zst")); + pub const LIBKRUNFW: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrunfw.so.5.zst")); + pub const GVPROXY: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/gvproxy.zst")); + pub const ROOTFS: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/rootfs.tar.zst")); + pub const LIBKRUN_NAME: &str = "libkrun.so"; + pub const LIBKRUNFW_NAME: &str = "libkrunfw.so.5"; +} + +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +mod resources { + pub const LIBKRUN: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrun.so.zst")); + pub const LIBKRUNFW: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrunfw.so.5.zst")); + pub const GVPROXY: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/gvproxy.zst")); + pub const ROOTFS: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/rootfs.tar.zst")); + pub const LIBKRUN_NAME: &str = "libkrun.so"; + pub const LIBKRUNFW_NAME: &str = "libkrunfw.so.5"; +} + +// Fallback for unsupported platforms (will 
fail at runtime) +#[cfg(not(any( + all(target_os = "macos", target_arch = "aarch64"), + all(target_os = "linux", target_arch = "aarch64"), + all(target_os = "linux", target_arch = "x86_64"), +)))] +mod resources { + pub const LIBKRUN: &[u8] = &[]; + pub const LIBKRUNFW: &[u8] = &[]; + pub const GVPROXY: &[u8] = &[]; + pub const ROOTFS: &[u8] = &[]; + pub const LIBKRUN_NAME: &str = "libkrun"; + pub const LIBKRUNFW_NAME: &str = "libkrunfw"; +} + +const VERSION: &str = env!("CARGO_PKG_VERSION"); + +// ── Public API ───────────────────────────────────────────────────────────── + +/// Ensures the embedded VM runtime is extracted to the cache directory. +/// +/// Returns the path to the runtime directory containing: +/// - libkrun.{dylib,so} +/// - libkrunfw.{5.dylib,.so.5} +/// - gvproxy +/// +/// On first call, this extracts the compressed embedded resources to the cache. +/// Subsequent calls return the cached path if valid. +pub fn ensure_runtime_extracted() -> Result { + // Check if embedded resources are available (non-empty) + if resources::LIBKRUN.is_empty() { + return Err(VmError::HostSetup( + "VM runtime not embedded for this platform. \ + Supported: macOS ARM64, Linux ARM64, Linux x86_64" + .to_string(), + )); + } + + let cache_dir = runtime_cache_dir()?; + let version_marker = cache_dir.join(".version"); + + // Cache key: version + content fingerprint (so dev builds at 0.0.0 + // still invalidate when the embedded libraries change). 
+ let cache_key = runtime_cache_key(); + + // Check if already extracted with the correct cache key + if version_marker.exists() + && let Ok(cached_key) = fs::read_to_string(&version_marker) + && cached_key.trim() == cache_key + { + // Validate files exist + if validate_runtime_dir(&cache_dir).is_ok() { + tracing::debug!( + path = %cache_dir.display(), + "Using cached VM runtime" + ); + return Ok(cache_dir); + } + } + + // Clean up old versions before extracting new one + cleanup_old_versions(&cache_dir)?; + + // Create fresh directory + if cache_dir.exists() { + fs::remove_dir_all(&cache_dir) + .map_err(|e| VmError::HostSetup(format!("remove old cache: {e}")))?; + } + fs::create_dir_all(&cache_dir) + .map_err(|e| VmError::HostSetup(format!("create cache dir: {e}")))?; + + tracing::info!( + path = %cache_dir.display(), + version = VERSION, + "Extracting embedded VM runtime" + ); + + // Extract all resources + extract_resource(resources::LIBKRUN, &cache_dir.join(resources::LIBKRUN_NAME))?; + extract_resource( + resources::LIBKRUNFW, + &cache_dir.join(resources::LIBKRUNFW_NAME), + )?; + extract_resource(resources::GVPROXY, &cache_dir.join("gvproxy"))?; + + // Make gvproxy executable + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + fs::set_permissions(cache_dir.join("gvproxy"), fs::Permissions::from_mode(0o755)) + .map_err(|e| VmError::HostSetup(format!("chmod gvproxy: {e}")))?; + } + + // Write version marker (includes content fingerprint for cache invalidation) + fs::write(&version_marker, runtime_cache_key()) + .map_err(|e| VmError::HostSetup(format!("write version marker: {e}")))?; + + tracing::info!( + path = %cache_dir.display(), + "VM runtime extracted successfully" + ); + + Ok(cache_dir) +} + +/// Returns the path where the runtime would be cached (without extracting). +pub fn runtime_cache_path() -> Result { + runtime_cache_dir() +} + +/// Extract the embedded rootfs to the given destination directory. 
+///
+/// If the destination already exists, it is returned as-is (no re-extraction).
+/// Otherwise the embedded `rootfs.tar.zst` is decompressed and unpacked into `dest`.
+///
+/// A `.version` marker is written after successful extraction so that
+/// version-mismatched rootfs directories are detected and rebuilt.
+pub fn extract_rootfs_to(dest: &Path) -> Result<(), VmError> {
+    // An empty ROOTFS constant means this binary was built without the
+    // embedded artifacts (the fallback `resources` module).
+    if resources::ROOTFS.is_empty() {
+        return Err(VmError::HostSetup(
+            "Rootfs not embedded. Build with: mise run vm:build:embedded".to_string(),
+        ));
+    }
+
+    let version_marker = dest.join(".version");
+
+    // Already extracted with the correct version — nothing to do.
+    if version_marker.exists()
+        && let Ok(cached_version) = fs::read_to_string(&version_marker)
+        && cached_version.trim() == VERSION
+    {
+        tracing::debug!(
+            path = %dest.display(),
+            "Using cached rootfs"
+        );
+        return Ok(());
+    }
+
+    // Remove existing if present (version mismatch or incomplete extraction
+    // — the marker is written only after a successful unpack, so a partial
+    // tree carries no marker and is rebuilt here).
+    if dest.exists() {
+        eprintln!("Removing outdated rootfs at {}...", dest.display());
+        fs::remove_dir_all(dest)
+            .map_err(|e| VmError::HostSetup(format!("remove old rootfs: {e}")))?;
+    }
+
+    // Extract with progress bar.
+    extract_rootfs_with_progress(resources::ROOTFS, dest)?;
+
+    // Write the version marker last so it doubles as an
+    // "extraction completed" sentinel.
+    fs::write(&version_marker, VERSION)
+        .map_err(|e| VmError::HostSetup(format!("write rootfs version marker: {e}")))?;
+
+    Ok(())
+}
+
+/// Clean up rootfs directories from older versions.
+///
+/// Call this periodically (e.g. at startup) to reclaim disk from previous
+/// releases. Removes all version directories under the openshell-vm base
+/// except the current version.
+pub fn cleanup_old_rootfs() -> Result<(), VmError> {
+    let base = rootfs_cache_base()?;
+    if !base.exists() {
+        return Ok(());
+    }
+
+    // Everything under the base except `base/{VERSION}` is fair game.
+    let current_version_dir = base.join(VERSION);
+    cleanup_old_versions_in_base(&base, &current_version_dir)
+}
+
+/// Check if the rootfs is embedded (non-empty).
+pub fn has_embedded_rootfs() -> bool { + !resources::ROOTFS.is_empty() +} + +// ── Internal helpers ─────────────────────────────────────────────────────── + +/// Build a cache key that combines the version string with a short content +/// fingerprint of the embedded runtime bytes. +/// +/// Using the version alone is insufficient for dev builds (all `0.0.0`) +/// because the embedded libraries can change between compiles without the +/// version changing. The fingerprint is a simple XOR-fold of the first few +/// bytes of each embedded resource — cheap to compute at startup without +/// pulling in a hash dependency. +fn runtime_cache_key() -> String { + // XOR-fold the first 64 bytes of each resource to get a cheap fingerprint. + let mut fp: u64 = 0; + for (i, chunk) in [resources::LIBKRUN, resources::LIBKRUNFW, resources::GVPROXY] + .iter() + .enumerate() + { + let sample = &chunk[..chunk.len().min(64)]; + let mut word: u64 = 0; + for (j, &b) in sample.iter().enumerate() { + word ^= (b as u64) << ((j % 8) * 8); + } + // Mix in resource index so identical resources don't cancel out. + fp ^= word.rotate_left((i as u32) * 13 + 7); + // Also mix in the total length so size changes are detected. 
+ fp ^= (chunk.len() as u64).rotate_left((i as u32) * 17 + 3); + } + format!("{VERSION}-{fp:016x}") +} + +fn runtime_cache_dir() -> Result { + let base = openshell_core::paths::xdg_data_dir() + .map_err(|e| VmError::HostSetup(format!("resolve XDG data dir: {e}")))?; + Ok(base.join("openshell").join("vm-runtime").join(VERSION)) +} + +fn runtime_cache_base() -> Result { + let base = openshell_core::paths::xdg_data_dir() + .map_err(|e| VmError::HostSetup(format!("resolve XDG data dir: {e}")))?; + Ok(base.join("openshell").join("vm-runtime")) +} + +fn rootfs_cache_base() -> Result { + let base = openshell_core::paths::xdg_data_dir() + .map_err(|e| VmError::HostSetup(format!("resolve XDG data dir: {e}")))?; + Ok(base.join("openshell").join("openshell-vm")) +} + +fn cleanup_old_versions(current_dir: &Path) -> Result<(), VmError> { + cleanup_old_versions_in_base(&runtime_cache_base()?, current_dir) +} + +fn cleanup_old_versions_in_base(base: &Path, current_dir: &Path) -> Result<(), VmError> { + if !base.exists() { + return Ok(()); + } + + let entries = match fs::read_dir(base) { + Ok(e) => e, + Err(_) => return Ok(()), // Can't read, skip cleanup + }; + + for entry in entries.filter_map(Result::ok) { + let path = entry.path(); + // Skip if this is the current version directory or a parent of it + if path.is_dir() && !current_dir.starts_with(&path) && path != current_dir { + tracing::debug!( + path = %path.display(), + "Cleaning up old version" + ); + if let Err(e) = fs::remove_dir_all(&path) { + tracing::warn!( + path = %path.display(), + error = %e, + "Failed to clean up old version" + ); + } + } + } + + Ok(()) +} + +fn extract_resource(compressed: &[u8], dest: &Path) -> Result<(), VmError> { + if compressed.is_empty() { + return Err(VmError::HostSetup(format!( + "embedded resource is empty: {}", + dest.display() + ))); + } + + let decompressed = zstd::decode_all(compressed) + .map_err(|e| VmError::HostSetup(format!("decompress {}: {e}", dest.display())))?; + + let mut 
file = fs::File::create(dest)
+        .map_err(|e| VmError::HostSetup(format!("create {}: {e}", dest.display())))?;
+
+    file.write_all(&decompressed)
+        .map_err(|e| VmError::HostSetup(format!("write {}: {e}", dest.display())))?;
+
+    tracing::debug!(
+        path = %dest.display(),
+        compressed_size = compressed.len(),
+        decompressed_size = decompressed.len(),
+        "Extracted resource"
+    );
+
+    Ok(())
+}
+
+/// Decompress the embedded rootfs tarball and unpack it into `dest`,
+/// showing a progress bar on stderr.
+///
+/// The bar tracks consumption of the *compressed* stream: `ProgressReader`
+/// wraps the in-memory cursor and bumps the bar on every read issued by the
+/// (lazy) zstd decoder while `tar` unpacks.
+fn extract_rootfs_with_progress(compressed: &[u8], dest: &Path) -> Result<(), VmError> {
+    eprintln!("Extracting VM environment (first run)...");
+
+    // Progress is measured against the compressed length, since that is the
+    // only quantity the ProgressReader below can observe.
+    let pb = ProgressBar::new(compressed.len() as u64);
+    pb.set_style(
+        ProgressStyle::default_bar()
+            .template("  Decompressing [{bar:40.cyan/blue}] {bytes}/{total_bytes}")
+            .unwrap()
+            .progress_chars("=>-"),
+    );
+
+    // Wrap the compressed data in a progress reader so reads advance the bar.
+    let reader = ProgressReader::new(std::io::Cursor::new(compressed), pb.clone());
+
+    // Build the zstd decoder. Decompression happens lazily: no data is read
+    // until `tar` pulls from the decoder during unpack below.
+    let decoder = zstd::Decoder::new(reader)
+        .map_err(|e| VmError::HostSetup(format!("create zstd decoder: {e}")))?;
+
+    // Create destination directory
+    fs::create_dir_all(dest).map_err(|e| VmError::HostSetup(format!("create rootfs dir: {e}")))?;
+
+    // Extract tar archive with progress
+    eprintln!("  Extracting rootfs...");
+    let mut archive = tar::Archive::new(decoder);
+    archive
+        .unpack(dest)
+        .map_err(|e| VmError::HostSetup(format!("extract rootfs tarball: {e}")))?;
+
+    // BUGFIX: finish the bar only after `unpack` has consumed the stream.
+    // Previously this ran right after constructing the decoder — before any
+    // data had been read — so the bar was finished and cleared immediately
+    // and the ProgressReader's subsequent inc() calls were never displayed.
+    pb.finish_and_clear();
+
+    eprintln!("  Rootfs extracted to {}", dest.display());
+
+    Ok(())
+}
+
+/// A reader wrapper that updates a progress bar as data is read.
+struct ProgressReader { + inner: R, + progress: ProgressBar, +} + +impl ProgressReader { + fn new(inner: R, progress: ProgressBar) -> Self { + Self { inner, progress } + } +} + +impl Read for ProgressReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let n = self.inner.read(buf)?; + self.progress.inc(n as u64); + Ok(n) + } +} + +fn validate_runtime_dir(dir: &Path) -> Result<(), VmError> { + let libkrun = dir.join(resources::LIBKRUN_NAME); + let libkrunfw = dir.join(resources::LIBKRUNFW_NAME); + let gvproxy = dir.join("gvproxy"); + + for path in [&libkrun, &libkrunfw, &gvproxy] { + if !path.exists() { + return Err(VmError::HostSetup(format!( + "missing runtime file: {}", + path.display() + ))); + } + + // Check file is not empty (would indicate a stub) + let size = fs::metadata(path).map(|m| m.len()).unwrap_or(0); + if size == 0 { + return Err(VmError::HostSetup(format!( + "runtime file is empty (stub): {}", + path.display() + ))); + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_resources_not_empty() { + // On supported platforms, resources should be non-empty + #[cfg(any( + all(target_os = "macos", target_arch = "aarch64"), + all(target_os = "linux", target_arch = "aarch64"), + all(target_os = "linux", target_arch = "x86_64"), + ))] + { + // Note: This test only passes if `mise run vm:setup` was run + // before building. In CI without compressed artifacts, resources will be + // empty stubs. + if !resources::LIBKRUN.is_empty() { + assert!(!resources::LIBKRUNFW.is_empty()); + assert!(!resources::GVPROXY.is_empty()); + } + } + } +} diff --git a/crates/openshell-vm/src/exec.rs b/crates/openshell-vm/src/exec.rs new file mode 100644 index 000000000..6195556e1 --- /dev/null +++ b/crates/openshell-vm/src/exec.rs @@ -0,0 +1,767 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::fs::{self, File}; +use std::io::{BufRead, BufReader, Read, Write}; +use std::os::unix::net::UnixStream; +use std::path::{Path, PathBuf}; +use std::thread; +use std::time::{SystemTime, UNIX_EPOCH}; + +use base64::Engine as _; +use serde::{Deserialize, Serialize}; + +use crate::VmError; + +/// Remove a directory, safely handling symlinks. +/// +/// Uses `symlink_metadata` (lstat) to detect symlinks. If the path is a +/// symlink (e.g. `var/run -> /run` in a Linux rootfs), the symlink itself +/// is removed without following it — preventing traversal attacks where a +/// symlink could redirect `remove_dir_all` to an arbitrary host path. +/// If the path is a real directory, it is removed recursively. +fn safe_remove_dir_all(path: &Path) -> Result { + match fs::symlink_metadata(path) { + Ok(meta) => { + if meta.file_type().is_symlink() { + // Remove the symlink itself, not the target it points to. + fs::remove_file(path).map_err(|e| { + VmError::RuntimeState(format!("reset: remove symlink {}: {e}", path.display())) + })?; + return Ok(true); + } + if !meta.is_dir() { + return Ok(false); // Not a directory — nothing to remove. 
+            }
+            // A real directory: remove it and everything beneath it.
+            fs::remove_dir_all(path).map_err(|e| {
+                VmError::RuntimeState(format!("reset: remove {}: {e}", path.display()))
+            })?;
+            Ok(true)
+        }
+        // Path does not exist — nothing to remove.
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(false),
+        Err(e) => Err(VmError::RuntimeState(format!(
+            "stat {}: {e}",
+            path.display()
+        ))),
+    }
+}
+
+/// vsock port the in-guest exec agent listens on.
+pub const VM_EXEC_VSOCK_PORT: u32 = 10_777;
+
+// File names for per-rootfs runtime metadata, stored next to the rootfs
+// (see `vm_state_path` / `vm_lock_path`).
+const VM_STATE_NAME: &str = "vm-state.json";
+const VM_LOCK_NAME: &str = "vm.lock";
+// Default KUBECONFIG entry injected into guest exec environments unless the
+// caller already supplies one (see `exec_running_vm`).
+const KUBECONFIG_ENV: &str = "KUBECONFIG=/etc/rancher/k3s/k3s.yaml";
+
+/// Options for running a command inside the guest via the exec agent.
+#[derive(Debug, Clone)]
+pub struct VmExecOptions {
+    /// Rootfs whose VM should run the command; `None` selects the default
+    /// rootfs (see `default_rootfs`).
+    pub rootfs: Option<PathBuf>,
+    /// Program and arguments to execute in the guest.
+    pub command: Vec<String>,
+    /// Working directory inside the guest, if any.
+    pub workdir: Option<String>,
+    /// Extra `KEY=VALUE` environment entries for the guest process.
+    pub env: Vec<String>,
+    /// Whether to request a TTY for the guest process.
+    pub tty: bool,
+}
+
+/// On-disk record describing a running VM, serialized as JSON to the
+/// per-rootfs state file (see `write_vm_runtime_state`).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VmRuntimeState {
+    /// Host PID of the VM process.
+    pub pid: i32,
+    /// vsock port of the in-guest exec agent.
+    pub exec_vsock_port: u32,
+    /// Host-side Unix socket bridged to the exec agent.
+    pub socket_path: PathBuf,
+    /// Rootfs directory this VM is using.
+    pub rootfs: PathBuf,
+    /// Path to the VM console log on the host.
+    pub console_log: PathBuf,
+    /// Wall-clock start time, milliseconds since the Unix epoch.
+    pub started_at_ms: u128,
+    /// PID of the gvproxy process (if networking uses gvproxy).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub gvproxy_pid: Option<i32>,
+}
+
+/// First message sent to the exec agent on a new connection.
+#[derive(Debug, Serialize)]
+struct ExecRequest {
+    /// Program and arguments.
+    argv: Vec<String>,
+    /// `KEY=VALUE` environment entries.
+    env: Vec<String>,
+    /// Optional working directory inside the guest.
+    cwd: Option<String>,
+    /// Whether a TTY was requested.
+    tty: bool,
+}
+
+/// Frames sent host → guest after the initial `ExecRequest`
+/// (newline-delimited JSON; payloads are base64-encoded).
+#[derive(Debug, Serialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+enum ClientFrame {
+    /// A chunk of host stdin (base64).
+    Stdin { data: String },
+    /// Signals that host stdin reached EOF.
+    StdinClose,
+}
+
+/// Frames sent guest → host (newline-delimited JSON; payloads base64).
+#[derive(Debug, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+enum ServerFrame {
+    /// A chunk of guest stdout (base64).
+    Stdout { data: String },
+    /// A chunk of guest stderr (base64).
+    Stderr { data: String },
+    /// Final exit code of the guest command.
+    Exit { code: i32 },
+    /// Fatal agent-side error; terminates the session.
+    Error { message: String },
+}
+
+/// Compute the host-side Unix socket path used to reach the exec agent
+/// for a given rootfs.
+pub fn vm_exec_socket_path(rootfs: &Path) -> PathBuf {
+    // Prefer XDG_RUNTIME_DIR (per-user, restricted permissions on Linux),
+    // fall back to /tmp. Ownership/symlink validation happens in
+    // secure_socket_base() when the gvproxy socket dir is created; here
+    // we just compute the path. The parent directory is created (with
+    // permission checks) at launch time via create_dir_all.
+    let base = if let Some(xdg) = std::env::var_os("XDG_RUNTIME_DIR") {
+        PathBuf::from(xdg)
+    } else {
+        // /tmp is the conventional fallback; if it is somehow not a
+        // directory, defer to the platform default temp dir.
+        let mut base = PathBuf::from("/tmp");
+        if !base.is_dir() {
+            base = std::env::temp_dir();
+        }
+        base
+    };
+    let dir = base.join("ovm-exec");
+    let id = hash_path_id(rootfs);
+    dir.join(format!("{id}.sock"))
+}
+
+/// Derive a short, stable hex identifier from a path using FNV-1a,
+/// truncated to 48 bits (12 hex chars) to keep socket names short.
+fn hash_path_id(path: &Path) -> String {
+    // FNV-1a 64-bit offset basis and prime.
+    let mut hash: u64 = 0xcbf29ce484222325;
+    for byte in path.to_string_lossy().as_bytes() {
+        hash ^= u64::from(*byte);
+        hash = hash.wrapping_mul(0x100000001b3);
+    }
+    format!("{:012x}", hash & 0x0000_ffff_ffff_ffff)
+}
+
+/// Persist the runtime state for a freshly started VM.
+///
+/// The state file is what `load_vm_runtime_state` (and thus
+/// `exec_running_vm`) uses to find the VM's PID and exec socket.
+pub fn write_vm_runtime_state(
+    rootfs: &Path,
+    pid: i32,
+    console_log: &Path,
+    gvproxy_pid: Option<i32>,
+) -> Result<(), VmError> {
+    let state = VmRuntimeState {
+        pid,
+        exec_vsock_port: VM_EXEC_VSOCK_PORT,
+        socket_path: vm_exec_socket_path(rootfs),
+        rootfs: rootfs.to_path_buf(),
+        console_log: console_log.to_path_buf(),
+        started_at_ms: now_ms()?,
+        gvproxy_pid,
+    };
+    let path = vm_state_path(rootfs);
+    let bytes = serde_json::to_vec_pretty(&state)
+        .map_err(|e| VmError::RuntimeState(format!("serialize VM runtime state: {e}")))?;
+    fs::create_dir_all(vm_run_dir(rootfs))
+        .map_err(|e| VmError::RuntimeState(format!("create VM runtime dir: {e}")))?;
+    // Write atomically (temp file + rename). A crash mid-write must not
+    // leave a truncated JSON file: `load_vm_runtime_state` would fail with a
+    // "decode VM runtime state" error, which `ensure_vm_not_running` does
+    // NOT treat as stale state — blocking all subsequent launches until the
+    // file is deleted by hand.
+    let tmp = path.with_extension("json.tmp");
+    fs::write(&tmp, bytes)
+        .map_err(|e| VmError::RuntimeState(format!("write {}: {e}", tmp.display())))?;
+    fs::rename(&tmp, &path).map_err(|e| {
+        VmError::RuntimeState(format!(
+            "rename {} -> {}: {e}",
+            tmp.display(),
+            path.display()
+        ))
+    })?;
+    Ok(())
+}
+
+/// Best-effort removal of the state file and exec socket for a rootfs.
+/// Errors are ignored: either file may legitimately not exist.
+pub fn clear_vm_runtime_state(rootfs: &Path) {
+    let state_path = vm_state_path(rootfs);
+    let socket_path = vm_exec_socket_path(rootfs);
+    let _ = fs::remove_file(state_path);
+    let _ = fs::remove_file(socket_path);
+}
+
+/// Wipe stale container runtime state from the rootfs.
+///
+/// After a crash or unclean shutdown, containerd and kubelet can retain
+/// references to pod sandboxes and containers that no longer exist. This
+/// causes `ContainerCreating` → `context deadline exceeded` loops because
+/// containerd blocks trying to clean up orphaned resources.
+/// +/// This function removes: +/// - containerd runtime task state (running container metadata) +/// - containerd sandbox controller shim state +/// - containerd CRI plugin state (pod/container tracking) +/// - containerd tmp mounts +/// - kubelet pod state (volume mounts, pod status) +/// +/// It preserves: +/// - containerd images and content (no re-pull needed) +/// - containerd snapshots (no re-extract needed) +/// - containerd metadata database (meta.db — image/snapshot tracking) +/// +/// **Note:** This is the only path that wipes the kine/SQLite database. +/// Normal boots preserve `state.db` (and all cluster objects) across +/// restarts. The init script clears stale bootstrap locks via `sqlite3`, +/// and `recover_corrupt_kine_db` handles actual file corruption. +pub fn reset_runtime_state(rootfs: &Path, gateway_name: &str) -> Result<(), VmError> { + // Full reset: wipe all runtime state so the VM cold-starts from scratch. + // + // With the block-device layout, k3s server/agent state, containerd, PVCs, + // and PKI all live on the state disk — the caller in lib.rs deletes the + // entire state disk image file, which achieves a complete wipe in one + // operation without touching the virtiofs rootfs. + // + // We still clean the virtiofs rootfs for paths that are NOT on the state + // disk: kubelet pod volumes, CNI state, and the pre-init sentinel. These + // paths are present in the rootfs regardless of the storage layout. + let dirs_to_remove = [ + // Stale pod volume mounts and projected secrets + rootfs.join("var/lib/kubelet/pods"), + // CNI state: stale network namespace references from dead pods + rootfs.join("var/lib/cni"), + // Runtime state (PIDs, sockets) — on virtiofs, not block device + rootfs.join("var/run"), + ]; + + let mut cleaned = 0usize; + for dir in &dirs_to_remove { + if safe_remove_dir_all(dir)? 
{ + cleaned += 1; + } + } + + // Remove the pre-initialized sentinel so the init script knows + // this is a cold start and deploys manifests from staging. + // We write a marker file so ensure-vm-rootfs.sh still sees the + // rootfs as built (avoiding a full rebuild) while the init script + // detects the cold start via the missing .initialized sentinel. + let sentinel = rootfs.join("opt/openshell/.initialized"); + let reset_marker = rootfs.join("opt/openshell/.reset"); + if sentinel.exists() { + fs::remove_file(&sentinel).map_err(|e| { + VmError::RuntimeState(format!( + "reset: remove sentinel {}: {e}", + sentinel.display() + )) + })?; + fs::write(&reset_marker, "").map_err(|e| { + VmError::RuntimeState(format!( + "reset: write marker {}: {e}", + reset_marker.display() + )) + })?; + cleaned += 1; + } + + // PKI lives on the state disk; deleting the state disk image (done by + // the caller) rotates it automatically. Just note it for the log. + eprintln!("Reset: PKI will be regenerated on next boot (state disk wiped)"); + + // Wipe host-side mTLS credentials so bootstrap_gateway() takes the + // first-boot path and fetches new certs from the VM via the exec agent. + if let Ok(home) = std::env::var("HOME") { + let config_base = + std::env::var("XDG_CONFIG_HOME").unwrap_or_else(|_| format!("{home}/.config")); + let mtls_dir = PathBuf::from(&config_base) + .join("openshell/gateways") + .join(gateway_name) + .join("mtls"); + if mtls_dir.is_dir() { + fs::remove_dir_all(&mtls_dir).map_err(|e| { + VmError::RuntimeState(format!( + "reset: remove mTLS dir {}: {e}", + mtls_dir.display() + )) + })?; + } + // Also remove metadata so is_warm_boot() returns false. 
+ let metadata = PathBuf::from(&config_base) + .join("openshell/gateways") + .join(gateway_name) + .join("metadata.json"); + if metadata.is_file() { + fs::remove_file(&metadata).map_err(|e| { + VmError::RuntimeState(format!( + "reset: remove metadata {}: {e}", + metadata.display() + )) + })?; + } + } + + eprintln!("Reset: cleaned {cleaned} state directories (full reset)"); + Ok(()) +} + +/// Remove a corrupt kine (`SQLite`) database so k3s can recreate it on boot. +/// +/// k3s uses kine with a `SQLite` backend at `var/lib/rancher/k3s/server/db/state.db`. +/// If the VM is killed mid-write (SIGKILL, host crash, power loss), the database +/// file may be left in a corrupt state — the `SQLite` header magic is missing or the +/// file is truncated. k3s would open the DB, get `SQLITE_NOTADB` / +/// `SQLITE_CORRUPT`, and crash at startup. +/// +/// This function checks the `SQLite` file header (first 100 bytes only) and removes +/// the database plus its WAL/SHM sidecar files if the header is invalid. k3s will +/// create a fresh database on startup and cluster state will be re-applied from +/// the auto-deploy manifests in `server/manifests/`. +/// +/// **Stale bootstrap locks** (a kine application-level issue where a killed k3s +/// server leaves a lock row that causes the next instance to hang) are handled +/// separately by the init script (`openshell-vm-init.sh`), which runs +/// `sqlite3 state.db "DELETE FROM kine WHERE name LIKE '/bootstrap/%'"` before +/// starting k3s. This allows the database — and all persistent cluster state — to +/// survive normal restarts. +/// +/// **What is lost on corruption:** all cluster object records (Pods, Deployments, +/// Secrets, `ConfigMaps`, CRDs, etc.) and the bootstrap token. These are re-created +/// from manifests on the next boot. +/// +/// **What is always preserved:** container images and snapshots (under +/// `k3s/agent/`), PKI, and the `.initialized` sentinel. 
+/// +/// This function is a no-op if `state.db` does not exist (e.g. first boot or +/// after a full `--reset`). +pub fn recover_corrupt_kine_db(rootfs: &Path) -> Result<(), VmError> { + let db_path = rootfs.join("var/lib/rancher/k3s/server/db/state.db"); + if !db_path.exists() { + return Ok(()); // Nothing to check — first boot or post-reset. + } + + // The SQLite file format begins with a 16-byte magic string. + // Reference: https://www.sqlite.org/fileformat.html#the_database_header + const SQLITE_MAGIC: &[u8] = b"SQLite format 3\x00"; + + // Read only the first 100 bytes (the minimum valid SQLite header size) + // instead of loading the entire database into memory. + let has_invalid_header = match File::open(&db_path).and_then(|mut f| { + let mut buf = [0u8; 100]; + let n = f.read(&mut buf)?; + Ok((n, buf)) + }) { + Err(_) => true, // Can't read → treat as corrupt. + Ok((n, _)) if n < 100 => true, // Too short to be a valid DB. + Ok((_, buf)) => !buf.starts_with(SQLITE_MAGIC), + }; + + if !has_invalid_header { + return Ok(()); // Valid database — preserve it for warm boot. + } + + eprintln!( + "Warning: kine database is corrupt ({}), removing for clean boot", + db_path.display() + ); + + remove_kine_db_files(&db_path)?; + + Ok(()) +} + +/// Remove the kine `SQLite` database and its WAL/SHM sidecar files. +fn remove_kine_db_files(db_path: &Path) -> Result<(), VmError> { + if let Err(e) = fs::remove_file(db_path) { + return Err(VmError::RuntimeState(format!( + "failed to remove kine database {}: {e}", + db_path.display() + ))); + } + // Also remove any WAL/SHM sidecar files left by an interrupted write. + let _ = fs::remove_file(db_path.with_extension("db-wal")); + let _ = fs::remove_file(db_path.with_extension("db-shm")); + Ok(()) +} + +/// Acquire an exclusive lock on the rootfs lock file. +/// +/// The lock is held for the lifetime of the returned `File` handle. When +/// the process exits (even via SIGKILL), the OS releases the lock +/// automatically. 
This provides a reliable guard against two VM processes +/// sharing the same rootfs — even if the state file is deleted. +/// +/// Returns `Ok(File)` on success. The caller must keep the `File` alive +/// for as long as the VM is running. +pub fn acquire_rootfs_lock(rootfs: &Path) -> Result { + let lock_path = vm_lock_path(rootfs); + fs::create_dir_all(vm_run_dir(rootfs)) + .map_err(|e| VmError::RuntimeState(format!("create VM runtime dir: {e}")))?; + + // Open (or create) the lock file without truncating so we can read + // the holder's PID for the error message if the lock is held. + let file = fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(&lock_path) + .map_err(|e| { + VmError::RuntimeState(format!("open lock file {}: {e}", lock_path.display())) + })?; + + // Try non-blocking exclusive lock. + let fd = std::os::unix::io::AsRawFd::as_raw_fd(&file); + let rc = unsafe { libc::flock(fd, libc::LOCK_EX | libc::LOCK_NB) }; + if rc != 0 { + let err = std::io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::EWOULDBLOCK) { + // Another process holds the lock — read its PID for diagnostics. + let holder_pid = fs::read_to_string(&lock_path).unwrap_or_default(); + let holder_pid = holder_pid.trim(); + return Err(VmError::RuntimeState(format!( + "another process (pid {holder_pid}) is using rootfs {}. \ + Stop the running VM first", + rootfs.display() + ))); + } + return Err(VmError::RuntimeState(format!( + "lock rootfs {}: {err}", + lock_path.display() + ))); + } + + // Lock acquired — write our PID (truncate first, then write). + // This is informational only; the flock is the real guard. + let _ = file.set_len(0); + { + let mut f = &file; + let _ = write!(f, "{}", std::process::id()); + } + + Ok(file) +} + +/// Check whether the rootfs lock file is currently held by another process. +/// +/// Returns `Ok(())` if the lock is free (or can be acquired), and an +/// `Err` if another process holds it. 
Does NOT acquire the lock — use +/// [`acquire_rootfs_lock`] for that. +fn check_rootfs_lock_free(rootfs: &Path) -> Result<(), VmError> { + let lock_path = vm_lock_path(rootfs); + if !lock_path.exists() { + return Ok(()); + } + + let Ok(file) = File::open(&lock_path) else { + return Ok(()); // Can't open → treat as free + }; + + let fd = std::os::unix::io::AsRawFd::as_raw_fd(&file); + let rc = unsafe { libc::flock(fd, libc::LOCK_EX | libc::LOCK_NB) }; + if rc != 0 { + let err = std::io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::EWOULDBLOCK) { + let holder_pid = fs::read_to_string(&lock_path).unwrap_or_default(); + let holder_pid = holder_pid.trim(); + return Err(VmError::RuntimeState(format!( + "another process (pid {holder_pid}) is using rootfs {}. \ + Stop the running VM first", + rootfs.display() + ))); + } + } else { + // We acquired the lock — release it immediately since we're only probing. + unsafe { libc::flock(fd, libc::LOCK_UN) }; + } + + Ok(()) +} + +pub fn ensure_vm_not_running(rootfs: &Path) -> Result<(), VmError> { + // Primary guard: check the flock. This works even if the state file + // has been deleted, because the kernel holds the lock until the + // owning process exits. + check_rootfs_lock_free(rootfs)?; + + // Secondary guard: check the state file for any stale state. 
+ match load_vm_runtime_state(Some(rootfs)) { + Ok(state) => Err(VmError::RuntimeState(format!( + "VM is already running (pid {}) with exec socket {}", + state.pid, + state.socket_path.display() + ))), + Err(VmError::RuntimeState(message)) + if message.starts_with("read VM runtime state") + || message.starts_with("VM is not running") => + { + clear_vm_runtime_state(rootfs); + Ok(()) + } + Err(err) => Err(err), + } +} + +pub fn exec_running_vm(options: VmExecOptions) -> Result { + let state = load_vm_runtime_state(options.rootfs.as_deref())?; + let mut stream = UnixStream::connect(&state.socket_path).map_err(|e| { + VmError::Exec(format!( + "connect to VM exec socket {}: {e}", + state.socket_path.display() + )) + })?; + let mut writer = stream + .try_clone() + .map_err(|e| VmError::Exec(format!("clone VM exec socket: {e}")))?; + + let mut env = options.env; + validate_env_vars(&env)?; + if !env.iter().any(|item| item.starts_with("KUBECONFIG=")) { + env.push(KUBECONFIG_ENV.to_string()); + } + + let request = ExecRequest { + argv: options.command, + env, + cwd: options.workdir, + tty: options.tty, + }; + send_json_line(&mut writer, &request)?; + + let stdin_writer = writer; + thread::spawn(move || { + let _ = pump_stdin(stdin_writer); + }); + + let mut reader = BufReader::new(&mut stream); + let mut line = String::new(); + let stdout = std::io::stdout(); + let stderr = std::io::stderr(); + let mut stdout = stdout.lock(); + let mut stderr = stderr.lock(); + let mut exit_code = None; + + loop { + line.clear(); + let bytes = reader + .read_line(&mut line) + .map_err(|e| VmError::Exec(format!("read VM exec response from guest agent: {e}")))?; + if bytes == 0 { + break; + } + + let frame: ServerFrame = serde_json::from_str(line.trim_end()) + .map_err(|e| VmError::Exec(format!("decode VM exec response frame: {e}")))?; + + match frame { + ServerFrame::Stdout { data } => { + let bytes = decode_payload(&data)?; + stdout + .write_all(&bytes) + .map_err(|e| 
VmError::Exec(format!("write guest stdout: {e}")))?; + stdout + .flush() + .map_err(|e| VmError::Exec(format!("flush guest stdout: {e}")))?; + } + ServerFrame::Stderr { data } => { + let bytes = decode_payload(&data)?; + stderr + .write_all(&bytes) + .map_err(|e| VmError::Exec(format!("write guest stderr: {e}")))?; + stderr + .flush() + .map_err(|e| VmError::Exec(format!("flush guest stderr: {e}")))?; + } + ServerFrame::Exit { code } => { + exit_code = Some(code); + break; + } + ServerFrame::Error { message } => { + return Err(VmError::Exec(message)); + } + } + } + + exit_code.ok_or_else(|| { + VmError::Exec("VM exec agent disconnected before returning an exit code".to_string()) + }) +} + +/// Run a command inside the guest via the exec agent and capture its stdout. +/// +/// Unlike [`exec_running_vm`], this function does not pump host stdin or write +/// to the terminal. It collects all stdout frames into a `Vec` and returns +/// them on success (exit code 0). Stderr output is discarded. +/// +/// This is the building block for internal host→guest queries (e.g. reading +/// files from the guest filesystem) without requiring a dedicated vsock server. +pub fn exec_capture(socket_path: &Path, argv: Vec) -> Result, VmError> { + let mut stream = UnixStream::connect(socket_path).map_err(|e| { + VmError::Exec(format!( + "connect to VM exec socket {}: {e}", + socket_path.display() + )) + })?; + let mut writer = stream + .try_clone() + .map_err(|e| VmError::Exec(format!("clone VM exec socket: {e}")))?; + + let request = ExecRequest { + argv, + env: vec![], + cwd: None, + tty: false, + }; + send_json_line(&mut writer, &request)?; + + // Close stdin immediately — we have no input to send. 
+ send_json_line(&mut writer, &ClientFrame::StdinClose)?; + + let mut reader = BufReader::new(&mut stream); + let mut line = String::new(); + let mut stdout_buf = Vec::new(); + + loop { + line.clear(); + let bytes = reader + .read_line(&mut line) + .map_err(|e| VmError::Exec(format!("read VM exec response: {e}")))?; + if bytes == 0 { + break; + } + + let frame: ServerFrame = serde_json::from_str(line.trim_end()) + .map_err(|e| VmError::Exec(format!("decode VM exec response frame: {e}")))?; + + match frame { + ServerFrame::Stdout { data } => { + stdout_buf.extend_from_slice(&decode_payload(&data)?); + } + ServerFrame::Stderr { .. } => { + // Discard stderr for capture mode. + } + ServerFrame::Exit { code } => { + if code != 0 { + return Err(VmError::Exec(format!( + "guest command exited with code {code}" + ))); + } + return Ok(stdout_buf); + } + ServerFrame::Error { message } => { + return Err(VmError::Exec(message)); + } + } + } + + Err(VmError::Exec( + "VM exec agent disconnected before returning an exit code".to_string(), + )) +} + +fn vm_run_dir(rootfs: &Path) -> PathBuf { + rootfs.parent().unwrap_or(rootfs).to_path_buf() +} + +pub fn vm_state_path(rootfs: &Path) -> PathBuf { + vm_run_dir(rootfs).join(format!("{}-{}", rootfs_key(rootfs), VM_STATE_NAME)) +} + +fn vm_lock_path(rootfs: &Path) -> PathBuf { + vm_run_dir(rootfs).join(format!("{}-{}", rootfs_key(rootfs), VM_LOCK_NAME)) +} + +fn rootfs_key(rootfs: &Path) -> String { + let name = rootfs + .file_name() + .and_then(|part| part.to_str()) + .unwrap_or("openshell-vm"); + let mut out = String::with_capacity(name.len()); + for ch in name.chars() { + if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' { + out.push(ch); + } else { + out.push('_'); + } + } + if out.is_empty() { + "openshell-vm".to_string() + } else { + out + } +} + +fn default_rootfs() -> Result { + crate::named_rootfs_dir("default") +} + +fn load_vm_runtime_state(rootfs: Option<&Path>) -> Result { + let rootfs = match rootfs { + Some(rootfs) 
=> rootfs.to_path_buf(), + None => default_rootfs()?, + }; + let path = vm_state_path(&rootfs); + let bytes = fs::read(&path).map_err(|e| { + VmError::RuntimeState(format!( + "read VM runtime state {}: {e}. Start the VM with `openshell-vm` first", + path.display() + )) + })?; + let state: VmRuntimeState = serde_json::from_slice(&bytes) + .map_err(|e| VmError::RuntimeState(format!("decode VM runtime state: {e}")))?; + + if !process_alive(state.pid) { + clear_vm_runtime_state(&state.rootfs); + return Err(VmError::RuntimeState(format!( + "VM is not running (stale pid {})", + state.pid + ))); + } + + if !state.socket_path.exists() { + return Err(VmError::RuntimeState(format!( + "VM exec socket is not ready: {}", + state.socket_path.display() + ))); + } + + Ok(state) +} + +fn validate_env_vars(items: &[String]) -> Result<(), VmError> { + for item in items { + let (key, _value) = item.split_once('=').ok_or_else(|| { + VmError::Exec(format!( + "invalid environment variable `{item}`; expected KEY=VALUE" + )) + })?; + if key.is_empty() + || !key.chars().enumerate().all(|(idx, ch)| { + ch == '_' || (ch.is_ascii_alphanumeric() && (idx > 0 || !ch.is_ascii_digit())) + }) + { + return Err(VmError::Exec(format!( + "invalid environment variable name `{key}`" + ))); + } + } + Ok(()) +} + +fn send_json_line(writer: &mut UnixStream, value: &T) -> Result<(), VmError> { + let mut bytes = serde_json::to_vec(value) + .map_err(|e| VmError::Exec(format!("encode VM exec request: {e}")))?; + bytes.push(b'\n'); + writer + .write_all(&bytes) + .map_err(|e| VmError::Exec(format!("write VM exec request: {e}"))) +} + +fn pump_stdin(mut writer: UnixStream) -> Result<(), VmError> { + let stdin = std::io::stdin(); + let mut stdin = stdin.lock(); + let mut buf = [0u8; 8192]; + + loop { + let read = stdin + .read(&mut buf) + .map_err(|e| VmError::Exec(format!("read local stdin: {e}")))?; + if read == 0 { + break; + } + let frame = ClientFrame::Stdin { + data: 
base64::engine::general_purpose::STANDARD.encode(&buf[..read]), + }; + send_json_line(&mut writer, &frame)?; + } + + send_json_line(&mut writer, &ClientFrame::StdinClose) +} + +fn decode_payload(data: &str) -> Result, VmError> { + base64::engine::general_purpose::STANDARD + .decode(data) + .map_err(|e| VmError::Exec(format!("decode VM exec payload: {e}"))) +} + +fn process_alive(pid: i32) -> bool { + let rc = unsafe { libc::kill(pid, 0) }; + if rc == 0 { + return true; + } + std::io::Error::last_os_error().raw_os_error() == Some(libc::EPERM) +} + +fn now_ms() -> Result { + let duration = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map_err(|e| VmError::RuntimeState(format!("read system clock: {e}")))?; + Ok(duration.as_millis()) +} diff --git a/crates/openshell-vm/src/ffi.rs b/crates/openshell-vm/src/ffi.rs new file mode 100644 index 000000000..7500b1c97 --- /dev/null +++ b/crates/openshell-vm/src/ffi.rs @@ -0,0 +1,336 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Minimal runtime-loaded bindings for the libkrun C API. +//! +//! We intentionally do not link libkrun at build time. Instead, the +//! `openshell-vm` binary loads `libkrun` from the staged `openshell-vm.runtime/` +//! sidecar bundle on first use. + +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::OnceLock; + +use libc::c_char; +use libloading::Library; + +use crate::VmError; + +/// Runtime provenance information extracted from the bundle. +#[derive(Debug, Clone)] +pub struct RuntimeProvenance { + /// Path to the libkrun library that was loaded. + pub libkrun_path: PathBuf, + /// Paths to all libkrunfw libraries that were preloaded. + pub libkrunfw_paths: Vec, + /// SHA-256 hash of the primary libkrunfw artifact (if computable). + pub libkrunfw_sha256: Option, + /// Contents of provenance.json if present in the runtime bundle. 
+ pub provenance_json: Option, + /// Whether this is a custom (OpenShell-built) runtime. + pub is_custom: bool, +} + +pub const KRUN_LOG_TARGET_DEFAULT: i32 = -1; +pub const KRUN_LOG_LEVEL_OFF: u32 = 0; +pub const KRUN_LOG_LEVEL_ERROR: u32 = 1; +pub const KRUN_LOG_LEVEL_WARN: u32 = 2; +pub const KRUN_LOG_LEVEL_INFO: u32 = 3; +pub const KRUN_LOG_LEVEL_DEBUG: u32 = 4; +pub const KRUN_LOG_LEVEL_TRACE: u32 = 5; +pub const KRUN_LOG_STYLE_AUTO: u32 = 0; +pub const KRUN_LOG_OPTION_NO_ENV: u32 = 1; +pub const KRUN_DISK_FORMAT_RAW: u32 = 0; +#[allow(dead_code)] // Used only on macOS (cfg-gated in state_disk_sync_mode) +pub const KRUN_SYNC_RELAXED: u32 = 1; +#[allow(dead_code)] // Used only on Linux (cfg-gated in state_disk_sync_mode) +pub const KRUN_SYNC_FULL: u32 = 2; + +type KrunInitLog = + unsafe extern "C" fn(target_fd: i32, level: u32, style: u32, options: u32) -> i32; +type KrunCreateCtx = unsafe extern "C" fn() -> i32; +type KrunFreeCtx = unsafe extern "C" fn(ctx_id: u32) -> i32; +type KrunSetVmConfig = unsafe extern "C" fn(ctx_id: u32, num_vcpus: u8, ram_mib: u32) -> i32; +type KrunSetRoot = unsafe extern "C" fn(ctx_id: u32, root_path: *const c_char) -> i32; +type KrunSetWorkdir = unsafe extern "C" fn(ctx_id: u32, workdir_path: *const c_char) -> i32; +type KrunSetExec = unsafe extern "C" fn( + ctx_id: u32, + exec_path: *const c_char, + argv: *const *const c_char, + envp: *const *const c_char, +) -> i32; +type KrunSetPortMap = unsafe extern "C" fn(ctx_id: u32, port_map: *const *const c_char) -> i32; +type KrunSetConsoleOutput = unsafe extern "C" fn(ctx_id: u32, filepath: *const c_char) -> i32; +type KrunAddDisk3 = unsafe extern "C" fn( + ctx_id: u32, + block_id: *const c_char, + disk_path: *const c_char, + disk_format: u32, + read_only: bool, + direct_io: bool, + sync_mode: u32, +) -> i32; +type KrunAddVsockPort2 = + unsafe extern "C" fn(ctx_id: u32, port: u32, c_filepath: *const c_char, listen: bool) -> i32; +type KrunStartEnter = unsafe extern "C" fn(ctx_id: u32) 
-> i32; +type KrunDisableImplicitVsock = unsafe extern "C" fn(ctx_id: u32) -> i32; +type KrunAddVsock = unsafe extern "C" fn(ctx_id: u32, tsi_features: u32) -> i32; +#[cfg(target_os = "macos")] +type KrunAddNetUnixgram = unsafe extern "C" fn( + ctx_id: u32, + c_path: *const c_char, + fd: i32, + c_mac: *const u8, + features: u32, + flags: u32, +) -> i32; +type KrunAddNetUnixstream = unsafe extern "C" fn( + ctx_id: u32, + c_path: *const c_char, + fd: i32, + c_mac: *const u8, + features: u32, + flags: u32, +) -> i32; + +pub struct LibKrun { + pub krun_init_log: KrunInitLog, + pub krun_create_ctx: KrunCreateCtx, + pub krun_free_ctx: KrunFreeCtx, + pub krun_set_vm_config: KrunSetVmConfig, + pub krun_set_root: KrunSetRoot, + pub krun_set_workdir: KrunSetWorkdir, + pub krun_set_exec: KrunSetExec, + pub krun_set_port_map: KrunSetPortMap, + pub krun_set_console_output: KrunSetConsoleOutput, + pub krun_add_disk3: Option, + pub krun_add_vsock_port2: KrunAddVsockPort2, + pub krun_start_enter: KrunStartEnter, + pub krun_disable_implicit_vsock: KrunDisableImplicitVsock, + pub krun_add_vsock: KrunAddVsock, + #[cfg(target_os = "macos")] + pub krun_add_net_unixgram: KrunAddNetUnixgram, + #[allow(dead_code)] // FFI symbol loaded for future use + pub krun_add_net_unixstream: KrunAddNetUnixstream, +} + +static LIBKRUN: OnceLock = OnceLock::new(); +static RUNTIME_PROVENANCE: OnceLock = OnceLock::new(); + +pub fn libkrun() -> Result<&'static LibKrun, VmError> { + if let Some(lib) = LIBKRUN.get() { + return Ok(lib); + } + + let loaded = LibKrun::load()?; + let _ = LIBKRUN.set(loaded); + Ok(LIBKRUN.get().expect("libkrun should be initialized")) +} + +/// Return the provenance information for the loaded runtime. +/// +/// Only available after [`libkrun()`] has been called successfully. 
+pub fn runtime_provenance() -> Option<&'static RuntimeProvenance> { + RUNTIME_PROVENANCE.get() +} + +impl LibKrun { + fn load() -> Result { + let path = runtime_libkrun_path()?; + let runtime_dir = path.parent().ok_or_else(|| { + VmError::HostSetup(format!("libkrun has no parent dir: {}", path.display())) + })?; + let krunfw_paths = preload_runtime_support_libraries(runtime_dir)?; + + // Build and store provenance information. + let provenance_json_path = runtime_dir.join("provenance.json"); + let provenance_json = fs::read_to_string(&provenance_json_path).ok(); + let is_custom = provenance_json.is_some(); + + let libkrunfw_sha256 = krunfw_paths.first().and_then(|p| compute_sha256(p).ok()); + + let provenance = RuntimeProvenance { + libkrun_path: path.clone(), + libkrunfw_paths: krunfw_paths, + libkrunfw_sha256, + provenance_json, + is_custom, + }; + let _ = RUNTIME_PROVENANCE.set(provenance); + + let library = Box::leak(Box::new(unsafe { + Library::new(&path).map_err(|e| { + VmError::HostSetup(format!("load libkrun from {}: {e}", path.display())) + })? 
+ })); + + Ok(Self { + krun_init_log: load_symbol(library, b"krun_init_log\0", &path)?, + krun_create_ctx: load_symbol(library, b"krun_create_ctx\0", &path)?, + krun_free_ctx: load_symbol(library, b"krun_free_ctx\0", &path)?, + krun_set_vm_config: load_symbol(library, b"krun_set_vm_config\0", &path)?, + krun_set_root: load_symbol(library, b"krun_set_root\0", &path)?, + krun_set_workdir: load_symbol(library, b"krun_set_workdir\0", &path)?, + krun_set_exec: load_symbol(library, b"krun_set_exec\0", &path)?, + krun_set_port_map: load_symbol(library, b"krun_set_port_map\0", &path)?, + krun_set_console_output: load_symbol(library, b"krun_set_console_output\0", &path)?, + krun_add_disk3: load_optional_symbol(library, b"krun_add_disk3\0"), + krun_add_vsock_port2: load_symbol(library, b"krun_add_vsock_port2\0", &path)?, + krun_start_enter: load_symbol(library, b"krun_start_enter\0", &path)?, + krun_disable_implicit_vsock: load_symbol( + library, + b"krun_disable_implicit_vsock\0", + &path, + )?, + krun_add_vsock: load_symbol(library, b"krun_add_vsock\0", &path)?, + #[cfg(target_os = "macos")] + krun_add_net_unixgram: load_symbol(library, b"krun_add_net_unixgram\0", &path)?, + krun_add_net_unixstream: load_symbol(library, b"krun_add_net_unixstream\0", &path)?, + }) + } +} + +fn runtime_libkrun_path() -> Result { + Ok(crate::configured_runtime_dir()?.join(required_runtime_lib_name())) +} + +fn preload_runtime_support_libraries(runtime_dir: &Path) -> Result, VmError> { + let entries = fs::read_dir(runtime_dir) + .map_err(|e| VmError::HostSetup(format!("read {}: {e}", runtime_dir.display())))?; + + let mut support_libs: Vec = entries + .filter_map(Result::ok) + .map(|entry| entry.path()) + .filter(|path| { + path.file_name() + .and_then(|name| name.to_str()) + .is_some_and(|name| { + #[cfg(target_os = "macos")] + { + name.starts_with("libkrunfw") && name.ends_with(".dylib") + } + #[cfg(not(target_os = "macos"))] + { + name.starts_with("libkrunfw") && name.contains(".so") + } + 
}) + }) + .collect(); + + support_libs.sort(); + + for path in &support_libs { + let path_cstr = std::ffi::CString::new(path.to_string_lossy().as_bytes()).map_err(|e| { + VmError::HostSetup(format!( + "invalid support library path {}: {e}", + path.display() + )) + })?; + let handle = + unsafe { libc::dlopen(path_cstr.as_ptr(), libc::RTLD_NOW | libc::RTLD_GLOBAL) }; + if handle.is_null() { + let error = unsafe { + let err = libc::dlerror(); + if err.is_null() { + "unknown dlopen error".to_string() + } else { + std::ffi::CStr::from_ptr(err).to_string_lossy().into_owned() + } + }; + return Err(VmError::HostSetup(format!( + "preload runtime support library {}: {error}", + path.display() + ))); + } + } + + Ok(support_libs) +} + +pub fn required_runtime_lib_name() -> &'static str { + #[cfg(target_os = "macos")] + { + "libkrun.dylib" + } + #[cfg(not(target_os = "macos"))] + { + "libkrun.so" + } +} + +/// Compute SHA-256 hash of a file, returning hex string. +/// +/// Streams the file contents directly to `shasum -a 256` via a pipe, +/// avoiding buffering the entire file in memory. +fn compute_sha256(path: &Path) -> Result { + use std::io::{Read, Write}; + use std::process::{Command, Stdio}; + + let mut file = fs::File::open(path)?; + + // sha256sum is standard on Linux; shasum ships with macOS/Perl. + let mut child = Command::new("sha256sum") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn() + .or_else(|_| { + Command::new("shasum") + .args(["-a", "256"]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn() + })?; + + // Stream file contents directly to shasum's stdin in 8KB chunks. + { + let mut stdin = child + .stdin + .take() + .ok_or_else(|| std::io::Error::other("failed to open shasum stdin"))?; + let mut buf = [0u8; 8192]; + loop { + let n = file.read(&mut buf)?; + if n == 0 { + break; + } + stdin.write_all(&buf[..n])?; + } + // stdin is dropped here, closing the pipe so shasum can finish. 
+ } + + let output = child.wait_with_output()?; + if output.status.success() { + let stdout = String::from_utf8_lossy(&output.stdout); + Ok(stdout + .split_whitespace() + .next() + .unwrap_or("unknown") + .to_string()) + } else { + Ok("unknown".to_string()) + } +} + +fn load_symbol( + library: &'static Library, + symbol: &[u8], + path: &Path, +) -> Result { + let loaded = unsafe { + library.get::(symbol).map_err(|e| { + VmError::HostSetup(format!( + "resolve {} from {}: {e}", + String::from_utf8_lossy(symbol).trim_end_matches('\0'), + path.display() + )) + })? + }; + Ok(*loaded) +} + +fn load_optional_symbol(library: &'static Library, symbol: &[u8]) -> Option { + let loaded = unsafe { library.get::(symbol).ok()? }; + Some(*loaded) +} diff --git a/crates/openshell-vm/src/health.rs b/crates/openshell-vm/src/health.rs new file mode 100644 index 000000000..096a35d1f --- /dev/null +++ b/crates/openshell-vm/src/health.rs @@ -0,0 +1,201 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! gRPC health check for verifying the gateway is fully ready. +//! +//! This module provides a proper gRPC health check that verifies the gateway +//! service is not just accepting TCP connections, but is actually responding +//! to gRPC requests. This ensures we don't mark the server as ready before +//! it has fully booted. + +use crate::VmError; +use openshell_core::proto::{HealthRequest, ServiceStatus, open_shell_client::OpenShellClient}; +use std::path::PathBuf; +use std::time::Duration; +use tonic::transport::{Certificate, ClientTlsConfig, Endpoint, Identity}; + +/// Load mTLS materials from the gateway's cert directory. 
+fn load_mtls_materials(gateway_name: &str) -> Result<(Vec, Vec, Vec), String> { + let home = std::env::var("HOME").map_err(|_| "HOME not set")?; + let mtls_dir = PathBuf::from(home) + .join(".config/openshell/gateways") + .join(gateway_name) + .join("mtls"); + + let ca = std::fs::read(mtls_dir.join("ca.crt")) + .map_err(|e| format!("failed to read ca.crt: {e}"))?; + let cert = std::fs::read(mtls_dir.join("tls.crt")) + .map_err(|e| format!("failed to read tls.crt: {e}"))?; + let key = std::fs::read(mtls_dir.join("tls.key")) + .map_err(|e| format!("failed to read tls.key: {e}"))?; + + Ok((ca, cert, key)) +} + +/// Build a tonic TLS config from mTLS materials. +fn build_tls_config(ca: Vec, cert: Vec, key: Vec) -> ClientTlsConfig { + let ca_cert = Certificate::from_pem(ca); + let identity = Identity::from_pem(cert, key); + ClientTlsConfig::new() + .ca_certificate(ca_cert) + .identity(identity) +} + +/// Perform a gRPC health check against the gateway. +/// +/// Returns `Ok(())` if the health check succeeds (service reports healthy), +/// or an error describing why the check failed. +async fn grpc_health_check(gateway_port: u16, gateway_name: &str) -> Result<(), String> { + // Load mTLS materials + let (ca, cert, key) = load_mtls_materials(gateway_name)?; + let tls_config = build_tls_config(ca, cert, key); + + // Build the channel with TLS + let endpoint = format!("https://127.0.0.1:{gateway_port}"); + let channel = Endpoint::from_shared(endpoint.clone()) + .map_err(|e| format!("invalid endpoint: {e}"))? + .connect_timeout(Duration::from_secs(5)) + .tls_config(tls_config) + .map_err(|e| format!("TLS config error: {e}"))? 
+ .connect() + .await + .map_err(|e| format!("connection failed: {e}"))?; + + // Create client and call health + let mut client = OpenShellClient::new(channel); + let response = client + .health(HealthRequest {}) + .await + .map_err(|e| format!("health RPC failed: {e}"))?; + + let health = response.into_inner(); + if health.status == ServiceStatus::Healthy as i32 { + Ok(()) + } else { + Err(format!("service not healthy: status={}", health.status)) + } +} + +/// Wait for the gateway service to be fully ready by polling the gRPC health endpoint. +/// +/// This replaces the TCP-only probe with a proper gRPC health check that verifies +/// the service is actually responding to requests, not just accepting connections. +/// +/// Returns `Ok(())` when the gateway is confirmed healthy, or `Err` if the health +/// check fails or times out. Falls back to TCP probe if mTLS materials aren't +/// available yet. +pub fn wait_for_gateway_ready(gateway_port: u16, gateway_name: &str) -> Result<(), VmError> { + let start = std::time::Instant::now(); + let timeout = Duration::from_secs(90); + let poll_interval = Duration::from_secs(1); + + eprintln!("Waiting for gateway gRPC health check..."); + + // Create a runtime for async health checks + let rt = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(rt) => rt, + Err(e) => { + eprintln!(" failed to create tokio runtime: {e}, falling back to TCP probe"); + return wait_for_tcp_only(gateway_port, timeout, poll_interval); + } + }; + + loop { + // Try gRPC health check + let result = rt.block_on(async { + tokio::time::timeout( + Duration::from_secs(5), + grpc_health_check(gateway_port, gateway_name), + ) + .await + }); + + match result { + Ok(Ok(())) => { + eprintln!("Gateway healthy [{:.1}s]", start.elapsed().as_secs_f64()); + return Ok(()); + } + Ok(Err(e)) => { + // gRPC call completed but failed + if start.elapsed() >= timeout { + return Err(VmError::Bootstrap(format!( + "gateway health check 
failed after {:.0}s: {e}", + timeout.as_secs_f64() + ))); + } + } + Err(_) => { + // Timeout on the health check itself + if start.elapsed() >= timeout { + return Err(VmError::Bootstrap(format!( + "gateway health check timed out after {:.0}s", + timeout.as_secs_f64() + ))); + } + } + } + + std::thread::sleep(poll_interval); + } +} + +/// Fallback TCP-only probe when gRPC health check can't be performed. +fn wait_for_tcp_only( + gateway_port: u16, + timeout: Duration, + poll_interval: Duration, +) -> Result<(), VmError> { + let start = std::time::Instant::now(); + + loop { + if host_tcp_probe(gateway_port) { + eprintln!( + "Service reachable (TCP) [{:.1}s]", + start.elapsed().as_secs_f64() + ); + return Ok(()); + } + + if start.elapsed() >= timeout { + return Err(VmError::Bootstrap(format!( + "gateway TCP probe failed after {:.0}s", + timeout.as_secs_f64() + ))); + } + + std::thread::sleep(poll_interval); + } +} + +/// Probe `127.0.0.1:port` from the host to verify the TCP path is working. +/// +/// This is a fallback when gRPC health check isn't available. +fn host_tcp_probe(gateway_port: u16) -> bool { + use std::io::Read; + use std::net::{SocketAddr, TcpStream}; + + let addr: SocketAddr = ([127, 0, 0, 1], gateway_port).into(); + let Ok(mut stream) = TcpStream::connect_timeout(&addr, Duration::from_secs(2)) else { + return false; + }; + + // A short read timeout: if the server is alive it will wait for us + // to send a TLS ClientHello, so the read will time out (= good). + // If the connection resets or closes, the server is dead. + stream + .set_read_timeout(Some(Duration::from_millis(200))) + .ok(); + let mut buf = [0u8; 1]; + match stream.read(&mut buf) { + Err(e) + if e.kind() == std::io::ErrorKind::WouldBlock + || e.kind() == std::io::ErrorKind::TimedOut => + { + true // Timeout = server alive, waiting for ClientHello. + } + _ => false, // Reset, EOF, or unexpected data = not healthy. 
+ } +} diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs new file mode 100644 index 000000000..4593dd605 --- /dev/null +++ b/crates/openshell-vm/src/lib.rs @@ -0,0 +1,1994 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! `MicroVM` runtime using libkrun for hardware-isolated execution. +//! +//! This crate provides a thin wrapper around the libkrun C API to boot +//! lightweight VMs backed by virtio-fs root filesystems. On macOS ARM64, +//! it uses Apple's Hypervisor.framework; on Linux it uses KVM. +//! +//! # Codesigning (macOS) +//! +//! The calling binary must be codesigned with the +//! `com.apple.security.hypervisor` entitlement. See `entitlements.plist`. + +#![allow(unsafe_code)] + +mod embedded; +mod exec; +mod ffi; +mod health; + +use std::ffi::CString; +use std::path::{Path, PathBuf}; +use std::ptr; +use std::time::Instant; + +pub use exec::{ + VM_EXEC_VSOCK_PORT, VmExecOptions, VmRuntimeState, acquire_rootfs_lock, clear_vm_runtime_state, + ensure_vm_not_running, exec_capture, exec_running_vm, recover_corrupt_kine_db, + reset_runtime_state, vm_exec_socket_path, vm_state_path, write_vm_runtime_state, +}; + +// ── Error type ───────────────────────────────────────────────────────── + +/// Errors that can occur when configuring or launching a microVM. +#[derive(Debug, thiserror::Error, miette::Diagnostic)] +pub enum VmError { + /// A libkrun FFI call returned a negative error code. + #[error("{func} failed with error code {code}")] + Krun { func: &'static str, code: i32 }, + + /// The rootfs directory does not exist. + #[error( + "rootfs directory not found: {path}\nRun `openshell-vm prepare-rootfs` or build one with ./crates/openshell-vm/scripts/build-rootfs.sh " + )] + RootfsNotFound { path: String }, + + /// A path contained invalid UTF-8. 
+ #[error("path is not valid UTF-8: {0}")] + InvalidPath(String), + + /// `CString::new` failed (embedded NUL byte). + #[error("invalid C string: {0}")] + CString(#[from] std::ffi::NulError), + + /// A required host binary was not found. + #[error("required binary not found: {path}\n{hint}")] + BinaryNotFound { path: String, hint: String }, + + /// Host-side VM setup failed before boot. + #[error("host setup failed: {0}")] + HostSetup(String), + + /// `fork()` failed. + #[error("fork() failed: {0}")] + Fork(String), + + /// Post-boot bootstrap failed. + #[error("bootstrap failed: {0}")] + Bootstrap(String), + + /// Local VM runtime state could not be read or written. + #[error("VM runtime state error: {0}")] + RuntimeState(String), + + /// Exec operation against a running VM failed. + #[error("VM exec failed: {0}")] + Exec(String), +} + +/// Check a libkrun return code; negative values are errors. +fn check(ret: i32, func: &'static str) -> Result<(), VmError> { + if ret < 0 { + Err(VmError::Krun { func, code: ret }) + } else { + Ok(()) + } +} + +// ── Configuration ────────────────────────────────────────────────────── + +/// Networking backend for the microVM. +#[derive(Debug, Clone)] +pub enum NetBackend { + /// TSI (Transparent Socket Impersonation) — default libkrun networking. + /// Simple but intercepts guest loopback connections, breaking k3s. + Tsi, + + /// No networking — disable vsock/TSI entirely. For debugging only. + None, + + /// gvproxy (vfkit mode) — real `eth0` interface via virtio-net. + /// Requires gvproxy binary on the host. Port forwarding is done + /// through gvproxy's HTTP API. + Gvproxy { + /// Path to the gvproxy binary. + binary: PathBuf, + }, +} + +/// Host Unix socket bridged into the guest as a vsock port. +#[derive(Debug, Clone)] +pub struct VsockPort { + pub port: u32, + pub socket_path: PathBuf, + pub listen: bool, +} + +/// Host-backed raw block image attached to the VM for mutable guest state. 
+#[derive(Debug, Clone)] +pub struct StateDiskConfig { + /// Path to the sparse raw image on the host. + pub path: PathBuf, + + /// Size of the raw image in bytes. + pub size_bytes: u64, + + /// Guest-visible libkrun block ID. + pub block_id: String, + + /// Guest device path used by the init script. + pub guest_device: String, +} + +impl StateDiskConfig { + fn for_rootfs(rootfs: &Path) -> Self { + Self { + path: default_state_disk_path(rootfs), + size_bytes: DEFAULT_STATE_DISK_SIZE_BYTES, + block_id: DEFAULT_STATE_DISK_BLOCK_ID.to_string(), + guest_device: DEFAULT_STATE_DISK_GUEST_DEVICE.to_string(), + } + } +} + +/// Configuration for a libkrun microVM. +pub struct VmConfig { + /// Path to the extracted rootfs directory (aarch64 Linux). + pub rootfs: PathBuf, + + /// Number of virtual CPUs. + pub vcpus: u8, + + /// RAM in MiB. + pub mem_mib: u32, + + /// Executable path inside the VM. + pub exec_path: String, + + /// Arguments to the executable (argv, excluding argv\[0\]). + pub args: Vec, + + /// Environment variables in `KEY=VALUE` form. + /// If empty, a minimal default set is used. + pub env: Vec, + + /// Working directory inside the VM. + pub workdir: String, + + /// TCP port mappings in `"host_port:guest_port"` form. + /// Only used with TSI networking. + pub port_map: Vec, + + /// Optional host Unix sockets exposed to the guest over vsock. + pub vsock_ports: Vec, + + /// libkrun log level (0=Off .. 5=Trace). + pub log_level: u32, + + /// Optional file path for VM console output. If `None`, console output + /// goes to the parent directory of the rootfs as `console.log`. + pub console_output: Option, + + /// Networking backend. + pub net: NetBackend, + + /// Wipe all runtime state (containerd tasks/sandboxes, kubelet pods) + /// before booting. Recovers from corrupted state after a crash. + pub reset: bool, + + /// Gateway metadata name used for host-side config and mTLS material. 
+ pub gateway_name: String, + + /// Optional host-backed raw block image for mutable guest state. + pub state_disk: Option, +} + +impl VmConfig { + /// Default gateway configuration: boots k3s server inside the VM. + /// + /// Runs `/srv/openshell-vm-init.sh` which mounts essential filesystems, + /// deploys the `OpenShell` helm chart, and execs `k3s server`. + /// Exposes the `OpenShell` gateway on port 30051. + pub fn gateway(rootfs: PathBuf) -> Self { + let state_disk = StateDiskConfig::for_rootfs(&rootfs); + Self { + vsock_ports: vec![VsockPort { + port: VM_EXEC_VSOCK_PORT, + socket_path: vm_exec_socket_path(&rootfs), + listen: true, + }], + rootfs, + vcpus: 4, + mem_mib: 8192, + exec_path: "/srv/openshell-vm-init.sh".to_string(), + args: vec![], + env: vec![ + "HOME=/root".to_string(), + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin".to_string(), + "TERM=xterm".to_string(), + ], + workdir: "/".to_string(), + port_map: vec![ + // OpenShell server — with bridge CNI the pod listens on + // 8080 inside its own network namespace (10.42.0.x), not + // on the VM's root namespace. The NodePort service + // (kube-proxy nftables) forwards VM:30051 → pod:8080. + // gvproxy maps host:30051 → VM:30051 to complete the path. + "30051:30051".to_string(), + ], + log_level: 3, // Info — for debugging + console_output: None, + net: NetBackend::Gvproxy { + binary: default_runtime_gvproxy_path(), + }, + reset: false, + gateway_name: format!("{GATEWAY_NAME_PREFIX}-default"), + state_disk: Some(state_disk), + } + } +} + +/// Base prefix for gateway metadata names. +const GATEWAY_NAME_PREFIX: &str = "openshell-vm"; +const DEFAULT_STATE_DISK_SIZE_BYTES: u64 = 32 * 1024 * 1024 * 1024; +const DEFAULT_STATE_DISK_BLOCK_ID: &str = "openshell-state"; +const DEFAULT_STATE_DISK_GUEST_DEVICE: &str = "/dev/vda"; + +/// Resolve the gateway metadata name for an instance name. 
+pub fn gateway_name(instance_name: &str) -> Result { + Ok(format!( + "{GATEWAY_NAME_PREFIX}-{}", + sanitize_instance_name(instance_name)? + )) +} + +/// Resolve the rootfs path for a named instance (including the default gateway). +/// +/// Layout: `$XDG_DATA_HOME/openshell/openshell-vm/{version}/instances/{name}/rootfs` +pub fn named_rootfs_dir(instance_name: &str) -> Result { + let name = sanitize_instance_name(instance_name)?; + let base = openshell_bootstrap::paths::openshell_vm_base_dir() + .map_err(|e| VmError::RuntimeState(format!("resolve openshell-vm base dir: {e}")))?; + Ok(base + .join(env!("CARGO_PKG_VERSION")) + .join("instances") + .join(name) + .join("rootfs")) +} + +/// Ensure a named instance rootfs exists, extracting from the embedded +/// rootfs tarball on first use. +/// +/// The default (unnamed) gateway should be routed here as `"default"`. +pub fn ensure_named_rootfs(instance_name: &str) -> Result { + let instance_rootfs = named_rootfs_dir(instance_name)?; + if instance_rootfs.is_dir() { + return Ok(instance_rootfs); + } + + if embedded::has_embedded_rootfs() { + // Clean up rootfs directories left by older binary versions. + embedded::cleanup_old_rootfs()?; + + embedded::extract_rootfs_to(&instance_rootfs)?; + return Ok(instance_rootfs); + } + + Err(VmError::RootfsNotFound { + path: instance_rootfs.display().to_string(), + }) +} + +/// Ensure the requested rootfs exists, extracting the embedded rootfs when needed. +/// +/// When `rootfs` is `None`, this uses the named-instance layout under +/// `$XDG_DATA_HOME/openshell/openshell-vm/{version}/instances//rootfs`. +/// When `force_recreate` is true and the target exists, it is removed first. 
+pub fn prepare_rootfs( + rootfs: Option, + instance_name: &str, + force_recreate: bool, +) -> Result { + let target = match rootfs { + Some(path) => path, + None => named_rootfs_dir(instance_name)?, + }; + + if force_recreate && target.exists() { + std::fs::remove_dir_all(&target).map_err(|e| { + VmError::HostSetup(format!("remove existing rootfs {}: {e}", target.display())) + })?; + } + + if target.is_dir() { + return Ok(target); + } + + if embedded::has_embedded_rootfs() { + if target == named_rootfs_dir(instance_name)? { + embedded::cleanup_old_rootfs()?; + } + embedded::extract_rootfs_to(&target)?; + return Ok(target); + } + + Err(VmError::RootfsNotFound { + path: target.display().to_string(), + }) +} + +fn sanitize_instance_name(name: &str) -> Result { + let trimmed = name.trim(); + if trimmed.is_empty() { + return Err(VmError::RuntimeState( + "instance name cannot be empty".to_string(), + )); + } + + let mut out = String::with_capacity(trimmed.len()); + for ch in trimmed.chars() { + if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' { + out.push(ch); + } else { + return Err(VmError::RuntimeState(format!( + "invalid instance name '{trimmed}': only [A-Za-z0-9_-] are allowed" + ))); + } + } + + Ok(out) +} + +// ── Helpers ───────────────────────────────────────────────────────────── + +/// Build a null-terminated C string array from a slice of strings. +/// +/// Returns both the `CString` owners (to keep them alive) and the pointer array. 
+fn c_string_array(strings: &[&str]) -> Result<(Vec, Vec<*const libc::c_char>), VmError> { + let owned: Vec = strings + .iter() + .map(|s| CString::new(*s)) + .collect::, _>>()?; + let mut ptrs: Vec<*const libc::c_char> = owned.iter().map(|c| c.as_ptr()).collect(); + ptrs.push(ptr::null()); // null terminator + Ok((owned, ptrs)) +} + +const VM_RUNTIME_DIR_ENV: &str = "OPENSHELL_VM_RUNTIME_DIR"; + +pub(crate) fn configured_runtime_dir() -> Result { + // Allow override for development + if let Some(path) = std::env::var_os(VM_RUNTIME_DIR_ENV) { + let path = PathBuf::from(path); + tracing::debug!( + path = %path.display(), + "Using runtime from OPENSHELL_VM_RUNTIME_DIR" + ); + return Ok(path); + } + + // Use embedded runtime (extracts on first use) + embedded::ensure_runtime_extracted() +} + +fn validate_runtime_dir(dir: &Path) -> Result<(), VmError> { + if !dir.is_dir() { + return Err(VmError::BinaryNotFound { + path: dir.display().to_string(), + hint: format!( + "VM runtime not found. Run `mise run vm:build:embedded` or set {VM_RUNTIME_DIR_ENV}" + ), + }); + } + + let libkrun = dir.join(ffi::required_runtime_lib_name()); + if !libkrun.is_file() { + return Err(VmError::BinaryNotFound { + path: libkrun.display().to_string(), + hint: "runtime is incomplete: missing libkrun".to_string(), + }); + } + + let has_krunfw = std::fs::read_dir(dir) + .map_err(|e| VmError::HostSetup(format!("read {}: {e}", dir.display())))? 
+ .filter_map(Result::ok) + .any(|entry| { + entry + .file_name() + .to_string_lossy() + .starts_with("libkrunfw.") + }); + if !has_krunfw { + return Err(VmError::BinaryNotFound { + path: dir.display().to_string(), + hint: "runtime is incomplete: missing libkrunfw".to_string(), + }); + } + + let gvproxy = dir.join("gvproxy"); + if !gvproxy.is_file() { + return Err(VmError::BinaryNotFound { + path: gvproxy.display().to_string(), + hint: "runtime is incomplete: missing gvproxy".to_string(), + }); + } + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt as _; + + let mode = std::fs::metadata(&gvproxy) + .map_err(|e| VmError::HostSetup(format!("stat {}: {e}", gvproxy.display())))? + .permissions() + .mode(); + if mode & 0o111 == 0 { + return Err(VmError::HostSetup(format!( + "gvproxy is not executable: {}", + gvproxy.display() + ))); + } + } + + Ok(()) +} + +fn resolve_runtime_bundle() -> Result { + let runtime_dir = configured_runtime_dir()?; + // Validate the directory has required files + validate_runtime_dir(&runtime_dir)?; + Ok(runtime_dir.join("gvproxy")) +} + +pub fn default_runtime_gvproxy_path() -> PathBuf { + configured_runtime_dir() + .or_else(|_| embedded::runtime_cache_path()) + .unwrap_or_else(|_| PathBuf::from("gvproxy")) + .join("gvproxy") +} + +/// Check if the given path looks like an openshell-vm instance rootfs. 
fn is_instance_rootfs_path(path: &Path) -> bool {
    // Matches: .../openshell/openshell-vm/.../instances/.../rootfs
    // Substring match on the lossy string plus a final-component check; a
    // path merely *containing* these fragments elsewhere would also match.
    let s = path.to_string_lossy();
    s.contains("openshell/openshell-vm") && s.contains("instances") && path.ends_with("rootfs")
}

/// Prepend `runtime_dir` to `DYLD_FALLBACK_LIBRARY_PATH` so the macOS
/// dynamic loader can find libkrun/libkrunfw in the runtime bundle.
/// Existing entries are preserved after the runtime dir.
#[cfg(target_os = "macos")]
fn configure_runtime_loader_env(runtime_dir: &Path) -> Result<(), VmError> {
    let existing = std::env::var_os("DYLD_FALLBACK_LIBRARY_PATH");
    let mut paths = vec![runtime_dir.to_path_buf()];
    if let Some(existing) = existing {
        paths.extend(std::env::split_paths(&existing));
    }
    let joined = std::env::join_paths(paths)
        .map_err(|e| VmError::HostSetup(format!("join DYLD_FALLBACK_LIBRARY_PATH: {e}")))?;
    // SAFETY: called before the VM threads are spawned; mutating the
    // process environment is racy only with concurrent readers.
    unsafe {
        std::env::set_var("DYLD_FALLBACK_LIBRARY_PATH", joined);
    }
    Ok(())
}

#[cfg(target_os = "linux")]
fn configure_runtime_loader_env(runtime_dir: &Path) -> Result<(), VmError> {
    // On Linux, libkrun.so has a DT_NEEDED for libkrunfw.so. Even though we
    // preload libkrunfw with RTLD_GLOBAL, the ELF dynamic linker still resolves
    // DT_NEEDED entries through LD_LIBRARY_PATH / system paths. Without this,
    // dlopen("libkrun.so") fails if libkrunfw.so is only in the runtime bundle.
    let existing = std::env::var_os("LD_LIBRARY_PATH");
    let mut paths = vec![runtime_dir.to_path_buf()];
    if let Some(existing) = existing {
        paths.extend(std::env::split_paths(&existing));
    }
    let joined = std::env::join_paths(paths)
        .map_err(|e| VmError::HostSetup(format!("join LD_LIBRARY_PATH: {e}")))?;
    unsafe {
        std::env::set_var("LD_LIBRARY_PATH", joined);
    }
    Ok(())
}

// No loader configuration is needed (or possible) on other platforms.
#[cfg(not(any(target_os = "macos", target_os = "linux")))]
fn configure_runtime_loader_env(_runtime_dir: &Path) -> Result<(), VmError> {
    Ok(())
}

/// Raise RLIMIT_NOFILE to its hard limit (best effort; failures ignored).
/// The VM plus gvproxy can hold many file descriptors open at once.
fn raise_nofile_limit() {
    #[cfg(unix)]
    unsafe {
        let mut rlim = libc::rlimit {
            rlim_cur: 0,
            rlim_max: 0,
        };
        if libc::getrlimit(libc::RLIMIT_NOFILE, &raw mut rlim) == 0 {
            rlim.rlim_cur = rlim.rlim_max;
            let _ = libc::setrlimit(libc::RLIMIT_NOFILE, &raw const rlim);
        }
    }
}

/// Log runtime provenance information for diagnostics.
///
/// Prints the libkrun/libkrunfw versions, artifact hashes, and whether
/// a custom runtime is in use. This makes it easy to correlate VM issues
/// with the specific runtime bundle.
fn log_runtime_provenance(runtime_dir: &Path) {
    if let Some(prov) = ffi::runtime_provenance() {
        eprintln!("runtime: {}", runtime_dir.display());
        eprintln!(" libkrun: {}", prov.libkrun_path.display());
        for krunfw in &prov.libkrunfw_paths {
            let name = krunfw.file_name().map_or_else(
                || "unknown".to_string(),
                |n| n.to_string_lossy().to_string(),
            );
            eprintln!(" libkrunfw: {name}");
        }
        if let Some(ref sha) = prov.libkrunfw_sha256 {
            // Show a truncated digest; full hashes are noisy in a console log.
            let short = if sha.len() > 12 { &sha[..12] } else { sha };
            eprintln!(" sha256: {short}...");
        }
        if prov.is_custom {
            eprintln!(" type: custom (OpenShell-built)");
            // Parse provenance.json for additional details.
            if let Some(ref json) = prov.provenance_json {
                // Extract key fields from provenance metadata.
                for key in &["libkrunfw_commit", "kernel_version", "build_timestamp"] {
                    if let Some(val) = extract_json_string(json, key) {
                        eprintln!(" {}: {}", key.replace('_', "-"), val);
                    }
                }
            }
        } else {
            eprintln!(" type: stock (system/homebrew)");
        }
    }
}

/// Extract a string value from a JSON object by key.
/// Returns `None` if the input is not a JSON object, the key is absent,
/// or the value is not a string.
fn extract_json_string(json: &str, key: &str) -> Option {
    let map: serde_json::Map = serde_json::from_str(json).ok()?;
    map.get(key)?.as_str().map(ToOwned::to_owned)
}

/// Map a numeric verbosity (0..) onto libkrun's log-level constants.
/// Values above 4 saturate at TRACE.
fn clamp_log_level(level: u32) -> u32 {
    match level {
        0 => ffi::KRUN_LOG_LEVEL_OFF,
        1 => ffi::KRUN_LOG_LEVEL_ERROR,
        2 => ffi::KRUN_LOG_LEVEL_WARN,
        3 => ffi::KRUN_LOG_LEVEL_INFO,
        4 => ffi::KRUN_LOG_LEVEL_DEBUG,
        _ => ffi::KRUN_LOG_LEVEL_TRACE,
    }
}

/// Thin RAII wrapper around a libkrun context. Each builder method maps
/// 1:1 onto a `krun_*` FFI call; the context is freed on drop.
struct VmContext {
    // Loaded libkrun function table (lives for the process lifetime).
    krun: &'static ffi::LibKrun,
    // Context handle returned by krun_create_ctx.
    ctx_id: u32,
}

impl VmContext {
    /// Initialise libkrun logging and create a fresh context.
    fn create(log_level: u32) -> Result {
        let krun = ffi::libkrun()?;
        unsafe {
            check(
                (krun.krun_init_log)(
                    ffi::KRUN_LOG_TARGET_DEFAULT,
                    clamp_log_level(log_level),
                    ffi::KRUN_LOG_STYLE_AUTO,
                    ffi::KRUN_LOG_OPTION_NO_ENV,
                ),
                "krun_init_log",
            )?;
        }

        // krun_create_ctx returns a negative errno-style code on failure.
        let ctx_id = unsafe { (krun.krun_create_ctx)() };
        if ctx_id < 0 {
            return Err(VmError::Krun {
                func: "krun_create_ctx",
                code: ctx_id,
            });
        }

        Ok(Self {
            krun,
            ctx_id: ctx_id as u32,
        })
    }

    /// Set vCPU count and guest memory size (MiB).
    fn set_vm_config(&self, vcpus: u8, mem_mib: u32) -> Result<(), VmError> {
        unsafe {
            check(
                (self.krun.krun_set_vm_config)(self.ctx_id, vcpus, mem_mib),
                "krun_set_vm_config",
            )
        }
    }

    /// Set the virtio-fs root filesystem directory.
    fn set_root(&self, rootfs: &Path) -> Result<(), VmError> {
        let rootfs_c = path_to_cstring(rootfs)?;
        unsafe {
            check(
                (self.krun.krun_set_root)(self.ctx_id, rootfs_c.as_ptr()),
                "krun_set_root",
            )
        }
    }

    /// Attach the raw-format state disk via krun_add_disk3.
    /// Errors if the loaded libkrun does not export that symbol.
    fn add_state_disk(&self, state_disk: &StateDiskConfig) -> Result<(), VmError> {
        let Some(add_disk3) = self.krun.krun_add_disk3 else {
            return Err(VmError::HostSetup(
                "libkrun runtime does not expose krun_add_disk3; rebuild the VM runtime with block support"
                    .to_string(),
            ));
        };

        let block_id_c = CString::new(state_disk.block_id.as_str())?;
        let disk_path_c = path_to_cstring(&state_disk.path)?;
        unsafe {
            check(
                add_disk3(
                    self.ctx_id,
                    block_id_c.as_ptr(),
                    disk_path_c.as_ptr(),
                    ffi::KRUN_DISK_FORMAT_RAW,
                    false,
                    false,
                    state_disk_sync_mode(),
                ),
                "krun_add_disk3",
            )
        }
    }

    /// Set the guest process working directory.
    fn set_workdir(&self, workdir: &str) -> Result<(), VmError> {
        let workdir_c = CString::new(workdir)?;
        unsafe {
            check(
                (self.krun.krun_set_workdir)(self.ctx_id, workdir_c.as_ptr()),
                "krun_set_workdir",
            )
        }
    }

    /// Turn off the implicit TSI vsock device.
    fn disable_implicit_vsock(&self) -> Result<(), VmError> {
        unsafe {
            check(
                (self.krun.krun_disable_implicit_vsock)(self.ctx_id),
                "krun_disable_implicit_vsock",
            )
        }
    }

    /// Add an explicit vsock device with the given TSI feature bits.
    fn add_vsock(&self, tsi_features: u32) -> Result<(), VmError> {
        unsafe {
            check(
                (self.krun.krun_add_vsock)(self.ctx_id, tsi_features),
                "krun_add_vsock",
            )
        }
    }

    /// Attach a virtio-net device backed by a unix datagram socket (macOS,
    /// vfkit-style). The `-1` fd argument tells libkrun to open the socket
    /// path itself.
    #[cfg(target_os = "macos")]
    fn add_net_unixgram(
        &self,
        socket_path: &Path,
        mac: &[u8; 6],
        features: u32,
        flags: u32,
    ) -> Result<(), VmError> {
        let sock_c = path_to_cstring(socket_path)?;
        unsafe {
            check(
                (self.krun.krun_add_net_unixgram)(
                    self.ctx_id,
                    sock_c.as_ptr(),
                    -1,
                    mac.as_ptr(),
                    features,
                    flags,
                ),
                "krun_add_net_unixgram",
            )
        }
    }

    #[allow(dead_code)] // FFI binding for future use (e.g. Linux networking)
    fn add_net_unixstream(
        &self,
        socket_path: &Path,
        mac: &[u8; 6],
        features: u32,
    ) -> Result<(), VmError> {
        let sock_c = path_to_cstring(socket_path)?;
        unsafe {
            check(
                (self.krun.krun_add_net_unixstream)(
                    self.ctx_id,
                    sock_c.as_ptr(),
                    -1,
                    mac.as_ptr(),
                    features,
                    0,
                ),
                "krun_add_net_unixstream",
            )
        }
    }

    /// Register TSI port mappings ("host:guest" strings).
    fn set_port_map(&self, port_map: &[String]) -> Result<(), VmError> {
        let port_strs: Vec<&str> = port_map.iter().map(String::as_str).collect();
        // _port_owners keeps the CStrings alive while the pointers are used.
        let (_port_owners, port_ptrs) = c_string_array(&port_strs)?;
        unsafe {
            check(
                (self.krun.krun_set_port_map)(self.ctx_id, port_ptrs.as_ptr()),
                "krun_set_port_map",
            )
        }
    }

    /// Map a guest vsock port onto a host unix socket.
    fn add_vsock_port(&self, port: &VsockPort) -> Result<(), VmError> {
        let socket_c = path_to_cstring(&port.socket_path)?;
        unsafe {
            check(
                (self.krun.krun_add_vsock_port2)(
                    self.ctx_id,
                    port.port,
                    socket_c.as_ptr(),
                    port.listen,
                ),
                "krun_add_vsock_port2",
            )
        }
    }

    /// Redirect the guest console to a file on the host.
    fn set_console_output(&self, path: &Path) -> Result<(), VmError> {
        let console_c = path_to_cstring(path)?;
        unsafe {
            check(
                (self.krun.krun_set_console_output)(self.ctx_id, console_c.as_ptr()),
                "krun_set_console_output",
            )
        }
    }

    /// Set the guest entry point: executable path, argv and envp.
    fn set_exec(&self, exec_path: &str, args: &[String], env: &[String]) -> Result<(), VmError> {
        let exec_c = CString::new(exec_path)?;
        let argv_strs: Vec<&str> = args.iter().map(String::as_str).collect();
        let (_argv_owners, argv_ptrs) = c_string_array(&argv_strs)?;
        let env_strs: Vec<&str> = env.iter().map(String::as_str).collect();
        let (_env_owners, env_ptrs) = c_string_array(&env_strs)?;

        unsafe {
            check(
                (self.krun.krun_set_exec)(
                    self.ctx_id,
                    exec_c.as_ptr(),
                    argv_ptrs.as_ptr(),
                    env_ptrs.as_ptr(),
                ),
                "krun_set_exec",
            )
        }
    }

    /// Enter the VM. Per libkrun's contract this call does not return on
    /// success; callers fork first (see `launch`).
    fn start_enter(&self) -> i32 {
        unsafe { (self.krun.krun_start_enter)(self.ctx_id) }
    }
}

impl Drop for VmContext {
    fn drop(&mut self) {
        unsafe {
            let ret = (self.krun.krun_free_ctx)(self.ctx_id);
            if ret < 0 {
                // Nothing actionable at drop time; just surface it.
                eprintln!(
                    "warning: krun_free_ctx({}) failed with code {ret}",
                    self.ctx_id
                );
            }
        }
    }
}

/// RAII guard that kills and waits on a gvproxy child process when dropped.
///
/// This prevents orphaned gvproxy processes when early `?` returns in the
/// launch function cause the child to be dropped before cleanup code runs.
/// Call [`GvproxyGuard::disarm`] to take ownership of the child when it
/// should outlive the guard (i.e., after a successful fork).
struct GvproxyGuard {
    // `None` after disarm() or drop; `Some` while the guard owns the child.
    child: Option,
}

impl GvproxyGuard {
    fn new(child: std::process::Child) -> Self {
        Self { child: Some(child) }
    }

    /// Take the child out of the guard, preventing it from being killed on drop.
    /// Use this after the launch is successful and the parent will manage cleanup.
    fn disarm(&mut self) -> Option {
        self.child.take()
    }

    /// Get the child's PID without disarming.
    fn id(&self) -> Option {
        self.child.as_ref().map(std::process::Child::id)
    }
}

impl Drop for GvproxyGuard {
    fn drop(&mut self) {
        if let Some(mut child) = self.child.take() {
            let pid = child.id();
            // kill then wait: avoid leaving a zombie behind.
            let _ = child.kill();
            let _ = child.wait();
            eprintln!("gvproxy cleaned up (pid {pid})");
        }
    }
}

/// Issue a gvproxy expose call via its HTTP API (unix socket).
///
/// Sends a raw HTTP/1.1 POST request over the unix socket to avoid
/// depending on `curl` being installed on the host.
+fn gvproxy_expose(api_sock: &Path, body: &str) -> Result<(), String> { + use std::io::{Read, Write}; + use std::os::unix::net::UnixStream; + + let mut stream = + UnixStream::connect(api_sock).map_err(|e| format!("connect to gvproxy API socket: {e}"))?; + + let request = format!( + "POST /services/forwarder/expose HTTP/1.1\r\n\ + Host: localhost\r\n\ + Content-Type: application/json\r\n\ + Content-Length: {}\r\n\ + Connection: close\r\n\ + \r\n\ + {}", + body.len(), + body, + ); + + stream + .write_all(request.as_bytes()) + .map_err(|e| format!("write to gvproxy API: {e}"))?; + + // Read just enough of the response to get the status line. + let mut buf = [0u8; 1024]; + let n = stream + .read(&mut buf) + .map_err(|e| format!("read from gvproxy API: {e}"))?; + let response = String::from_utf8_lossy(&buf[..n]); + + // Parse the HTTP status code from the first line (e.g. "HTTP/1.1 200 OK"). + let status = response + .lines() + .next() + .and_then(|line| line.split_whitespace().nth(1)) + .unwrap_or("0"); + + match status { + "200" | "204" => Ok(()), + _ => { + let first_line = response.lines().next().unwrap_or(""); + Err(format!("gvproxy API: {first_line}")) + } + } +} + +/// Kill a stale gvproxy process from a previous openshell-vm run. +/// +/// If the CLI crashes or is killed before cleanup, gvproxy keeps running +/// and holds port 2222. A new gvproxy instance then fails with +/// "bind: address already in use". +/// +/// We only kill the specific gvproxy PID recorded in the VM runtime state +/// to avoid disrupting unrelated gvproxy instances (e.g. Podman Desktop). +/// Before sending SIGTERM, we verify the process name contains "gvproxy" +/// to guard against PID reuse. 
fn kill_stale_gvproxy(rootfs: &Path) {
    // Recover the previously-recorded gvproxy PID from the runtime state
    // file; any read/parse failure is treated as "no stale process".
    let state_path = vm_state_path(rootfs);
    let pid = std::fs::read(&state_path)
        .ok()
        .and_then(|bytes| serde_json::from_slice::(&bytes).ok())
        .and_then(|state| state.gvproxy_pid);

    if let Some(gvproxy_pid) = pid {
        // Verify the process is still alive before killing it.
        // kill(pid, 0) probes for existence without sending a signal.
        let pid_i32 = gvproxy_pid as libc::pid_t;
        let is_alive = unsafe { libc::kill(pid_i32, 0) } == 0;
        if is_alive {
            // Verify the process is actually gvproxy before killing.
            // Without this check, PID reuse could cause us to kill an
            // unrelated process.
            if !is_process_named(pid_i32, "gvproxy") {
                eprintln!(
                    "Stale gvproxy pid {gvproxy_pid} is no longer gvproxy (PID reused), skipping kill"
                );
                return;
            }
            unsafe {
                libc::kill(pid_i32, libc::SIGTERM);
            }
            eprintln!("Killed stale gvproxy process (pid {gvproxy_pid})");
            // Brief pause for the port to be released.
            std::thread::sleep(std::time::Duration::from_millis(200));
        }
    }
}

/// Check whether a process with the given PID has the expected name.
///
/// On macOS, shells out to `ps` to query the process name. On Linux, reads
/// `/proc/<pid>/comm`. Returns `false` if the process name cannot be
/// determined (fail-safe: don't kill if we can't verify).
#[cfg(target_os = "macos")]
fn is_process_named(pid: libc::pid_t, expected: &str) -> bool {
    // Use `ps -p <pid> -o comm=` to get just the process name.
    // This avoids depending on libc kinfo_proc struct layout.
    std::process::Command::new("ps")
        .args(["-p", &pid.to_string(), "-o", "comm="])
        .output()
        .ok()
        .and_then(|output| {
            if output.status.success() {
                String::from_utf8(output.stdout).ok()
            } else {
                None
            }
        })
        .is_some_and(|name| name.trim().contains(expected))
}

#[cfg(target_os = "linux")]
fn is_process_named(pid: libc::pid_t, expected: &str) -> bool {
    let comm_path = format!("/proc/{pid}/comm");
    std::fs::read_to_string(comm_path)
        .map(|name| name.trim().contains(expected))
        .unwrap_or(false)
}

#[cfg(not(any(target_os = "macos", target_os = "linux")))]
fn is_process_named(_pid: libc::pid_t, _expected: &str) -> bool {
    // Cannot verify on this platform — fail-safe: don't kill.
    false
}

/// Derive a filesystem-safe key from the rootfs directory name: characters
/// outside [A-Za-z0-9_-] become '_'; empty input falls back to
/// "openshell-vm". Used to name sibling files (logs, state disk, console).
fn vm_rootfs_key(rootfs: &Path) -> String {
    let name = rootfs
        .file_name()
        .and_then(|part| part.to_str())
        .unwrap_or("openshell-vm");
    let mut out = String::with_capacity(name.len());
    for ch in name.chars() {
        if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' {
            out.push(ch);
        } else {
            out.push('_');
        }
    }
    if out.is_empty() {
        "openshell-vm".to_string()
    } else {
        out
    }
}

/// Default state-disk image path: "<key>-state.raw" next to the rootfs dir.
fn default_state_disk_path(rootfs: &Path) -> PathBuf {
    rootfs
        .parent()
        .unwrap_or(rootfs)
        .join(format!("{}-state.raw", vm_rootfs_key(rootfs)))
}

/// Create (or grow) the sparse raw state-disk image. Existing images are
/// never truncated — only extended when smaller than the configured size.
fn ensure_state_disk_image(state_disk: &StateDiskConfig) -> Result<(), VmError> {
    if let Some(parent) = state_disk.path.parent() {
        std::fs::create_dir_all(parent).map_err(|e| {
            VmError::HostSetup(format!("create state disk dir {}: {e}", parent.display()))
        })?;
    }

    // truncate(false): keep any existing guest data in the image.
    let file = std::fs::OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .truncate(false)
        .open(&state_disk.path)
        .map_err(|e| {
            VmError::HostSetup(format!(
                "open state disk {}: {e}",
                state_disk.path.display()
            ))
        })?;

    let current_len = file
        .metadata()
        .map_err(|e| {
            VmError::HostSetup(format!(
                "stat state disk {}: {e}",
                state_disk.path.display()
            ))
        })?
        .len();
    if current_len < state_disk.size_bytes {
        // set_len on a larger size produces a sparse extension.
        file.set_len(state_disk.size_bytes).map_err(|e| {
            VmError::HostSetup(format!(
                "resize state disk {} to {} bytes: {e}",
                state_disk.path.display(),
                state_disk.size_bytes
            ))
        })?;
    }

    Ok(())
}

/// Disk sync mode passed to krun_add_disk3: relaxed on macOS, full elsewhere.
fn state_disk_sync_mode() -> u32 {
    #[cfg(target_os = "macos")]
    {
        ffi::KRUN_SYNC_RELAXED
    }
    #[cfg(not(target_os = "macos"))]
    {
        ffi::KRUN_SYNC_FULL
    }
}

/// FNV-1a hash of the path string, truncated to 48 bits and formatted as
/// 12 hex chars. Used to build short, stable per-rootfs socket names.
fn hash_path_id(path: &Path) -> String {
    let mut hash: u64 = 0xcbf29ce484222325;
    for byte in path.to_string_lossy().as_bytes() {
        hash ^= u64::from(*byte);
        hash = hash.wrapping_mul(0x100000001b3);
    }
    format!("{:012x}", hash & 0x0000_ffff_ffff_ffff)
}

/// Return a secure base directory for temporary socket files.
///
/// Prefers `XDG_RUNTIME_DIR` (per-user, restricted permissions on Linux),
/// falls back to `/tmp`. After `create_dir_all`, validates the directory
/// is not a symlink and is owned by the current user.
fn secure_socket_base(subdir: &str) -> Result {
    let base = if let Some(xdg) = std::env::var_os("XDG_RUNTIME_DIR") {
        PathBuf::from(xdg)
    } else {
        let mut base = PathBuf::from("/tmp");
        if !base.is_dir() {
            base = std::env::temp_dir();
        }
        base
    };
    let dir = base.join(subdir);

    // If the path exists, verify it is not a symlink before using it.
    if dir.exists() {
        let meta = dir
            .symlink_metadata()
            .map_err(|e| VmError::HostSetup(format!("lstat {}: {e}", dir.display())))?;
        if meta.file_type().is_symlink() {
            return Err(VmError::HostSetup(format!(
                "socket directory {} is a symlink — refusing to use it",
                dir.display()
            )));
        }
        // Verify ownership matches current user.
        #[cfg(unix)]
        {
            use std::os::unix::fs::MetadataExt as _;
            let uid = unsafe { libc::getuid() };
            if meta.uid() != uid {
                return Err(VmError::HostSetup(format!(
                    "socket directory {} is owned by uid {} but we are uid {} — refusing to use it",
                    dir.display(),
                    meta.uid(),
                    uid
                )));
            }
        }
    } else {
        std::fs::create_dir_all(&dir)
            .map_err(|e| VmError::HostSetup(format!("create socket dir {}: {e}", dir.display())))?;
        // Set restrictive permissions on the newly created directory.
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt as _;
            let _ = std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o700));
        }
    }

    Ok(dir)
}

/// Per-rootfs socket directory under the secure base ("ovm-gv/<hash>").
fn gvproxy_socket_dir(rootfs: &Path) -> Result {
    let dir = secure_socket_base("ovm-gv")?;

    // macOS unix socket path limit is tight (~104 bytes). Keep paths very short.
    let id = hash_path_id(rootfs);
    Ok(dir.join(id))
}

/// Host port of the first "host:guest" port mapping, or the default
/// gateway port when none is configured/parseable.
fn gateway_host_port(config: &VmConfig) -> u16 {
    config
        .port_map
        .first()
        .and_then(|pm| pm.split(':').next())
        .and_then(|port| port.parse::().ok())
        .unwrap_or(DEFAULT_GATEWAY_PORT)
}

/// Ask the kernel for a free localhost TCP port for gvproxy's SSH forward.
/// NOTE(review): the port is released before gvproxy binds it, so another
/// process could grab it in between (inherent TOCTOU of this technique).
fn pick_gvproxy_ssh_port() -> Result {
    let listener = std::net::TcpListener::bind(("127.0.0.1", 0))
        .map_err(|e| VmError::HostSetup(format!("allocate gvproxy ssh port on localhost: {e}")))?;
    let port = listener
        .local_addr()
        .map_err(|e| VmError::HostSetup(format!("read gvproxy ssh port: {e}")))?
        .port();
    drop(listener);
    Ok(port)
}

/// Convert a host path to a CString for FFI; fails on non-UTF-8 paths or
/// interior NUL bytes.
fn path_to_cstring(path: &Path) -> Result {
    let s = path
        .to_str()
        .ok_or_else(|| VmError::InvalidPath(path.display().to_string()))?;
    Ok(CString::new(s)?)
}

// ── Launch ──────────────────────────────────────────────────────────────

/// Configure and launch a libkrun microVM.
///
/// This forks the process. The child enters the VM (never returns); the
/// parent blocks until the VM exits or a signal is received.
///
/// Returns the VM exit code (from `waitpid`).
#[allow(clippy::similar_names)]
pub fn launch(config: &VmConfig) -> Result {
    // Auto-extract embedded rootfs if using an instance path and it doesn't exist
    if !config.rootfs.is_dir()
        && is_instance_rootfs_path(&config.rootfs)
        && embedded::has_embedded_rootfs()
    {
        embedded::extract_rootfs_to(&config.rootfs)?;
    }

    // Validate rootfs
    if !config.rootfs.is_dir() {
        return Err(VmError::RootfsNotFound {
            path: config.rootfs.display().to_string(),
        });
    }
    // The init-script exec path marks a "real" gateway boot; one-off exec
    // invocations skip the single-instance and state checks below.
    if config.exec_path == "/srv/openshell-vm-init.sh" {
        ensure_vm_not_running(&config.rootfs)?;
    }

    // Acquire an exclusive flock on the rootfs lock file. This is held
    // by the parent process for the VM's entire lifetime. If this process
    // is killed (even SIGKILL), the OS releases the lock automatically.
    // This prevents a second launch or rootfs rebuild from corrupting a
    // running VM's filesystem via virtio-fs.
    let _rootfs_lock = if config.exec_path == "/srv/openshell-vm-init.sh" {
        Some(acquire_rootfs_lock(&config.rootfs)?)
    } else {
        None
    };

    // Check for a corrupt kine (SQLite) database and remove it if the
    // header is invalid. Stale bootstrap locks are handled inside the VM
    // by the init script (sqlite3 DELETE before k3s starts). This runs on
    // every normal boot (not --reset, which wipes k3s/server/ entirely).
    // Must happen after the lock so we know no other VM process is using
    // the rootfs.
    if !config.reset && config.exec_path == "/srv/openshell-vm-init.sh" {
        recover_corrupt_kine_db(&config.rootfs)?;
    }

    // Wipe stale containerd/kubelet runtime state if requested.
    // This must happen after the lock (to confirm no other VM is using
    // the rootfs) but before booting (so the new VM starts clean).
    if config.reset {
        reset_runtime_state(&config.rootfs, &config.gateway_name)?;
    }
    // --reset also discards the state disk so the guest re-initialises it.
    if config.reset
        && let Some(state_disk) = &config.state_disk
        && let Err(err) = std::fs::remove_file(&state_disk.path)
        && err.kind() != std::io::ErrorKind::NotFound
    {
        return Err(VmError::HostSetup(format!(
            "remove state disk {}: {err}",
            state_disk.path.display()
        )));
    }
    if let Some(state_disk) = &config.state_disk {
        ensure_state_disk_image(state_disk)?;
    }

    let launch_start = Instant::now();
    eprintln!("rootfs: {}", config.rootfs.display());
    if let Some(state_disk) = &config.state_disk {
        eprintln!(
            "state disk: {} ({} GiB)",
            state_disk.path.display(),
            state_disk.size_bytes / 1024 / 1024 / 1024
        );
    }
    eprintln!("vm: {} vCPU(s), {} MiB RAM", config.vcpus, config.mem_mib);

    // The runtime is embedded in the binary and extracted on first use.
    // Can be overridden via OPENSHELL_VM_RUNTIME_DIR for development.
    let runtime_gvproxy = resolve_runtime_bundle()?;
    let runtime_dir = runtime_gvproxy.parent().ok_or_else(|| {
        VmError::HostSetup(format!(
            "runtime bundle file has no parent directory: {}",
            runtime_gvproxy.display()
        ))
    })?;
    configure_runtime_loader_env(runtime_dir)?;
    raise_nofile_limit();

    // ── Log runtime provenance ─────────────────────────────────────
    // After configuring the loader, trigger library loading so that
    // provenance is captured before we proceed with VM configuration.
    let _ = ffi::libkrun()?;
    log_runtime_provenance(runtime_dir);

    // ── Configure the microVM ──────────────────────────────────────

    let vm = VmContext::create(config.log_level)?;
    vm.set_vm_config(config.vcpus, config.mem_mib)?;
    vm.set_root(&config.rootfs)?;
    if let Some(state_disk) = &config.state_disk {
        vm.add_state_disk(state_disk)?;
    }
    vm.set_workdir(&config.workdir)?;

    // Networking setup — use a drop guard so gvproxy is killed if we
    // return early via `?` before reaching the parent's cleanup code.
    let mut gvproxy_guard: Option = None;
    let mut gvproxy_api_sock: Option = None;

    match &config.net {
        NetBackend::Tsi => {
            // Default TSI — no special setup needed.
        }
        NetBackend::None => {
            vm.disable_implicit_vsock()?;
            vm.add_vsock(0)?;
            eprintln!("Networking: disabled (no TSI, no virtio-net)");
        }
        NetBackend::Gvproxy { binary } => {
            if !binary.exists() {
                return Err(VmError::BinaryNotFound {
                    path: binary.display().to_string(),
                    hint: "Install Podman Desktop or place gvproxy in PATH".to_string(),
                });
            }

            // Create temp socket paths
            let run_dir = config
                .rootfs
                .parent()
                .unwrap_or(&config.rootfs)
                .to_path_buf();
            let rootfs_key = vm_rootfs_key(&config.rootfs);
            let sock_base = gvproxy_socket_dir(&config.rootfs)?;
            let net_sock = sock_base.with_extension("v");
            let api_sock = sock_base.with_extension("a");

            // Kill any stale gvproxy process from a previous run.
            // If gvproxy is still holding port 2222, the new instance
            // will fail with "bind: address already in use".
            kill_stale_gvproxy(&config.rootfs);

            // Clean stale sockets (including the -krun.sock file that
            // libkrun creates as its datagram endpoint on macOS).
            let _ = std::fs::remove_file(&net_sock);
            let _ = std::fs::remove_file(&api_sock);
            let krun_sock = sock_base.with_extension("v-krun.sock");
            let _ = std::fs::remove_file(&krun_sock);

            // Start gvproxy
            eprintln!("Starting gvproxy: {}", binary.display());
            let ssh_port = pick_gvproxy_ssh_port()?;
            let gvproxy_log = run_dir.join(format!("{rootfs_key}-gvproxy.log"));
            let gvproxy_log_file = std::fs::File::create(&gvproxy_log)
                .map_err(|e| VmError::Fork(format!("failed to create gvproxy log: {e}")))?;

            // On Linux, gvproxy uses QEMU mode (SOCK_STREAM) since the vfkit
            // unixgram scheme is macOS/vfkit-specific. On macOS, use vfkit mode.
            #[cfg(target_os = "linux")]
            let (gvproxy_net_flag, gvproxy_net_url) =
                ("-listen-qemu", format!("unix://{}", net_sock.display()));
            #[cfg(target_os = "macos")]
            let (gvproxy_net_flag, gvproxy_net_url) = (
                "-listen-vfkit",
                format!("unixgram://{}", net_sock.display()),
            );

            let child = std::process::Command::new(binary)
                .arg(gvproxy_net_flag)
                .arg(&gvproxy_net_url)
                .arg("-listen")
                .arg(format!("unix://{}", api_sock.display()))
                .arg("-ssh-port")
                .arg(ssh_port.to_string())
                .stdout(std::process::Stdio::null())
                .stderr(gvproxy_log_file)
                .spawn()
                .map_err(|e| VmError::Fork(format!("failed to start gvproxy: {e}")))?;

            eprintln!(
                "gvproxy started (pid {}, ssh port {}) [{:.1}s]",
                child.id(),
                ssh_port,
                launch_start.elapsed().as_secs_f64()
            );

            // Wait for the socket to appear (exponential backoff: 5ms → 100ms).
            {
                let deadline = Instant::now() + std::time::Duration::from_secs(5);
                let mut interval = std::time::Duration::from_millis(5);
                while !net_sock.exists() {
                    if Instant::now() >= deadline {
                        return Err(VmError::Fork(
                            "gvproxy socket did not appear within 5s".to_string(),
                        ));
                    }
                    std::thread::sleep(interval);
                    interval = (interval * 2).min(std::time::Duration::from_millis(100));
                }
            }

            // Disable implicit TSI and add virtio-net via gvproxy
            vm.disable_implicit_vsock()?;
            vm.add_vsock(0)?;
            // This MAC matches gvproxy's default static DHCP lease for
            // 192.168.127.2. Using a different MAC can cause the gVisor
            // network stack to misroute or drop packets.
            let mac: [u8; 6] = [0x5a, 0x94, 0xef, 0xe4, 0x0c, 0xee];

            // COMPAT_NET_FEATURES from libkrun.h
            const NET_FEATURE_CSUM: u32 = 1 << 0;
            const NET_FEATURE_GUEST_CSUM: u32 = 1 << 1;
            const NET_FEATURE_GUEST_TSO4: u32 = 1 << 7;
            const NET_FEATURE_GUEST_UFO: u32 = 1 << 10;
            const NET_FEATURE_HOST_TSO4: u32 = 1 << 11;
            const NET_FEATURE_HOST_UFO: u32 = 1 << 14;
            const COMPAT_NET_FEATURES: u32 = NET_FEATURE_CSUM
                | NET_FEATURE_GUEST_CSUM
                | NET_FEATURE_GUEST_TSO4
                | NET_FEATURE_GUEST_UFO
                | NET_FEATURE_HOST_TSO4
                | NET_FEATURE_HOST_UFO;

            // On Linux use unixstream (SOCK_STREAM) to connect to gvproxy's
            // QEMU listener. On macOS use unixgram (SOCK_DGRAM) with the vfkit
            // magic byte for the vfkit listener.
            #[cfg(target_os = "linux")]
            vm.add_net_unixstream(&net_sock, &mac, COMPAT_NET_FEATURES)?;
            #[cfg(target_os = "macos")]
            {
                const NET_FLAG_VFKIT: u32 = 1 << 0;
                vm.add_net_unixgram(&net_sock, &mac, COMPAT_NET_FEATURES, NET_FLAG_VFKIT)?;
            }

            eprintln!(
                "Networking: gvproxy (virtio-net) [{:.1}s]",
                launch_start.elapsed().as_secs_f64()
            );
            gvproxy_guard = Some(GvproxyGuard::new(child));
            gvproxy_api_sock = Some(api_sock);
        }
    }

    // Port mapping (TSI only)
    if !config.port_map.is_empty() && matches!(config.net, NetBackend::Tsi) {
        vm.set_port_map(&config.port_map)?;
    }

    for vsock_port in &config.vsock_ports {
        if let Some(parent) = vsock_port.socket_path.parent() {
            std::fs::create_dir_all(parent).map_err(|e| {
                VmError::RuntimeState(format!("create vsock socket dir {}: {e}", parent.display()))
            })?;
        }
        // libkrun returns EEXIST if the socket file is already present from a
        // previous run. Remove any stale socket before registering the port.
        let _ = std::fs::remove_file(&vsock_port.socket_path);
        vm.add_vsock_port(vsock_port)?;
    }

    // Console output
    let console_log = config.console_output.clone().unwrap_or_else(|| {
        config
            .rootfs
            .parent()
            .unwrap_or(&config.rootfs)
            .join(format!("{}-console.log", vm_rootfs_key(&config.rootfs)))
    });
    vm.set_console_output(&console_log)?;

    // envp: use provided env or minimal defaults
    let mut env: Vec = if config.env.is_empty() {
        vec![
            "HOME=/root",
            "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
            "TERM=xterm",
        ]
        .into_iter()
        .map(ToOwned::to_owned)
        .collect()
    } else {
        config.env.clone()
    };
    // Tell the guest which block device holds state, unless the caller
    // already set it explicitly.
    if let Some(state_disk) = &config.state_disk
        && !env
            .iter()
            .any(|entry| entry.starts_with("OPENSHELL_VM_STATE_DISK_DEVICE="))
    {
        env.push(format!(
            "OPENSHELL_VM_STATE_DISK_DEVICE={}",
            state_disk.guest_device
        ));
    }
    vm.set_exec(&config.exec_path, &config.args, &env)?;

    // ── Fork and enter the VM ──────────────────────────────────────
    //
    // krun_start_enter() never returns — it calls exit() when the guest
    // process exits. We fork so the parent can monitor and report.

    let boot_start = Instant::now();
    eprintln!("Booting microVM...");

    let pid = unsafe { libc::fork() };
    match pid {
        -1 => Err(VmError::Fork(std::io::Error::last_os_error().to_string())),
        0 => {
            // Child process: enter the VM (never returns on success)
            let ret = vm.start_enter();
            eprintln!("krun_start_enter failed: {ret}");
            std::process::exit(1);
        }
        _ => {
            // Parent: wait for child
            if config.exec_path == "/srv/openshell-vm-init.sh" {
                let gvproxy_pid = gvproxy_guard.as_ref().and_then(GvproxyGuard::id);
                if let Err(err) =
                    write_vm_runtime_state(&config.rootfs, pid, &console_log, gvproxy_pid)
                {
                    // Could not record runtime state: tear everything down
                    // so we don't leave an untracked VM running.
                    unsafe {
                        libc::kill(pid, libc::SIGTERM);
                    }
                    // Guard drop will kill gvproxy automatically
                    drop(gvproxy_guard);
                    clear_vm_runtime_state(&config.rootfs);
                    return Err(err);
                }
            }
            eprintln!(
                "VM started (child pid {pid}) [{:.1}s]",
                boot_start.elapsed().as_secs_f64()
            );
            for pm in &config.port_map {
                let host_port = pm.split(':').next().unwrap_or(pm);
                eprintln!(" port {pm} -> http://localhost:{host_port}");
            }
            eprintln!("Console output: {}", console_log.display());

            // Set up gvproxy port forwarding via its HTTP API.
            // The port_map entries use the same "host:guest" format
            // as TSI, but here we translate them into gvproxy expose
            // calls targeting the guest IP (192.168.127.2).
            //
            // Instead of a fixed 500ms sleep, poll the API socket with
            // exponential backoff (5ms → 200ms, ~1s total budget).
            if let Some(ref api_sock) = gvproxy_api_sock {
                let fwd_start = Instant::now();
                // Wait for the API socket to appear (it lags slightly
                // behind the vfkit data socket).
                {
                    let deadline = Instant::now() + std::time::Duration::from_secs(2);
                    let mut interval = std::time::Duration::from_millis(5);
                    while !api_sock.exists() {
                        if Instant::now() >= deadline {
                            eprintln!(
                                "warning: gvproxy API socket not ready after 2s, attempting anyway"
                            );
                            break;
                        }
                        std::thread::sleep(interval);
                        interval = (interval * 2).min(std::time::Duration::from_millis(200));
                    }
                }

                let guest_ip = "192.168.127.2";

                for pm in &config.port_map {
                    // "host:guest", or a bare port meaning host == guest.
                    let parts: Vec<&str> = pm.split(':').collect();
                    let (host_port, guest_port) = match parts.len() {
                        2 => (parts[0], parts[1]),
                        1 => (parts[0], parts[0]),
                        _ => {
                            eprintln!(" skipping invalid port mapping: {pm}");
                            continue;
                        }
                    };

                    let expose_body = format!(
                        r#"{{"local":":{host_port}","remote":"{guest_ip}:{guest_port}","protocol":"tcp"}}"#
                    );

                    match gvproxy_expose(api_sock, &expose_body) {
                        Ok(()) => {
                            eprintln!(" port {host_port} -> {guest_ip}:{guest_port}");
                        }
                        Err(e) => {
                            // Non-fatal: report and keep mapping other ports.
                            eprintln!(" port {host_port}: {e}");
                        }
                    }
                }
                eprintln!(
                    "Port forwarding ready [{:.1}s]",
                    fwd_start.elapsed().as_secs_f64()
                );
            }

            // Bootstrap the OpenShell control plane and wait for the
            // service to be reachable. Only for the gateway preset, and
            // only when port forwarding is configured (i.e. the gateway
            // is reachable from the host). During rootfs pre-init builds,
            // no --port is specified so there is nothing to health-check
            // — the build script has its own kubectl-based readiness
            // checks inside the VM.
            if config.exec_path == "/srv/openshell-vm-init.sh" && !config.port_map.is_empty() {
                // Bootstrap stores host-side metadata and mTLS creds.
                // With pre-baked rootfs (Path 1) this reads PKI directly
                // from virtio-fs — no kubectl or port forwarding needed.
                // Cold boot (Path 2) writes secret manifests into the
                // k3s auto-deploy directory via virtio-fs.
+ let gateway_port = gateway_host_port(config); + bootstrap_gateway(&config.rootfs, &config.gateway_name, gateway_port)?; + + // Wait for the gRPC health check to pass. This ensures + // the service is fully operational, not just accepting + // TCP connections. The health check confirms the full + // path (gvproxy → kube-proxy nftables → pod:8080) and + // that the gRPC service is responding to requests. + health::wait_for_gateway_ready(gateway_port, &config.gateway_name)?; + } + + eprintln!("Ready [{:.1}s total]", boot_start.elapsed().as_secs_f64()); + eprintln!("Press Ctrl+C to stop."); + + // Forward signals to child + unsafe { + libc::signal( + libc::SIGINT, + forward_signal as *const () as libc::sighandler_t, + ); + libc::signal( + libc::SIGTERM, + forward_signal as *const () as libc::sighandler_t, + ); + CHILD_PID.store(pid, std::sync::atomic::Ordering::Relaxed); + } + + let mut status: libc::c_int = 0; + unsafe { + libc::waitpid(pid, &raw mut status, 0); + } + + // Clean up gvproxy — disarm the guard and do explicit cleanup + // so we can print the "stopped" message. + if config.exec_path == "/srv/openshell-vm-init.sh" { + clear_vm_runtime_state(&config.rootfs); + } + if let Some(mut guard) = gvproxy_guard + && let Some(mut child) = guard.disarm() + { + let _ = child.kill(); + let _ = child.wait(); + eprintln!("gvproxy stopped"); + } + + if libc::WIFEXITED(status) { + let code = libc::WEXITSTATUS(status); + eprintln!("VM exited with code {code}"); + return Ok(code); + } else if libc::WIFSIGNALED(status) { + let sig = libc::WTERMSIG(status); + eprintln!("VM killed by signal {sig}"); + return Ok(128 + sig); + } + + Ok(status) + } + } +} + +// ── Post-boot bootstrap ──────────────────────────────────────────────── + +/// Default gateway port: host port mapped to the `OpenShell` `NodePort` (30051). +const DEFAULT_GATEWAY_PORT: u16 = 30051; + +/// Bootstrap the `OpenShell` control plane after k3s is ready. +/// +/// Two paths: +/// +/// 1. 
**Warm boot**: host-side metadata and mTLS certs already exist from a
+///    previous run. Fetch PKI via the exec agent to detect cert drift (e.g.
+///    after a `--reset`), re-sync if needed, then proceed to the health check.
+///
+/// 2. **First boot / post-reset**: poll the exec agent to `cat` each PEM file
+///    from `/opt/openshell/pki/` until the files exist (PKI generation has
+///    finished), then store them in `~/.config/openshell/gateways/<name>/mtls/`.
+fn bootstrap_gateway(rootfs: &Path, gateway_name: &str, gateway_port: u16) -> Result<(), VmError> {
+    let bootstrap_start = Instant::now();
+
+    let metadata = openshell_bootstrap::GatewayMetadata {
+        name: gateway_name.to_string(),
+        gateway_endpoint: format!("https://127.0.0.1:{gateway_port}"),
+        is_remote: false,
+        gateway_port,
+        remote_host: None,
+        resolved_host: None,
+        auth_mode: None,
+        edge_team_domain: None,
+        edge_auth_url: None,
+    };
+
+    let exec_socket = vm_exec_socket_path(rootfs);
+
+    // ── Warm boot: host already has certs ──────────────────────────
+    if is_warm_boot(gateway_name) {
+        // Always (re-)store metadata so port/endpoint changes are picked up.
+        openshell_bootstrap::store_gateway_metadata(gateway_name, &metadata)
+            .map_err(|e| VmError::Bootstrap(format!("failed to store metadata: {e}")))?;
+        openshell_bootstrap::save_active_gateway(gateway_name)
+            .map_err(|e| VmError::Bootstrap(format!("failed to set active cluster: {e}")))?;
+
+        // Verify host certs match the VM's PKI. If they diverge (e.g.
+        // PKI was regenerated after a --reset, or the state disk was
+        // replaced), re-sync the host certs from the VM via the exec agent.
+        //
+        // On warm boot the exec agent may not be ready yet (the VM is
+        // still booting). Use a short timeout — this is a non-critical
+        // drift check and the host already has valid certs. If the agent
+        // isn't reachable we skip silently rather than blocking boot for
+        // 30s.
+ match fetch_pki_over_exec(&exec_socket, std::time::Duration::from_secs(5)) { + Ok(bundle) => { + if let Err(e) = sync_host_certs_if_stale(gateway_name, &bundle) { + eprintln!("Warning: cert sync check failed: {e}"); + } + } + Err(_) => { + // Expected on warm boot — exec agent not ready yet. + } + } + + eprintln!( + "Warm boot [{:.1}s]", + bootstrap_start.elapsed().as_secs_f64() + ); + eprintln!(" Cluster: {gateway_name}"); + eprintln!(" Gateway: https://127.0.0.1:{gateway_port}"); + eprintln!(" mTLS: ~/.config/openshell/gateways/{gateway_name}/mtls/"); + return Ok(()); + } + + // ── First boot / post-reset: fetch PKI from VM via exec agent ── + // + // The VM init script generates certs on first boot at /opt/openshell/pki/. + // We poll the exec agent with `cat ` for each PEM file until they + // exist, retrying to handle the window between VM boot and PKI generation. + eprintln!("Waiting for VM to generate PKI..."); + let pki_bundle = fetch_pki_over_exec(&exec_socket, std::time::Duration::from_secs(120)) + .map_err(|e| VmError::Bootstrap(format!("VM did not produce PKI within 120s: {e}")))?; + + eprintln!("PKI ready — storing client certs on host..."); + + openshell_bootstrap::store_gateway_metadata(gateway_name, &metadata) + .map_err(|e| VmError::Bootstrap(format!("failed to store metadata: {e}")))?; + + openshell_bootstrap::mtls::store_pki_bundle(gateway_name, &pki_bundle) + .map_err(|e| VmError::Bootstrap(format!("failed to store mTLS creds: {e}")))?; + + openshell_bootstrap::save_active_gateway(gateway_name) + .map_err(|e| VmError::Bootstrap(format!("failed to set active cluster: {e}")))?; + + eprintln!( + "Bootstrap complete [{:.1}s]", + bootstrap_start.elapsed().as_secs_f64() + ); + eprintln!(" Cluster: {gateway_name}"); + eprintln!(" Gateway: https://127.0.0.1:{gateway_port}"); + eprintln!(" mTLS: ~/.config/openshell/gateways/{gateway_name}/mtls/"); + + Ok(()) +} + +/// PKI file names and the corresponding [`PkiBundle`] fields. 
+const PKI_FILES: &[(&str, &str)] = &[
+    ("ca.crt", "ca_cert_pem"),
+    ("ca.key", "ca_key_pem"),
+    ("server.crt", "server_cert_pem"),
+    ("server.key", "server_key_pem"),
+    ("client.crt", "client_cert_pem"),
+    ("client.key", "client_key_pem"),
+];
+
+/// Fetch all six PEM files from `/opt/openshell/pki/` inside the guest by
+/// running `cat` via the exec agent. Retries until `timeout` elapses,
+/// sleeping 500ms between attempts, to handle the window between VM boot
+/// and PKI generation completing.
+fn fetch_pki_over_exec(
+    exec_socket: &Path,
+    timeout: std::time::Duration,
+) -> Result<openshell_bootstrap::pki::PkiBundle, VmError> {
+    let deadline = Instant::now() + timeout;
+
+    loop {
+        match try_read_pki_files(exec_socket) {
+            Ok(bundle) => return Ok(bundle),
+            Err(_) if Instant::now() < deadline => {
+                std::thread::sleep(std::time::Duration::from_millis(500));
+            }
+            Err(e) => {
+                return Err(VmError::Bootstrap(format!(
+                    "failed to read PKI files via exec agent: {e}"
+                )));
+            }
+        }
+    }
+}
+
+/// Attempt to read all six PEM files from the guest in one pass.
+fn try_read_pki_files(exec_socket: &Path) -> Result<openshell_bootstrap::pki::PkiBundle, VmError> {
+    let mut pems = std::collections::HashMap::new();
+
+    for &(filename, _field) in PKI_FILES {
+        let path = format!("/opt/openshell/pki/{filename}");
+        let output = exec_capture(exec_socket, vec!["cat".to_string(), path])?;
+        let content = String::from_utf8(output).map_err(|e| {
+            VmError::Bootstrap(format!("PKI file {filename} is not valid UTF-8: {e}"))
+        })?;
+        if content.is_empty() {
+            return Err(VmError::Bootstrap(format!("PKI file {filename} is empty")));
+        }
+        pems.insert(filename, content);
+    }
+
+    let mut get = |key: &str| -> Result<String, VmError> {
+        pems.remove(key)
+            .ok_or_else(|| VmError::Bootstrap(format!("PKI file {key} missing from exec output")))
+    };
+
+    Ok(openshell_bootstrap::pki::PkiBundle {
+        ca_cert_pem: get("ca.crt")?,
+        ca_key_pem: get("ca.key")?,
+        server_cert_pem: get("server.crt")?,
+        server_key_pem: get("server.key")?,
+        client_cert_pem: get("client.crt")?,
+        client_key_pem: get("client.key")?,
+    })
+}
+
+/// Check whether a previous bootstrap left valid state on disk.
+///
+/// A warm boot is detected when both:
+/// - Cluster metadata exists: `$XDG_CONFIG_HOME/openshell/gateways/openshell-vm/metadata.json`
+/// - mTLS certs exist: `$XDG_CONFIG_HOME/openshell/gateways/openshell-vm/mtls/{ca.crt,tls.crt,tls.key}`
+///
+/// When true, the host-side bootstrap (PKI generation, secret manifest writing,
+/// metadata storage) can be skipped because the virtio-fs rootfs persists k3s
+/// state (TLS certs, kine/SQLite cluster objects, containerd images, helm
+/// releases) across VM restarts. The kine database is preserved on normal
+/// boots so that pods and other cluster objects survive restarts.
+fn is_warm_boot(gateway_name: &str) -> bool { + let Ok(home) = std::env::var("HOME") else { + return false; + }; + + let config_base = + std::env::var("XDG_CONFIG_HOME").unwrap_or_else(|_| format!("{home}/.config")); + + let config_dir = PathBuf::from(&config_base) + .join("openshell") + .join("gateways"); + + // Check metadata file. + let metadata_path = config_dir.join(gateway_name).join("metadata.json"); + if !metadata_path.is_file() { + return false; + } + + // Check mTLS cert files. + let mtls_dir = config_dir.join(gateway_name).join("mtls"); + for name in &["ca.crt", "tls.crt", "tls.key"] { + let path = mtls_dir.join(name); + match std::fs::metadata(&path) { + Ok(m) if m.is_file() && m.len() > 0 => {} + _ => return false, + } + } + + true +} + +/// Compare the CA cert on the rootfs (authoritative source) against the +/// host-side copy. If they differ, re-copy all client certs from the rootfs. +/// +/// This catches cases where PKI was regenerated (e.g. rootfs rebuilt, +/// manual reset) but host-side certs survived from a previous boot cycle. 
+fn sync_host_certs_if_stale( + gateway_name: &str, + bundle: &openshell_bootstrap::pki::PkiBundle, +) -> Result<(), VmError> { + let Ok(home) = std::env::var("HOME") else { + return Ok(()); + }; + let config_base = + std::env::var("XDG_CONFIG_HOME").unwrap_or_else(|_| format!("{home}/.config")); + let host_ca = PathBuf::from(&config_base) + .join("openshell/gateways") + .join(gateway_name) + .join("mtls/ca.crt"); + + let host_ca_contents = std::fs::read_to_string(&host_ca) + .map_err(|e| VmError::Bootstrap(format!("failed to read host ca.crt: {e}")))?; + + if bundle.ca_cert_pem.trim() == host_ca_contents.trim() { + return Ok(()); + } + + eprintln!("Cert drift detected — re-syncing mTLS certs from VM..."); + + openshell_bootstrap::mtls::store_pki_bundle(gateway_name, bundle) + .map_err(|e| VmError::Bootstrap(format!("failed to store mTLS creds: {e}")))?; + + eprintln!(" mTLS certs re-synced from VM"); + Ok(()) +} + +static CHILD_PID: std::sync::atomic::AtomicI32 = std::sync::atomic::AtomicI32::new(0); + +extern "C" fn forward_signal(_sig: libc::c_int) { + let pid = CHILD_PID.load(std::sync::atomic::Ordering::Relaxed); + if pid > 0 { + unsafe { + libc::kill(pid, libc::SIGTERM); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_runtime_dir() -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("time went backwards") + .as_nanos(); + std::env::temp_dir().join(format!( + "openshell-vm-runtime-{}-{nanos}", + std::process::id() + )) + } + + fn write_runtime_file(path: &Path) { + fs::write(path, b"test").expect("failed to write runtime file"); + } + + #[test] + fn validate_runtime_dir_accepts_minimal_bundle() { + let dir = temp_runtime_dir(); + fs::create_dir_all(&dir).expect("failed to create runtime dir"); + + write_runtime_file(&dir.join(ffi::required_runtime_lib_name())); + write_runtime_file(&dir.join("libkrunfw.test")); + let gvproxy = dir.join("gvproxy"); + 
write_runtime_file(&gvproxy); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt as _; + + let mut perms = fs::metadata(&gvproxy).expect("stat gvproxy").permissions(); + perms.set_mode(0o755); + fs::set_permissions(&gvproxy, perms).expect("chmod gvproxy"); + } + + validate_runtime_dir(&dir).expect("runtime bundle should validate"); + assert!(gvproxy.exists()); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn validate_runtime_dir_requires_gvproxy() { + let dir = temp_runtime_dir(); + fs::create_dir_all(&dir).expect("failed to create runtime dir"); + + write_runtime_file(&dir.join(ffi::required_runtime_lib_name())); + write_runtime_file(&dir.join("libkrunfw.test")); + + let err = validate_runtime_dir(&dir).expect_err("missing gvproxy should fail"); + match err { + VmError::BinaryNotFound { hint, .. } => { + assert!(hint.contains("missing gvproxy")); + } + other => panic!("unexpected error: {other:?}"), + } + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn gateway_config_uses_default_state_disk_next_to_rootfs() { + let rootfs = PathBuf::from("/tmp/openshell-vm-test/rootfs"); + + let config = VmConfig::gateway(rootfs.clone()); + let state_disk = config + .state_disk + .expect("gateway should enable a state disk"); + + assert_eq!( + state_disk.path, + rootfs.parent().unwrap().join("rootfs-state.raw") + ); + assert_eq!(state_disk.block_id, DEFAULT_STATE_DISK_BLOCK_ID); + assert_eq!(state_disk.guest_device, DEFAULT_STATE_DISK_GUEST_DEVICE); + assert_eq!(state_disk.size_bytes, DEFAULT_STATE_DISK_SIZE_BYTES); + } + + #[test] + fn ensure_state_disk_image_creates_sparse_file() { + let dir = temp_runtime_dir(); + fs::create_dir_all(&dir).expect("failed to create runtime dir"); + + let state_disk = StateDiskConfig { + path: dir.join("state.raw"), + size_bytes: 8 * 1024 * 1024, + block_id: DEFAULT_STATE_DISK_BLOCK_ID.to_string(), + guest_device: DEFAULT_STATE_DISK_GUEST_DEVICE.to_string(), + }; + + ensure_state_disk_image(&state_disk).expect("state 
disk should be created"); + + let metadata = fs::metadata(&state_disk.path).expect("stat state disk"); + assert_eq!(metadata.len(), state_disk.size_bytes); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn prepare_rootfs_returns_existing_explicit_rootfs() { + let dir = temp_runtime_dir(); + let rootfs = dir.join("rootfs"); + fs::create_dir_all(&rootfs).expect("failed to create rootfs dir"); + + let prepared = + prepare_rootfs(Some(rootfs.clone()), "default", false).expect("prepare rootfs"); + + assert_eq!(prepared, rootfs); + + let _ = fs::remove_dir_all(&dir); + } +} diff --git a/crates/openshell-vm/src/main.rs b/crates/openshell-vm/src/main.rs new file mode 100644 index 000000000..ba7c7d6bc --- /dev/null +++ b/crates/openshell-vm/src/main.rs @@ -0,0 +1,241 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Standalone openshell-vm binary. +//! +//! Boots a libkrun microVM running the `OpenShell` control plane (k3s + +//! openshell-server). Each named instance gets its own rootfs extracted from +//! the embedded tarball at +//! `~/.local/share/openshell/openshell-vm/{version}/instances//rootfs`. +//! +//! # Codesigning (macOS) +//! +//! This binary must be codesigned with the `com.apple.security.hypervisor` +//! entitlement. See `entitlements.plist` in this crate. +//! +//! ```sh +//! codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - target/debug/openshell-vm +//! ``` + +use std::io::IsTerminal; +use std::path::PathBuf; + +use clap::{Parser, Subcommand, ValueHint}; + +const DISABLE_STATE_DISK_ENV: &str = "OPENSHELL_VM_DISABLE_STATE_DISK"; + +/// Boot the `OpenShell` gateway microVM. +/// +/// Starts a libkrun microVM running a k3s Kubernetes cluster with the +/// `OpenShell` control plane. Use `--exec` to run a custom process instead. 
+#[derive(Parser)]
+#[command(name = "openshell-vm", version)]
+struct Cli {
+    #[command(subcommand)]
+    command: Option<GatewayCommand>,
+
+    /// Path to the rootfs directory (aarch64 Linux).
+    /// Overrides the default instance-based rootfs resolution.
+    #[arg(long, value_hint = ValueHint::DirPath)]
+    rootfs: Option<PathBuf>,
+
+    /// Named VM instance.
+    ///
+    /// When used alone, the rootfs resolves to
+    /// `~/.local/share/openshell/openshell-vm/{version}/instances/<name>/rootfs`
+    /// and is extracted from the embedded tarball on first use.
+    /// When combined with `--rootfs`, only provides the instance identity
+    /// (for exec, gateway name, etc.) while the rootfs comes from the
+    /// explicit path.
+    #[arg(long, default_value = "default")]
+    name: String,
+
+    /// Executable path inside the VM. When set, runs this instead of
+    /// the default k3s server.
+    #[arg(long)]
+    exec: Option<String>,
+
+    /// Arguments to the executable (requires `--exec`).
+    #[arg(long, num_args = 1..)]
+    args: Vec<String>,
+
+    /// Environment variables in `KEY=VALUE` form (requires `--exec`).
+    #[arg(long, num_args = 1..)]
+    env: Vec<String>,
+
+    /// Working directory inside the VM.
+    #[arg(long, default_value = "/")]
+    workdir: String,
+
+    /// Port mappings (`host_port:guest_port`).
+    #[arg(long, short, num_args = 1..)]
+    port: Vec<String>,
+
+    /// Number of virtual CPUs (default: 4 for openshell-vm, 2 for --exec).
+    // NOTE(review): generic was garbled in extraction; u8 matches libkrun's
+    // krun_set_vm_config(num_vcpus: u8) — confirm against VmConfig::vcpus.
+    #[arg(long)]
+    vcpus: Option<u8>,
+
+    /// RAM in MiB (default: 8192 for openshell-vm, 2048 for --exec).
+    // NOTE(review): generic was garbled in extraction; u32 matches libkrun's
+    // krun_set_vm_config(ram_mib: u32) — confirm against VmConfig::mem_mib.
+    #[arg(long)]
+    mem: Option<u32>,
+
+    /// libkrun log level (0=Off .. 5=Trace).
+    #[arg(long, default_value_t = 1)]
+    krun_log_level: u32,
+
+    /// Networking backend: "gvproxy" (default), "tsi", or "none".
+    #[arg(long, default_value = "gvproxy")]
+    net: String,
+
+    /// Wipe all runtime state (containerd, kubelet, k3s) before booting.
+    /// Use this to recover from a corrupted state after a crash or
+    /// unclean shutdown.
+    #[arg(long)]
+    reset: bool,
+}
+
+#[derive(Subcommand)]
+enum GatewayCommand {
+    /// Ensure the target rootfs exists, extracting the embedded rootfs if needed.
+    PrepareRootfs {
+        /// Recreate the target rootfs even if it already exists.
+        #[arg(long)]
+        force: bool,
+    },
+
+    /// Execute a command inside a running openshell-vm VM.
+    Exec {
+        /// Working directory inside the VM.
+        #[arg(long)]
+        workdir: Option<String>,
+
+        /// Environment variables in `KEY=VALUE` form.
+        #[arg(long, num_args = 1..)]
+        env: Vec<String>,
+
+        /// Command and arguments to run inside the VM.
+        #[arg(trailing_var_arg = true)]
+        command: Vec<String>,
+    },
+}
+
+fn main() {
+    tracing_subscriber::fmt::init();
+
+    let cli = Cli::parse();
+
+    let code = match run(cli) {
+        Ok(code) => code,
+        Err(e) => {
+            eprintln!("Error: {e}");
+            1
+        }
+    };
+
+    if code != 0 {
+        std::process::exit(code);
+    }
+}
+
+fn run(cli: Cli) -> Result<i32, Box<dyn std::error::Error>> {
+    if let Some(GatewayCommand::PrepareRootfs { force }) = &cli.command {
+        let rootfs = openshell_vm::prepare_rootfs(cli.rootfs.clone(), &cli.name, *force)?;
+        println!("{}", rootfs.display());
+        return Ok(0);
+    }
+
+    if let Some(GatewayCommand::Exec {
+        workdir,
+        env,
+        mut command,
+    }) = cli.command
+    {
+        let effective_tty = std::io::stdin().is_terminal();
+        if command.is_empty() {
+            if effective_tty {
+                command.push("sh".to_string());
+            } else {
+                return Err("openshell-vm exec requires a command when stdin is not a TTY".into());
+            }
+        }
+        return Ok(openshell_vm::exec_running_vm(
+            openshell_vm::VmExecOptions {
+                rootfs: Some(
+                    cli.rootfs
+                        .unwrap_or(openshell_vm::named_rootfs_dir(&cli.name)?),
+                ),
+                command,
+                workdir,
+                env,
+                tty: effective_tty,
+            },
+        )?);
+    }
+
+    let net_backend = match cli.net.as_str() {
+        "tsi" => openshell_vm::NetBackend::Tsi,
+        "none" => openshell_vm::NetBackend::None,
+        "gvproxy" => openshell_vm::NetBackend::Gvproxy {
+            binary: openshell_vm::default_runtime_gvproxy_path(),
+        },
+        other => {
+            return Err(
+                format!("unknown --net backend: {other} (expected: 
gvproxy, tsi, none)").into(), + ); + } + }; + + let rootfs = cli + .rootfs + .map_or_else(|| openshell_vm::ensure_named_rootfs(&cli.name), Ok)?; + + let gateway_name = openshell_vm::gateway_name(&cli.name)?; + + let mut config = if let Some(exec_path) = cli.exec { + openshell_vm::VmConfig { + rootfs, + vcpus: cli.vcpus.unwrap_or(2), + mem_mib: cli.mem.unwrap_or(2048), + exec_path, + args: cli.args, + env: cli.env, + workdir: cli.workdir, + port_map: cli.port, + vsock_ports: vec![], + log_level: cli.krun_log_level, + console_output: None, + net: net_backend, + reset: cli.reset, + gateway_name, + state_disk: None, + } + } else { + let mut c = openshell_vm::VmConfig::gateway(rootfs); + if !cli.port.is_empty() { + c.port_map = cli.port; + } + if let Some(v) = cli.vcpus { + c.vcpus = v; + } + if let Some(m) = cli.mem { + c.mem_mib = m; + } + c.net = net_backend; + c.reset = cli.reset; + c.gateway_name = gateway_name; + if state_disk_disabled() { + c.state_disk = None; + } + c + }; + config.log_level = cli.krun_log_level; + + Ok(openshell_vm::launch(&config)?) +} + +fn state_disk_disabled() -> bool { + matches!( + std::env::var(DISABLE_STATE_DISK_ENV).ok().as_deref(), + Some("1" | "true" | "TRUE" | "yes" | "YES") + ) +} diff --git a/crates/openshell-vm/tests/gateway_integration.rs b/crates/openshell-vm/tests/gateway_integration.rs new file mode 100644 index 000000000..7ababb42f --- /dev/null +++ b/crates/openshell-vm/tests/gateway_integration.rs @@ -0,0 +1,155 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Integration tests for the standalone `openshell-vm` binary. +//! +//! These tests require: +//! - libkrun installed (e.g. `brew tap slp/krun && brew install libkrun`) +//! - macOS ARM64 with Apple Hypervisor.framework +//! - An `openshell-vm` binary built with an embedded rootfs tarball +//! (for example via `mise run vm:build:embedded`) +//! +//! 
All tests are `#[ignore]` — run them explicitly: +//! +//! ```sh +//! cargo test -p openshell-vm --test gateway_integration -- --ignored +//! ``` + +#![allow(unsafe_code)] + +use std::net::{SocketAddr, TcpStream}; +use std::process::{Command, Stdio}; +use std::time::{Duration, Instant}; + +/// Path to the built `openshell-vm` binary (resolved by Cargo at compile time). +const GATEWAY: &str = env!("CARGO_BIN_EXE_openshell-vm"); + +// ── Helpers ──────────────────────────────────────────────────────────── + +/// Codesign the binary on macOS so it can access Hypervisor.framework. +fn codesign_if_needed() { + if cfg!(target_os = "macos") { + let entitlements = format!("{}/entitlements.plist", env!("CARGO_MANIFEST_DIR")); + let status = Command::new("codesign") + .args([ + "--entitlements", + &entitlements, + "--force", + "-s", + "-", + GATEWAY, + ]) + .status() + .expect("codesign command failed to execute"); + assert!(status.success(), "failed to codesign openshell-vm binary"); + } +} + +fn assert_runtime_bundle_staged() { + let bundle_dir = std::path::Path::new(GATEWAY) + .parent() + .expect("openshell-vm binary has no parent") + .join("openshell-vm.runtime"); + assert!( + bundle_dir.is_dir(), + "openshell-vm.runtime is missing next to the test binary: {}. Run `mise run vm:bundle-runtime` first.", + bundle_dir.display() + ); +} + +// ── Tests ────────────────────────────────────────────────────────────── + +/// Boot the full `OpenShell` gateway and verify the gRPC service becomes +/// reachable on port 30051. +#[test] +#[ignore] // requires libkrun + rootfs +fn gateway_boots_and_service_becomes_reachable() { + codesign_if_needed(); + assert_runtime_bundle_staged(); + + let mut cmd = Command::new(GATEWAY); + cmd.stdout(Stdio::null()).stderr(Stdio::piped()); + + let mut child = cmd.spawn().expect("failed to start openshell-vm"); + + // Poll for the OpenShell gRPC service. 
+ let addr: SocketAddr = ([127, 0, 0, 1], 30051).into(); + let timeout = Duration::from_secs(180); + let start = Instant::now(); + let mut reachable = false; + + while start.elapsed() < timeout { + if TcpStream::connect_timeout(&addr, Duration::from_secs(1)).is_ok() { + reachable = true; + break; + } + std::thread::sleep(Duration::from_secs(2)); + } + + // Tear down regardless of result. + let _ = unsafe { libc::kill(child.id() as i32, libc::SIGTERM) }; + let _ = child.wait(); + + assert!( + reachable, + "openshell-vm service on port 30051 not reachable within {timeout:?}" + ); +} + +/// Run a trivial command inside the VM via `--exec` and verify it exits +/// successfully, proving the VM boots and can execute guest processes. +#[test] +#[ignore] // requires libkrun + rootfs +fn gateway_exec_runs_guest_command() { + codesign_if_needed(); + assert_runtime_bundle_staged(); + + let mut cmd = Command::new(GATEWAY); + cmd.args(["--exec", "/bin/true"]); + + let output = cmd.output().expect("failed to run openshell-vm --exec"); + + assert!( + output.status.success(), + "openshell-vm --exec /bin/true failed with status {:?}\nstderr: {}", + output.status, + String::from_utf8_lossy(&output.stderr), + ); +} + +/// Boot the VM, then use `openshell-vm exec` against the running instance. 
+#[test] +#[ignore] // requires libkrun + rootfs +fn gateway_exec_attaches_to_running_vm() { + codesign_if_needed(); + assert_runtime_bundle_staged(); + + let mut boot = Command::new(GATEWAY); + boot.stdout(Stdio::null()).stderr(Stdio::piped()); + let mut child = boot.spawn().expect("failed to start openshell-vm VM"); + + let addr: SocketAddr = ([127, 0, 0, 1], 30051).into(); + let timeout = Duration::from_secs(180); + let start = Instant::now(); + while start.elapsed() < timeout { + if TcpStream::connect_timeout(&addr, Duration::from_secs(1)).is_ok() { + break; + } + std::thread::sleep(Duration::from_secs(2)); + } + + let output = Command::new(GATEWAY) + .args(["exec", "--", "/bin/true"]) + .output() + .expect("failed to run openshell-vm exec"); + + let _ = unsafe { libc::kill(child.id() as i32, libc::SIGTERM) }; + let _ = child.wait(); + + assert!( + output.status.success(), + "openshell-vm exec -- /bin/true failed with status {:?}\nstderr: {}", + output.status, + String::from_utf8_lossy(&output.stderr), + ); +} diff --git a/deploy/docker/Dockerfile.vm-macos b/deploy/docker/Dockerfile.vm-macos new file mode 100644 index 000000000..c033e43e8 --- /dev/null +++ b/deploy/docker/Dockerfile.vm-macos @@ -0,0 +1,125 @@ +# syntax=docker/dockerfile:1.6 + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Cross-compile the openshell-vm binary for macOS aarch64 (Apple Silicon) +# using the osxcross toolchain. +# +# The openshell-vm binary loads libkrun/libkrunfw at runtime via dlopen, so it +# does NOT need Hypervisor.framework headers at build time. Pre-compressed +# runtime artifacts (libkrun, libkrunfw, gvproxy, rootfs) are injected via +# the vm-runtime-compressed build context and embedded into the binary via +# include_bytes!(). 
+# +# Usage: +# docker buildx build -f deploy/docker/Dockerfile.vm-macos \ +# --build-arg OPENSHELL_CARGO_VERSION=0.6.0 \ +# --build-context vm-runtime-compressed=/path/to/compressed-dir \ +# --output type=local,dest=out/ . + +ARG OSXCROSS_IMAGE=crazymax/osxcross:latest + +FROM ${OSXCROSS_IMAGE} AS osxcross + +FROM python:3.12-slim AS builder + +ARG CARGO_TARGET_CACHE_SCOPE=default + +ENV PATH="/root/.cargo/bin:/usr/local/bin:/osxcross/bin:${PATH}" +ENV LD_LIBRARY_PATH="/osxcross/lib" + +COPY --from=osxcross /osxcross /osxcross + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + clang \ + cmake \ + curl \ + pkg-config \ + && rm -rf /var/lib/apt/lists/* + +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.88.0 + +RUN rustup target add aarch64-apple-darwin + +WORKDIR /build + +ENV CC_aarch64_apple_darwin=oa64-clang +ENV CXX_aarch64_apple_darwin=oa64-clang++ +ENV AR_aarch64_apple_darwin=aarch64-apple-darwin25.1-ar +ENV CARGO_TARGET_AARCH64_APPLE_DARWIN_LINKER=oa64-clang +ENV CARGO_TARGET_AARCH64_APPLE_DARWIN_AR=aarch64-apple-darwin25.1-ar + +# aws-lc-sys workaround (in case it ends up in the dep tree via feature unification) +RUN ln -sf /osxcross/bin/arm64-apple-darwin25.1-ld /usr/local/bin/arm64-apple-macosx-ld + +# --------------------------------------------------------------------------- +# Stage 1: dependency caching — copy only manifests, create dummy sources, +# build dependencies. This layer is cached unless Cargo.toml/lock changes. 
+# --------------------------------------------------------------------------- +COPY Cargo.toml Cargo.lock ./ +COPY crates/openshell-vm/Cargo.toml crates/openshell-vm/Cargo.toml +COPY crates/openshell-vm/build.rs crates/openshell-vm/build.rs +COPY crates/openshell-core/Cargo.toml crates/openshell-core/Cargo.toml +COPY crates/openshell-core/build.rs crates/openshell-core/build.rs +COPY crates/openshell-bootstrap/Cargo.toml crates/openshell-bootstrap/Cargo.toml +COPY crates/openshell-policy/Cargo.toml crates/openshell-policy/Cargo.toml +COPY proto/ proto/ + +# Scope workspace to VM crates only. +RUN sed -i 's|members = \["crates/\*"\]|members = ["crates/openshell-vm", "crates/openshell-core", "crates/openshell-bootstrap", "crates/openshell-policy"]|' Cargo.toml + +RUN mkdir -p crates/openshell-vm/src \ + crates/openshell-core/src \ + crates/openshell-bootstrap/src \ + crates/openshell-policy/src && \ + echo "fn main() {}" > crates/openshell-vm/src/main.rs && \ + touch crates/openshell-vm/src/lib.rs && \ + touch crates/openshell-core/src/lib.rs && \ + touch crates/openshell-bootstrap/src/lib.rs && \ + touch crates/openshell-policy/src/lib.rs + +# Build deps only (cached layer). +RUN --mount=type=cache,id=cargo-registry-vm-macos,sharing=locked,target=/root/.cargo/registry \ + --mount=type=cache,id=cargo-git-vm-macos,sharing=locked,target=/root/.cargo/git \ + --mount=type=cache,id=cargo-target-vm-macos-${CARGO_TARGET_CACHE_SCOPE},sharing=locked,target=/build/target \ + cargo build --release --target aarch64-apple-darwin -p openshell-vm 2>/dev/null || true + +# --------------------------------------------------------------------------- +# Stage 2: real build with compressed runtime artifacts +# --------------------------------------------------------------------------- +COPY crates/ crates/ + +# Copy compressed VM runtime artifacts for embedding. +# These are passed in via --build-context vm-runtime-compressed=... 
+COPY --from=vm-runtime-compressed / /build/vm-runtime-compressed/ + +# Touch source files to ensure they're rebuilt (not the cached dummy). +RUN touch crates/openshell-vm/src/main.rs \ + crates/openshell-vm/src/lib.rs \ + crates/openshell-vm/build.rs \ + crates/openshell-bootstrap/src/lib.rs \ + crates/openshell-core/src/lib.rs \ + crates/openshell-core/build.rs \ + crates/openshell-policy/src/lib.rs \ + proto/*.proto + +# Declare version ARGs here (not earlier) so the git-hash-bearing values do not +# invalidate the expensive dependency-build layers above on every commit. +ARG OPENSHELL_CARGO_VERSION +ARG OPENSHELL_IMAGE_TAG +RUN --mount=type=cache,id=cargo-registry-vm-macos,sharing=locked,target=/root/.cargo/registry \ + --mount=type=cache,id=cargo-git-vm-macos,sharing=locked,target=/root/.cargo/git \ + --mount=type=cache,id=cargo-target-vm-macos-${CARGO_TARGET_CACHE_SCOPE},sharing=locked,target=/build/target \ + if [ -n "${OPENSHELL_CARGO_VERSION:-}" ]; then \ + sed -i -E '/^\[workspace\.package\]/,/^\[/{s/^version[[:space:]]*=[[:space:]]*".*"/version = "'"${OPENSHELL_CARGO_VERSION}"'"/}' Cargo.toml; \ + fi && \ + OPENSHELL_VM_RUNTIME_COMPRESSED_DIR=/build/vm-runtime-compressed \ + OPENSHELL_IMAGE_TAG="${OPENSHELL_IMAGE_TAG:-dev}" \ + cargo build --release --target aarch64-apple-darwin -p openshell-vm && \ + cp target/aarch64-apple-darwin/release/openshell-vm /openshell-vm + +FROM scratch AS binary +COPY --from=builder /openshell-vm /openshell-vm diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh index 14f13ecb0..b045bf222 100644 --- a/deploy/docker/cluster-entrypoint.sh +++ b/deploy/docker/cluster-entrypoint.sh @@ -461,9 +461,18 @@ if [ -n "${IMAGE_TAG:-}" ] && [ -f "$HELMCHART" ]; then sed -i -E "s|tag:[[:space:]]*\"?latest\"?|tag: \"${IMAGE_TAG}\"|" "$HELMCHART" fi -if [ -n "${IMAGE_PULL_POLICY:-}" ] && [ -f "$HELMCHART" ]; then - echo "Overriding image pull policy to: ${IMAGE_PULL_POLICY}" - sed -i "s|pullPolicy: 
Always|pullPolicy: ${IMAGE_PULL_POLICY}|" "$HELMCHART" +if [ -f "$HELMCHART" ]; then + IMAGE_PULL_POLICY_VALUE="${IMAGE_PULL_POLICY:-Always}" + if [ -n "${IMAGE_PULL_POLICY:-}" ]; then + echo "Overriding image pull policy to: ${IMAGE_PULL_POLICY}" + fi + sed -i "s|__IMAGE_PULL_POLICY__|${IMAGE_PULL_POLICY_VALUE}|g" "$HELMCHART" + + SANDBOX_IMAGE_PULL_POLICY_VALUE="${SANDBOX_IMAGE_PULL_POLICY:-\"\"}" + sed -i "s|__SANDBOX_IMAGE_PULL_POLICY__|${SANDBOX_IMAGE_PULL_POLICY_VALUE}|g" "$HELMCHART" + + DB_URL_VALUE="${DB_URL:-\"sqlite:/var/openshell/openshell.db\"}" + sed -i "s|__DB_URL__|${DB_URL_VALUE}|g" "$HELMCHART" fi # SSH handshake secret: previously generated here and injected via sed into the diff --git a/deploy/kube/manifests/openshell-helmchart.yaml b/deploy/kube/manifests/openshell-helmchart.yaml index ae22ddc6a..a09e0f300 100644 --- a/deploy/kube/manifests/openshell-helmchart.yaml +++ b/deploy/kube/manifests/openshell-helmchart.yaml @@ -27,9 +27,11 @@ spec: image: repository: ghcr.io/nvidia/openshell/gateway tag: latest - pullPolicy: Always + pullPolicy: __IMAGE_PULL_POLICY__ server: sandboxImage: ghcr.io/nvidia/openshell-community/sandboxes/base:latest + sandboxImagePullPolicy: __SANDBOX_IMAGE_PULL_POLICY__ + dbUrl: __DB_URL__ sshGatewayHost: __SSH_GATEWAY_HOST__ sshGatewayPort: __SSH_GATEWAY_PORT__ grpcEndpoint: "https://openshell.openshell.svc.cluster.local:8080" diff --git a/e2e/rust/e2e-vm.sh b/e2e/rust/e2e-vm.sh new file mode 100755 index 000000000..5fd055036 --- /dev/null +++ b/e2e/rust/e2e-vm.sh @@ -0,0 +1,246 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Run the Rust e2e smoke test against an openshell-vm gateway. 
+# +# Usage: +# mise run e2e:vm # start new named VM on random port +# mise run e2e:vm -- --vm-port=30051 # reuse existing VM on port 30051 +# mise run e2e:vm -- --vm-port=30051 --vm-name=my-vm # reuse existing named VM and run exec check +# +# Options: +# --vm-port=PORT Skip VM startup and test against this port. +# --vm-name=NAME VM instance name. Auto-generated for fresh VMs. +# +# When --vm-port is omitted: +# 1. Picks a random free host port +# 2. Starts the VM with --name --port :30051 +# 3. Waits for the VM to fully bootstrap (mTLS certs + gRPC health) +# 4. Verifies `openshell-vm exec` works +# 5. Runs the Rust smoke test +# 6. Tears down the VM +# +# When --vm-port is given the script assumes the VM is already running +# on that port and runs the smoke test. The VM exec check runs only when +# --vm-name is provided (so the script can target the correct instance). +# +# Prerequisites (when starting a new VM): `mise run vm:build` must already +# be done (the e2e:vm mise task handles this via depends). + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +RUNTIME_DIR="${ROOT}/target/debug/openshell-vm.runtime" +GATEWAY_BIN="${ROOT}/target/debug/openshell-vm" +VM_GATEWAY_IMAGE="${IMAGE_REPO_BASE:-openshell}/gateway:${IMAGE_TAG:-dev}" +VM_GATEWAY_TAR_REL="var/lib/rancher/k3s/agent/images/openshell-server.tar.zst" +GUEST_PORT=30051 +TIMEOUT=180 + +named_vm_rootfs() { + local vm_version + + vm_version=$("${GATEWAY_BIN}" --version | awk '{print $2}') + printf '%s\n' "${XDG_DATA_HOME:-${HOME}/.local/share}/openshell/openshell-vm/${vm_version}/instances/${VM_NAME}/rootfs" +} + +vm_exec() { + local rootfs_args=() + if [ -n "${VM_ROOTFS_DIR:-}" ]; then + rootfs_args=(--rootfs "${VM_ROOTFS_DIR}") + fi + "${GATEWAY_BIN}" "${rootfs_args[@]}" --name "${VM_NAME}" exec -- "$@" +} + +prepare_named_vm_rootfs() { + if [ -z "${VM_NAME}" ]; then + return 0 + fi + + echo "Preparing named VM rootfs '${VM_NAME}'..." 
+ VM_ROOTFS_DIR="$("${ROOT}/tasks/scripts/vm/ensure-vm-rootfs.sh" --name "${VM_NAME}" \ + | tail -n 1 | sed 's/^using openshell-vm rootfs at //')" + "${ROOT}/tasks/scripts/vm/sync-vm-rootfs.sh" --name "${VM_NAME}" +} + +refresh_vm_gateway() { + if [ -z "${VM_NAME}" ]; then + return 0 + fi + + echo "Refreshing VM gateway StatefulSet image to ${VM_GATEWAY_IMAGE}..." + # Re-import the host-synced :dev image into the VM's containerd, then + # force a rollout when the StatefulSet already points at the same tag. + vm_exec sh -lc "set -eu; \ + image_tar='/${VM_GATEWAY_TAR_REL}'; \ + k3s ctr -n k8s.io images import \"\${image_tar}\" >/dev/null; \ + current_image=\$(kubectl -n openshell get statefulset/openshell -o jsonpath='{.spec.template.spec.containers[?(@.name==\"openshell\")].image}'); \ + if [ \"\${current_image}\" = \"${VM_GATEWAY_IMAGE}\" ]; then \ + kubectl -n openshell rollout restart statefulset/openshell >/dev/null; \ + else \ + kubectl -n openshell set image statefulset/openshell openshell=${VM_GATEWAY_IMAGE} >/dev/null; \ + fi; \ + kubectl -n openshell rollout status statefulset/openshell --timeout=300s" + echo "Gateway rollout complete." +} + +wait_for_gateway_health() { + local elapsed=0 timeout=60 consecutive_ok=0 + + echo "Waiting for refreshed gateway health..." + while [ "${elapsed}" -lt "${timeout}" ]; do + if "${ROOT}/target/debug/openshell" status >/dev/null 2>&1; then + consecutive_ok=$((consecutive_ok + 1)) + if [ "${consecutive_ok}" -ge 3 ]; then + echo "Gateway health confirmed after refresh." 
+ return 0 + fi + else + consecutive_ok=0 + fi + + sleep 2 + elapsed=$((elapsed + 2)) + done + + echo "ERROR: refreshed gateway did not become healthy after ${timeout}s" + return 1 +} + +# ── Parse arguments ────────────────────────────────────────────────── +VM_PORT="" +VM_NAME="" +VM_ROOTFS_DIR="" +for arg in "$@"; do + case "$arg" in + --vm-port=*) VM_PORT="${arg#--vm-port=}" ;; + --vm-name=*) VM_NAME="${arg#--vm-name=}" ;; + *) echo "Unknown argument: $arg"; exit 1 ;; + esac +done + +# ── Determine mode ─────────────────────────────────────────────────── +if [ -n "${VM_PORT}" ]; then + # Point at an already-running VM. + HOST_PORT="${VM_PORT}" + echo "Using existing VM on port ${HOST_PORT}." + if [ -n "${VM_NAME}" ]; then + prepare_named_vm_rootfs + fi +else + # Pick a random free port and start a new VM. + HOST_PORT=$(python3 -c 'import socket; s=socket.socket(); s.bind(("",0)); print(s.getsockname()[1]); s.close()') + if [ -z "${VM_NAME}" ]; then + VM_NAME="e2e-${HOST_PORT}-$$" + fi + + cleanup() { + local exit_code=$? + if [ -n "${VM_PID:-}" ] && kill -0 "$VM_PID" 2>/dev/null; then + echo "Stopping openshell-vm (pid ${VM_PID})..." + kill "$VM_PID" 2>/dev/null || true + wait "$VM_PID" 2>/dev/null || true + fi + # On failure, preserve the VM console log for post-mortem debugging. + if [ "$exit_code" -ne 0 ] && [ -n "${VM_NAME:-}" ]; then + local console_log + console_log="$(named_vm_rootfs)-console.log" + if [ -f "$console_log" ]; then + echo "=== VM console log (preserved for debugging) ===" + cat "$console_log" + echo "=== end VM console log ===" + fi + fi + rm -f "${VM_LOG:-}" 2>/dev/null || true + if [ -n "${VM_NAME:-}" ]; then + rm -rf "$(dirname "$(named_vm_rootfs)")" 2>/dev/null || true + fi + } + trap cleanup EXIT + + prepare_named_vm_rootfs + + echo "Starting openshell-vm '${VM_NAME}' on port ${HOST_PORT}..." 
+ if [ "$(uname -s)" = "Darwin" ]; then + export DYLD_FALLBACK_LIBRARY_PATH="${RUNTIME_DIR}${DYLD_FALLBACK_LIBRARY_PATH:+:${DYLD_FALLBACK_LIBRARY_PATH}}" + fi + + VM_LOG=$(mktemp /tmp/openshell-vm-e2e.XXXXXX) + rootfs_args=() + if [ -n "${VM_ROOTFS_DIR}" ]; then + rootfs_args=(--rootfs "${VM_ROOTFS_DIR}") + fi + "${GATEWAY_BIN}" "${rootfs_args[@]}" --name "${VM_NAME}" --port "${HOST_PORT}:${GUEST_PORT}" 2>"${VM_LOG}" & + VM_PID=$! + + # ── Wait for full bootstrap (mTLS certs + gRPC health) ───────────── + # The VM prints "Ready [Xs total]" to stderr after bootstrap_gateway() + # stores mTLS certs and wait_for_gateway_ready() confirms the gRPC + # service is responding. Waiting only for TCP port reachability (nc -z) + # is insufficient because port forwarding is established before the + # mTLS certs are written, causing `openshell status` to fail. + echo "Waiting for VM bootstrap to complete (timeout ${TIMEOUT}s)..." + elapsed=0 + while ! grep -q "^Ready " "${VM_LOG}" 2>/dev/null; do + if ! kill -0 "$VM_PID" 2>/dev/null; then + echo "ERROR: openshell-vm exited before becoming ready" + echo "VM log:" + cat "${VM_LOG}" + exit 1 + fi + if [ "$elapsed" -ge "$TIMEOUT" ]; then + echo "ERROR: openshell-vm did not become ready after ${TIMEOUT}s" + echo "VM log:" + cat "${VM_LOG}" + exit 1 + fi + sleep 2 + elapsed=$((elapsed + 2)) + done + echo "Gateway is ready (${elapsed}s)." + echo "VM log:" + cat "${VM_LOG}" +fi + +# ── Exec into the VM (when instance name is known) ─────────────────── +if [ -n "${VM_NAME}" ]; then + echo "Verifying openshell-vm exec for '${VM_NAME}'..." + exec_elapsed=0 + exec_timeout=60 + until vm_exec /bin/true; do + if [ "$exec_elapsed" -ge "$exec_timeout" ]; then + echo "ERROR: openshell-vm exec did not become ready after ${exec_timeout}s" + exit 1 + fi + sleep 2 + exec_elapsed=$((exec_elapsed + 2)) + done + echo "VM exec succeeded." +else + echo "Skipping openshell-vm exec check (provide --vm-name for existing VMs)." 
+fi + +refresh_vm_gateway + +# ── Run the smoke test ─────────────────────────────────────────────── +# The openshell CLI reads OPENSHELL_GATEWAY_ENDPOINT to connect to the +# gateway directly, and OPENSHELL_GATEWAY to resolve mTLS certs from +# ~/.config/openshell/gateways//mtls/. +# In the VM, the overlayfs snapshotter re-extracts all image layers on +# every boot. The 1GB sandbox base image extraction can take >300s +# under contention, so allow 600s for sandbox provisioning. +export OPENSHELL_PROVISION_TIMEOUT=600 +export OPENSHELL_GATEWAY_ENDPOINT="https://127.0.0.1:${HOST_PORT}" +if [ -n "${VM_NAME}" ]; then + export OPENSHELL_GATEWAY="openshell-vm-${VM_NAME}" +else + export OPENSHELL_GATEWAY="openshell-vm" +fi + +echo "Running e2e smoke test (gateway: ${OPENSHELL_GATEWAY}, endpoint: ${OPENSHELL_GATEWAY_ENDPOINT})..." +cargo build -p openshell-cli --features openshell-core/dev-settings +wait_for_gateway_health +cargo test --manifest-path e2e/rust/Cargo.toml --features e2e --test smoke -- --nocapture + +echo "Smoke test passed." diff --git a/e2e/rust/src/harness/sandbox.rs b/e2e/rust/src/harness/sandbox.rs index 7a942265d..3a9601a83 100644 --- a/e2e/rust/src/harness/sandbox.rs +++ b/e2e/rust/src/harness/sandbox.rs @@ -25,7 +25,11 @@ fn extract_sandbox_name(output: &str) -> Option { } /// Default timeout for waiting for a sandbox to become ready. -const SANDBOX_READY_TIMEOUT: Duration = Duration::from_secs(300); +/// In VM mode, the overlayfs snapshotter re-extracts all image layers +/// from the content store on every boot (~250s for the 1GB sandbox +/// base image), so 600s accommodates extraction + workspace-init + pod +/// startup. +const SANDBOX_READY_TIMEOUT: Duration = Duration::from_secs(600); /// RAII guard that deletes a sandbox on drop. 
/// diff --git a/e2e/rust/tests/smoke.rs b/e2e/rust/tests/smoke.rs new file mode 100644 index 000000000..c380efc8c --- /dev/null +++ b/e2e/rust/tests/smoke.rs @@ -0,0 +1,97 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![cfg(feature = "e2e")] + +//! Smoke test: verify the gateway is healthy, create a sandbox, exec a +//! command inside it, and tear it down. +//! +//! This test is cluster-agnostic — it works against any running gateway +//! (Docker-based cluster or openshell-vm microVM). The `e2e:vm` mise +//! task uses it to validate the VM gateway after boot. + +use std::process::Stdio; +use std::time::Duration; + +use openshell_e2e::harness::binary::openshell_cmd; +use openshell_e2e::harness::output::strip_ansi; +use openshell_e2e::harness::sandbox::SandboxGuard; + +/// End-to-end smoke test: status → create → exec → list → delete. +#[tokio::test] +async fn gateway_smoke() { + // ── 1. Gateway must be reachable ────────────────────────────────── + let mut clean_status = String::new(); + let mut status_ok = false; + for _ in 0..15 { + let mut status_cmd = openshell_cmd(); + status_cmd + .arg("status") + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let status_out = status_cmd + .output() + .await + .expect("failed to run openshell status"); + + let status_text = format!( + "{}{}", + String::from_utf8_lossy(&status_out.stdout), + String::from_utf8_lossy(&status_out.stderr), + ); + clean_status = strip_ansi(&status_text); + + if status_out.status.success() && clean_status.contains("Connected") { + status_ok = true; + break; + } + + tokio::time::sleep(Duration::from_secs(2)).await; + } + + assert!( + status_ok, + "openshell status never became healthy:\n{clean_status}", + ); + + // ── 2. 
Create a sandbox and exec a command ─────────────────────── + // Default behaviour keeps the sandbox alive after the command exits, + // so we can verify it in the list before cleaning up. + let mut sb = SandboxGuard::create(&["--", "echo", "smoke-ok"]) + .await + .expect("sandbox create should succeed"); + + assert!( + sb.create_output.contains("smoke-ok"), + "expected 'smoke-ok' in sandbox output:\n{}", + sb.create_output, + ); + + // ── 3. Verify the sandbox appeared in the list ─────────────────── + let mut list_cmd = openshell_cmd(); + list_cmd + .args(["sandbox", "list", "--names"]) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let list_out = list_cmd + .output() + .await + .expect("failed to run openshell sandbox list"); + + let list_text = strip_ansi(&format!( + "{}{}", + String::from_utf8_lossy(&list_out.stdout), + String::from_utf8_lossy(&list_out.stderr), + )); + + assert!( + list_text.contains(&sb.name), + "sandbox '{}' should appear in list output:\n{list_text}", + sb.name, + ); + + // ── 4. Cleanup ─────────────────────────────────────────────────── + sb.cleanup().await; +} diff --git a/pyproject.toml b/pyproject.toml index 60d5177d5..899885929 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ dev = [ "maturin>=1.5,<2.0", "setuptools-scm>=8", "grpcio-tools>=1.60", + "pyelftools>=0.30", ] docs = [ "sphinx<=7.5", diff --git a/scripts/bin/openshell-vm b/scripts/bin/openshell-vm new file mode 100755 index 000000000..6513219eb --- /dev/null +++ b/scripts/bin/openshell-vm @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +BINARY="$PROJECT_ROOT/target/debug/openshell-vm" + +cargo build --package openshell-vm --bin openshell-vm --quiet + +# On macOS, codesign with the hypervisor entitlement so libkrun can use +# Apple's Hypervisor.framework. Re-sign after every build. 
+ENTITLEMENTS="$PROJECT_ROOT/crates/openshell-vm/entitlements.plist" +if [[ "$(uname)" == "Darwin" ]] && [[ -f "$ENTITLEMENTS" ]]; then + codesign --entitlements "$ENTITLEMENTS" --force -s - "$BINARY" 2>/dev/null +fi +exec "$BINARY" "$@" diff --git a/tasks/scripts/vm/_lib.sh b/tasks/scripts/vm/_lib.sh new file mode 100755 index 000000000..b925492a3 --- /dev/null +++ b/tasks/scripts/vm/_lib.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Shared helpers for openshell-vm build scripts. +# Source this file from other scripts: +# source "$(dirname "${BASH_SOURCE[0]}")/_lib.sh" + +# ── Root directory ────────────────────────────────────────────────────── + +vm_lib_root() { + cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd +} + +# ── Platform detection ────────────────────────────────────────────────── + +# Detect the current platform and echo one of: +# darwin-aarch64, linux-aarch64, linux-x86_64 +# Exits with error on unsupported platforms. +detect_platform() { + case "$(uname -s)-$(uname -m)" in + Darwin-arm64) echo "darwin-aarch64" ;; + Linux-aarch64) echo "linux-aarch64" ;; + Linux-x86_64) echo "linux-x86_64" ;; + *) + echo "Error: Unsupported platform: $(uname -s)-$(uname -m)" >&2 + echo "Supported: macOS ARM64, Linux ARM64, Linux x86_64" >&2 + return 1 + ;; + esac +} + +# ── Compression helpers ───────────────────────────────────────────────── + +# Compress a single file with zstd level 19, reporting sizes. 
+# Usage: compress_file <input> <output>
+# +# Prerequisites: +# - macOS ARM64 (Apple Silicon) +# - Xcode Command Line Tools +# - Homebrew: brew install rust lld dtc xz libkrunfw +# +# Usage: +# ./build-libkrun-macos.sh +# +# Output: +# target/libkrun-build/libkrun.dylib - portable dylib with @loader_path rpath + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)" +BUILD_DIR="${ROOT}/target/libkrun-build" +OUTPUT_DIR="${BUILD_DIR}" +BREW_PREFIX="$(brew --prefix 2>/dev/null || echo /opt/homebrew)" +CUSTOM_RUNTIME="${ROOT}/target/custom-runtime" + +if [ "$(uname -s)" != "Darwin" ]; then + echo "Error: This script only runs on macOS" >&2 + exit 1 +fi + +if [ "$(uname -m)" != "arm64" ]; then + echo "Error: libkrun on macOS only supports ARM64 (Apple Silicon)" >&2 + exit 1 +fi + +ARCH="$(uname -m)" +echo "==> Building libkrun for macOS ${ARCH} (no GPU support)" +echo " Build directory: ${BUILD_DIR}" +echo "" + +# ── Check dependencies ────────────────────────────────────────────────── + +check_deps() { + echo "==> Checking build dependencies..." + + MISSING="" + + # Check for Rust + if ! command -v cargo &>/dev/null; then + MISSING="$MISSING rust" + fi + + # Check for lld (LLVM linker) + if ! command -v ld.lld &>/dev/null && ! [ -x "${BREW_PREFIX}/opt/llvm/bin/ld.lld" ]; then + MISSING="$MISSING lld" + fi + + # Check for dtc (device tree compiler) + if ! command -v dtc &>/dev/null; then + MISSING="$MISSING dtc" + fi + + # Check for libkrunfw + if [ ! -f "${BREW_PREFIX}/lib/libkrunfw.dylib" ] && \ + [ ! -f "${BREW_PREFIX}/lib/libkrunfw.5.dylib" ] && \ + [ ! 
-f "${CUSTOM_RUNTIME}/libkrunfw.dylib" ]; then + MISSING="$MISSING libkrunfw" + fi + + if [ -n "$MISSING" ]; then + echo "Error: Missing dependencies:$MISSING" >&2 + echo "" >&2 + echo "Install with: brew install$MISSING" >&2 + exit 1 + fi + + echo " All dependencies found" +} + +check_deps + +# ── Setup build directory ─────────────────────────────────────────────── + +mkdir -p "$BUILD_DIR" +cd "$BUILD_DIR" + +# ── Clone libkrun ─────────────────────────────────────────────────────── + +LIBKRUN_REF="${LIBKRUN_REF:-e5922f6}" + +if [ ! -d libkrun ]; then + echo "==> Cloning libkrun..." + git clone https://github.com/containers/libkrun.git +fi + +echo "==> Checking out ${LIBKRUN_REF}..." +cd libkrun +git fetch origin --tags +git checkout "${LIBKRUN_REF}" 2>/dev/null || git checkout "tags/${LIBKRUN_REF}" 2>/dev/null || { + echo "Error: Could not checkout ${LIBKRUN_REF}" >&2 + exit 1 +} +cd .. + +LIBKRUN_COMMIT=$(git -C libkrun rev-parse HEAD) +echo " Commit: ${LIBKRUN_COMMIT}" + +cd libkrun + +# ── Build libkrun ─────────────────────────────────────────────────────── + +echo "" +echo "==> Building libkrun with NET=1 BLK=1 (no GPU)..." 
+ +# Find libkrunfw - prefer custom build with bridge support +if [ -f "${CUSTOM_RUNTIME}/provenance.json" ] && [ -f "${CUSTOM_RUNTIME}/libkrunfw.dylib" ]; then + LIBKRUNFW_DIR="${CUSTOM_RUNTIME}" + echo " Using custom libkrunfw from ${LIBKRUNFW_DIR}" +else + LIBKRUNFW_DIR="${BREW_PREFIX}/lib" + echo " Using Homebrew libkrunfw from ${LIBKRUNFW_DIR}" +fi + +# Set library search paths for build +export LIBRARY_PATH="${LIBKRUNFW_DIR}:${BREW_PREFIX}/lib:${LIBRARY_PATH:-}" +export DYLD_LIBRARY_PATH="${LIBKRUNFW_DIR}:${BREW_PREFIX}/lib:${DYLD_LIBRARY_PATH:-}" + +# Set up LLVM/clang for bindgen (required by krun_display/krun_input if they get compiled) +# Note: DYLD_LIBRARY_PATH is needed at runtime for the build scripts that use libclang +LLVM_PREFIX="${BREW_PREFIX}/opt/llvm" +if [ -d "$LLVM_PREFIX" ]; then + export LIBCLANG_PATH="${LLVM_PREFIX}/lib" + export DYLD_LIBRARY_PATH="${LLVM_PREFIX}/lib:${DYLD_LIBRARY_PATH:-}" +fi + +# Build with BLK and NET features only (no GPU) +# This avoids the virglrenderer → libepoxy → MoltenVK dependency chain +make clean 2>/dev/null || true +make BLK=1 NET=1 -j"$(sysctl -n hw.ncpu)" + +# ── Rewrite dylib paths for portability ───────────────────────────────── + +echo "" +echo "==> Making dylib portable with @loader_path..." + +DYLIB="target/release/libkrun.dylib" +if [ ! 
-f "$DYLIB" ]; then + echo "Error: Build did not produce $DYLIB" >&2 + exit 1 +fi + +# Copy to output +cp "$DYLIB" "${OUTPUT_DIR}/libkrun.dylib" +DYLIB="${OUTPUT_DIR}/libkrun.dylib" + +# Show current dependencies +echo " Original dependencies:" +otool -L "$DYLIB" | grep -v "^/" | sed 's/^/ /' + +# Rewrite the install name to use @loader_path (makes it relocatable) +install_name_tool -id "@loader_path/libkrun.dylib" "$DYLIB" + +# Rewrite libkrunfw path to @loader_path (will be bundled alongside) +# Find what libkrunfw path is currently referenced +# Note: grep may not find anything (libkrunfw is loaded via dlopen), so we use || true +KRUNFW_PATH=$(otool -L "$DYLIB" | grep libkrunfw | awk '{print $1}' || true) +if [ -n "$KRUNFW_PATH" ]; then + install_name_tool -change "$KRUNFW_PATH" "@loader_path/libkrunfw.dylib" "$DYLIB" + echo " Rewrote: $KRUNFW_PATH → @loader_path/libkrunfw.dylib" +fi + +# Re-codesign after modifications (required on macOS) +codesign -f -s - "$DYLIB" + +# Show final dependencies +echo "" +echo " Final dependencies:" +otool -L "$DYLIB" | grep -v "^/" | sed 's/^/ /' + +# Verify no hardcoded homebrew paths remain +if otool -L "$DYLIB" | grep -q "/opt/homebrew"; then + echo "" + echo "Warning: Homebrew paths still present in dylib!" >&2 + otool -L "$DYLIB" | grep "/opt/homebrew" | sed 's/^/ /' +else + echo "" + echo " ✓ No hardcoded Homebrew paths" +fi + +# ── Copy libkrunfw to output ──────────────────────────────────────────── + +echo "" +echo "==> Bundling libkrunfw..." 
+ +# Find and copy libkrunfw +KRUNFW_SRC="" +for candidate in \ + "${CUSTOM_RUNTIME}/libkrunfw.dylib" \ + "${CUSTOM_RUNTIME}/libkrunfw.5.dylib" \ + "${BREW_PREFIX}/lib/libkrunfw.dylib" \ + "${BREW_PREFIX}/lib/libkrunfw.5.dylib"; do + if [ -f "$candidate" ]; then + # Resolve symlinks + if [ -L "$candidate" ]; then + KRUNFW_SRC=$(readlink -f "$candidate" 2>/dev/null || readlink "$candidate") + if [[ "$KRUNFW_SRC" != /* ]]; then + KRUNFW_SRC="$(dirname "$candidate")/${KRUNFW_SRC}" + fi + else + KRUNFW_SRC="$candidate" + fi + break + fi +done + +if [ -z "$KRUNFW_SRC" ]; then + echo "Error: Could not find libkrunfw.dylib" >&2 + exit 1 +fi + +cp "$KRUNFW_SRC" "${OUTPUT_DIR}/libkrunfw.dylib" +echo " Copied: $KRUNFW_SRC" + +# Make libkrunfw portable too +install_name_tool -id "@loader_path/libkrunfw.dylib" "${OUTPUT_DIR}/libkrunfw.dylib" +codesign -f -s - "${OUTPUT_DIR}/libkrunfw.dylib" + +# Check libkrunfw dependencies +echo " libkrunfw dependencies:" +otool -L "${OUTPUT_DIR}/libkrunfw.dylib" | grep -v "^/" | sed 's/^/ /' + +# ── Summary ───────────────────────────────────────────────────────────── + +cd "$BUILD_DIR" + +echo "" +echo "==> Build complete!" +echo " Output directory: ${OUTPUT_DIR}" +echo "" +echo " Artifacts:" +ls -lah "${OUTPUT_DIR}"/*.dylib + +# Verify portability +echo "" +echo "==> Verifying portability..." +ALL_GOOD=true + +for lib in "${OUTPUT_DIR}"/*.dylib; do + if otool -L "$lib" | grep -q "/opt/homebrew"; then + echo " ✗ $(basename "$lib") has hardcoded paths" + ALL_GOOD=false + else + echo " ✓ $(basename "$lib") is portable" + fi +done + +if $ALL_GOOD; then + echo "" + echo "All libraries are portable!" 
+ echo "" + echo "Next step: mise run vm:build" +else + echo "" + echo "Warning: Some libraries have non-portable paths" + echo "They may not work on machines without Homebrew" +fi diff --git a/tasks/scripts/vm/build-libkrun.sh b/tasks/scripts/vm/build-libkrun.sh new file mode 100755 index 000000000..2c01c65de --- /dev/null +++ b/tasks/scripts/vm/build-libkrun.sh @@ -0,0 +1,248 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build libkrun and libkrunfw from source on Linux. +# +# This script builds libkrun (VMM) and libkrunfw (kernel firmware) from source +# with OpenShell's custom kernel configuration for bridge/netfilter support. +# +# Prerequisites: +# - Linux (aarch64 or x86_64) +# - Build tools: make, git, gcc, flex, bison, bc +# - Python 3 with pyelftools +# - Rust toolchain +# +# Usage: +# ./build-libkrun.sh +# +# The script will install missing dependencies on Debian/Ubuntu and Fedora. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/_lib.sh" +ROOT="$(vm_lib_root)" + +# Source pinned dependency versions +source "${ROOT}/crates/openshell-vm/pins.env" 2>/dev/null || true + +BUILD_DIR="${ROOT}/target/libkrun-build" +OUTPUT_DIR="${BUILD_DIR}" +KERNEL_CONFIG="${ROOT}/crates/openshell-vm/runtime/kernel/openshell.kconfig" + +if [ "$(uname -s)" != "Linux" ]; then + echo "Error: This script only runs on Linux" >&2 + exit 1 +fi + +ARCH="$(uname -m)" +echo "==> Building libkrun for Linux ${ARCH}" +echo " Build directory: ${BUILD_DIR}" +echo " Kernel config: ${KERNEL_CONFIG}" +echo "" + +# ── Install dependencies ──────────────────────────────────────────────── + +install_deps() { + echo "==> Checking/installing build dependencies..." 
+ + if command -v apt-get &>/dev/null; then + # Debian/Ubuntu + DEPS="build-essential git python3 python3-pyelftools flex bison libelf-dev libssl-dev bc curl libclang-dev" + MISSING="" + for dep in $DEPS; do + if ! dpkg -s "$dep" &>/dev/null; then + MISSING="$MISSING $dep" + fi + done + if [ -n "$MISSING" ]; then + echo " Installing:$MISSING" + sudo apt-get update + sudo apt-get install -y $MISSING + else + echo " All dependencies installed" + fi + + elif command -v dnf &>/dev/null; then + # Fedora/RHEL + DEPS="make git python3 python3-pyelftools gcc flex bison elfutils-libelf-devel openssl-devel bc glibc-static curl clang-devel" + echo " Installing dependencies via dnf..." + sudo dnf install -y $DEPS + + else + echo "Warning: Unknown package manager. Please install manually:" >&2 + echo " build-essential git python3 python3-pyelftools flex bison" >&2 + echo " libelf-dev libssl-dev bc curl" >&2 + fi +} + +install_deps + +# ── Setup build directory ─────────────────────────────────────────────── + +mkdir -p "$BUILD_DIR" +cd "$BUILD_DIR" + +# ── Build libkrunfw (kernel firmware) ─────────────────────────────────── + +echo "" +echo "==> Building libkrunfw with custom kernel config..." + +if [ ! -d libkrunfw ]; then + echo " Cloning libkrunfw (pinned: ${LIBKRUNFW_REF:-HEAD})..." + git clone https://github.com/containers/libkrunfw.git +fi + +cd libkrunfw + +# Ensure we're on the pinned commit for reproducible builds +if [ -n "${LIBKRUNFW_REF:-}" ]; then + echo " Checking out pinned ref: ${LIBKRUNFW_REF}" + git fetch origin + git checkout "${LIBKRUNFW_REF}" +fi + +# Copy custom kernel config fragment +if [ -f "$KERNEL_CONFIG" ]; then + cp "$KERNEL_CONFIG" openshell.kconfig + echo " Applied custom kernel config fragment: openshell.kconfig" +else + echo "Warning: Custom kernel config not found at ${KERNEL_CONFIG}" >&2 + echo " Building with default config (k3s networking may not work)" >&2 +fi + +echo " Building kernel and libkrunfw (this may take 15-20 minutes)..." 
+ +# The libkrunfw Makefile does not support a config fragment — it copies the +# base config and runs olddefconfig, then builds the kernel image in one +# make invocation. We cannot inject the fragment mid-build via make flags. +# +# Instead we drive the build in two phases: +# +# Phase 1: Run the Makefile's $(KERNEL_SOURCES) target, which: +# - downloads and extracts the kernel tarball (if needed) +# - applies patches +# - copies config-libkrunfw_aarch64 to $(KERNEL_SOURCES)/.config +# - runs olddefconfig +# +# Phase 2: Merge our fragment on top of the .config produced by Phase 1 +# using the kernel's own merge_config.sh, then re-run olddefconfig +# to resolve new dependency chains (e.g. CONFIG_BRIDGE pulls in +# CONFIG_BRIDGE_NETFILTER which needs CONFIG_NETFILTER etc). +# +# Phase 3: Let the Makefile build everything (kernel + kernel.c + .so), +# skipping the $(KERNEL_SOURCES) target since it already exists. + +KERNEL_VERSION="$(grep '^KERNEL_VERSION' Makefile | head -1 | awk '{print $3}')" +KERNEL_SOURCES="${KERNEL_VERSION}" + +# Phase 1: prepare kernel source tree + base .config. +# Run the Makefile's $(KERNEL_SOURCES) target whenever the .config is absent +# (either because the tree was never extracted, or because it was cleaned). +# The target is idempotent: if the directory already exists make skips the +# tarball extraction but still copies the base config and runs olddefconfig. +if [ ! -f "${KERNEL_SOURCES}/.config" ]; then + echo " Phase 1: preparing kernel source tree and base .config..." + # Remove the directory so make re-runs the full $(KERNEL_SOURCES) recipe + # (extract + patch + config copy + olddefconfig). + rm -rf "${KERNEL_SOURCES}" + make "${KERNEL_SOURCES}" +else + echo " Phase 1: kernel source tree and .config already present, skipping" +fi + +# Phase 2: merge the openshell fragment on top +if [ -f openshell.kconfig ]; then + echo " Phase 2: merging openshell.kconfig fragment..." 
+ + # merge_config.sh must be called with ARCH set so it finds the right Kconfig + # entry points. -m means "merge into existing .config" (vs starting fresh). + ARCH=arm64 KCONFIG_CONFIG="${KERNEL_SOURCES}/.config" \ + "${KERNEL_SOURCES}/scripts/kconfig/merge_config.sh" \ + -m -O "${KERNEL_SOURCES}" \ + "${KERNEL_SOURCES}/.config" \ + openshell.kconfig + + # Re-run olddefconfig to fill in any new symbols introduced by the fragment. + make -C "${KERNEL_SOURCES}" ARCH=arm64 olddefconfig + + # Verify that the key options were actually applied. + all_ok=true + for opt in CONFIG_BRIDGE CONFIG_NETFILTER CONFIG_NF_NAT; do + val="$(grep "^${opt}=" "${KERNEL_SOURCES}/.config" 2>/dev/null || true)" + if [ -n "$val" ]; then + echo " ${opt}: ${val#*=}" + else + echo " WARNING: ${opt} not set after merge!" >&2 + all_ok=false + fi + done + if [ "$all_ok" = false ]; then + echo "ERROR: kernel config fragment merge failed — required options missing" >&2 + exit 1 + fi + + # The kernel binary and kernel.c from the previous (bad) build must be + # removed so make rebuilds them with the updated .config. + rm -f kernel.c "${KERNEL_SOURCES}/arch/arm64/boot/Image" \ + "${KERNEL_SOURCES}/vmlinux" libkrunfw.so* +fi + +# Phase 3: build kernel image, kernel.c bundle, and the shared library +make -j"$(nproc)" + +# Copy output +cp libkrunfw.so* "$OUTPUT_DIR/" +echo " Built: $(ls "$OUTPUT_DIR"/libkrunfw.so* | xargs -n1 basename | tr '\n' ' ')" + +cd "$BUILD_DIR" + +# ── Build libkrun (VMM) ───────────────────────────────────────────────── + +echo "" +echo "==> Building libkrun..." + +if [ ! -d libkrun ]; then + echo " Cloning libkrun..." + git clone --depth 1 https://github.com/containers/libkrun.git +fi + +cd libkrun + +# Build with NET support for gvproxy networking and BLK support for the +# host-backed state disk. +echo " Building libkrun with NET=1 BLK=1..." + +# Locate libclang for clang-sys if LIBCLANG_PATH isn't already set. 
+# clang-sys looks for libclang.so or libclang-*.so; on Debian/Ubuntu the +# versioned file (e.g. libclang-18.so.18) lives under the LLVM lib dir. +if [ -z "${LIBCLANG_PATH:-}" ]; then + for llvm_lib in /usr/lib/llvm-*/lib; do + if ls "$llvm_lib"/libclang*.so* &>/dev/null; then + export LIBCLANG_PATH="$llvm_lib" + echo " LIBCLANG_PATH=$LIBCLANG_PATH" + break + fi + done +fi + +make NET=1 BLK=1 -j"$(nproc)" + +# Copy output +cp target/release/libkrun.so "$OUTPUT_DIR/" +echo " Built: libkrun.so" + +cd "$BUILD_DIR" + +# ── Summary ───────────────────────────────────────────────────────────── + +echo "" +echo "==> Build complete!" +echo " Output directory: ${OUTPUT_DIR}" +echo "" +echo " Artifacts:" +ls -lah "$OUTPUT_DIR"/*.so* + +echo "" +echo "Next step: mise run vm:build" diff --git a/tasks/scripts/vm/build-rootfs-tarball.sh b/tasks/scripts/vm/build-rootfs-tarball.sh new file mode 100755 index 000000000..76e4f6297 --- /dev/null +++ b/tasks/scripts/vm/build-rootfs-tarball.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build rootfs and compress to tarball for embedding in openshell-vm binary. +# +# This script: +# 1. Builds the rootfs using build-rootfs.sh +# 2. Compresses it to a zstd tarball for embedding +# +# Usage: +# ./build-rootfs-tarball.sh [--base] +# +# Options: +# --base Build a base rootfs (~200-300MB) without pre-loaded images. +# First boot will be slower but binary size is much smaller. +# Default: full rootfs with pre-loaded images (~2GB+). +# +# The resulting tarball is placed at target/vm-runtime-compressed/rootfs.tar.zst +# for inclusion in the embedded binary build. + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" +ROOTFS_BUILD_DIR="${ROOT}/target/rootfs-build" +OUTPUT_DIR="${ROOT}/target/vm-runtime-compressed" +OUTPUT="${OUTPUT_DIR}/rootfs.tar.zst" + +# Parse arguments +BASE_ONLY=false +for arg in "$@"; do + case "$arg" in + --base) + BASE_ONLY=true + ;; + --help|-h) + echo "Usage: $0 [--base]" + echo "" + echo "Options:" + echo " --base Build base rootfs (~200-300MB) without pre-loaded images" + echo " First boot will be slower but binary size is much smaller" + exit 0 + ;; + *) + echo "Unknown option: $arg" + echo "Use --help for usage information" + exit 1 + ;; + esac +done + +# Check for Docker +if ! command -v docker &>/dev/null; then + echo "Error: Docker is required to build the rootfs" >&2 + echo "Please install Docker and try again" >&2 + exit 1 +fi + +# Check if Docker daemon is running +if ! docker info &>/dev/null; then + echo "Error: Docker daemon is not running" >&2 + echo "Please start Docker and try again" >&2 + exit 1 +fi + +if [ "$BASE_ONLY" = true ]; then + echo "==> Building BASE rootfs for embedding" + echo " Build dir: ${ROOTFS_BUILD_DIR}" + echo " Output: ${OUTPUT}" + echo " Mode: base (no pre-loaded images, ~200-300MB)" + echo "" + + # Build base rootfs + echo "==> Step 1/2: Building base rootfs..." + "${ROOT}/crates/openshell-vm/scripts/build-rootfs.sh" --base "${ROOTFS_BUILD_DIR}" +else + echo "==> Building FULL rootfs for embedding" + echo " Build dir: ${ROOTFS_BUILD_DIR}" + echo " Output: ${OUTPUT}" + echo " Mode: full (pre-loaded images, pre-initialized, ~2GB+)" + echo "" + + # Build full rootfs + echo "==> Step 1/2: Building full rootfs (this may take 10-15 minutes)..." + "${ROOT}/crates/openshell-vm/scripts/build-rootfs.sh" "${ROOTFS_BUILD_DIR}" +fi + +# Compress to tarball +echo "" +echo "==> Step 2/2: Compressing rootfs to tarball..." 
+mkdir -p "${OUTPUT_DIR}"
+
+# Remove existing tarball if present
+rm -f "${OUTPUT}"
+
+# Get uncompressed size for display
+echo " Uncompressed size: $(du -sh "${ROOTFS_BUILD_DIR}" | cut -f1)"
+
+# Create tarball with zstd compression
+# -19 = high compression (slower but smaller)
+# -T0 = use all available threads
+echo " Compressing with zstd (level 19, this may take a few minutes)..."
+tar -C "${ROOTFS_BUILD_DIR}" -cf - . | zstd -19 -T0 -o "${OUTPUT}"
+
+# Report results
+echo ""
+echo "==> Rootfs tarball created successfully!"
+echo " Output: ${OUTPUT}"
+echo " Compressed: $(du -sh "${OUTPUT}" | cut -f1)"
+if [ "$BASE_ONLY" = true ]; then
+ echo " Type: base (first boot ~30-60s, images pulled on demand)"
+else
+ echo " Type: full (first boot ~3-5s, images pre-loaded)"
+fi
+echo ""
+echo "Next step: mise run vm:build"
diff --git a/tasks/scripts/vm/bundle-vm-runtime.sh b/tasks/scripts/vm/bundle-vm-runtime.sh
new file mode 100755
index 000000000..6c21e511d
--- /dev/null
+++ b/tasks/scripts/vm/bundle-vm-runtime.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Stage the openshell-vm sidecar runtime bundle next to local build outputs.
+#
+# Copies the uncompressed VM runtime libraries (libkrun, libkrunfw, gvproxy)
+# from target/vm-runtime/ into the .runtime sidecar directories alongside
+# each build output. This is required for:
+# - build-rootfs.sh pre-initialization (boots the real VM to pre-bake k3s state)
+# - Direct invocation of target/debug/openshell-vm without embedding
+#
+# The source artifacts are collected by compress-vm-runtime.sh into
+# target/vm-runtime/ before compression; this script re-uses that work dir.
+#
+# Usage:
+# ./tasks/scripts/vm/bundle-vm-runtime.sh

+set -euo pipefail
+
+ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" || ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" + +SOURCE_DIR="${ROOT}/target/vm-runtime" + +if [ ! -d "${SOURCE_DIR}" ]; then + echo "ERROR: VM runtime source not found at ${SOURCE_DIR}" + echo " Run: mise run vm:setup" + exit 1 +fi + +# Verify required files are present +for required in libkrun.so gvproxy; do + if ! ls "${SOURCE_DIR}/${required}" >/dev/null 2>&1; then + # Try platform-specific variants + if [ "$required" = "libkrun.so" ] && ls "${SOURCE_DIR}"/libkrun.dylib >/dev/null 2>&1; then + continue + fi + echo "ERROR: Required runtime file not found: ${SOURCE_DIR}/${required}" + echo " Run: mise run vm:setup" + exit 1 + fi +done + +TARGETS=( + "${ROOT}/target/debug" + "${ROOT}/target/release" +) + +for target_dir in "${TARGETS[@]}"; do + # Only stage if the binary exists (avoid creating orphan runtime dirs) + if [ ! -f "${target_dir}/openshell-vm" ] && [ ! -f "${target_dir}/openshell-vm.d" ]; then + continue + fi + + runtime_dir="${target_dir}/openshell-vm.runtime" + mkdir -p "${runtime_dir}" + + for file in "${SOURCE_DIR}"/*; do + [ -f "$file" ] || continue + name="$(basename "$file")" + install -m 0755 "$file" "${runtime_dir}/${name}" + done + + echo "staged runtime bundle in ${runtime_dir}" +done diff --git a/tasks/scripts/vm/codesign-openshell-vm.sh b/tasks/scripts/vm/codesign-openshell-vm.sh new file mode 100755 index 000000000..0aeeca9b1 --- /dev/null +++ b/tasks/scripts/vm/codesign-openshell-vm.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +if [ "$(uname -s)" != "Darwin" ]; then + exit 0 +fi + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" +codesign --entitlements "${ROOT}/crates/openshell-vm/entitlements.plist" --force -s - "${ROOT}/target/debug/openshell-vm" diff --git a/tasks/scripts/vm/compress-vm-runtime.sh b/tasks/scripts/vm/compress-vm-runtime.sh new file mode 100755 index 000000000..67290a936 --- /dev/null +++ b/tasks/scripts/vm/compress-vm-runtime.sh @@ -0,0 +1,246 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Gather VM runtime artifacts from local sources and compress for embedding. +# +# This script collects libkrun, libkrunfw, and gvproxy from local sources +# (Homebrew on macOS, built from source on Linux) and compresses them with +# zstd for embedding into the openshell-vm binary. +# +# Usage: +# ./compress-vm-runtime.sh +# +# Environment: +# OPENSHELL_VM_RUNTIME_COMPRESSED_DIR - Output directory (default: target/vm-runtime-compressed) +# VM_RUNTIME_TARBALL - Path to a pre-built vm-runtime-*.tar.zst tarball. +# When set, the script extracts and re-compresses +# artifacts from this tarball instead of looking for +# local builds. Used by CI and download-kernel-runtime.sh. +# +# The script sets OPENSHELL_VM_RUNTIME_COMPRESSED_DIR for use by build.rs. 
+ +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/_lib.sh" +ROOT="$(vm_lib_root)" + +# Source pins for gvproxy version +source "${ROOT}/crates/openshell-vm/pins.env" 2>/dev/null || true +GVPROXY_VERSION="${GVPROXY_VERSION:-v0.8.8}" + +# ── macOS dylib portability helpers ───────────────────────────────────── + +# Make a dylib portable by rewriting paths to use @loader_path +make_dylib_portable() { + local dylib="$1" + local dylib_name + dylib_name="$(basename "$dylib")" + + # Rewrite install name + install_name_tool -id "@loader_path/${dylib_name}" "$dylib" 2>/dev/null || true + + # Rewrite libkrunfw reference if present + local krunfw_path + krunfw_path=$(otool -L "$dylib" 2>/dev/null | grep libkrunfw | awk '{print $1}' || true) + if [ -n "$krunfw_path" ] && [[ "$krunfw_path" != @* ]]; then + install_name_tool -change "$krunfw_path" "@loader_path/libkrunfw.dylib" "$dylib" + fi + + # Re-codesign + codesign -f -s - "$dylib" 2>/dev/null || true +} + +WORK_DIR="${ROOT}/target/vm-runtime" +OUTPUT_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${ROOT}/target/vm-runtime-compressed}" + +rm -rf "$WORK_DIR" +mkdir -p "$WORK_DIR" "$OUTPUT_DIR" + +# ── Fast path: pre-built tarball from CI or download-kernel-runtime.sh ── + +if [ -n "${VM_RUNTIME_TARBALL:-}" ]; then + echo "==> Using pre-built runtime tarball: ${VM_RUNTIME_TARBALL}" + + if [ ! -f "${VM_RUNTIME_TARBALL}" ]; then + echo "Error: VM_RUNTIME_TARBALL not found: ${VM_RUNTIME_TARBALL}" >&2 + exit 1 + fi + + # Extract tarball contents + zstd -d "${VM_RUNTIME_TARBALL}" --stdout | tar -xf - -C "$WORK_DIR" + + echo " Extracted files:" + ls -lah "$WORK_DIR" + + echo "" + compress_dir "$WORK_DIR" "$OUTPUT_DIR" + + # Check for rootfs tarball (built separately) + ROOTFS_TARBALL="${OUTPUT_DIR}/rootfs.tar.zst" + if [ -f "$ROOTFS_TARBALL" ]; then + echo " rootfs.tar.zst: $(du -h "$ROOTFS_TARBALL" | cut -f1) (pre-built)" + else + echo "" + echo "Note: rootfs.tar.zst not found." 
+ echo " To build one, run: mise run vm:rootfs -- --base" + fi + + echo "" + echo "==> Compressed artifacts in ${OUTPUT_DIR}:" + ls -lah "$OUTPUT_DIR" + TOTAL=$(du -sh "$OUTPUT_DIR" | cut -f1) + echo "" + echo "==> Total compressed size: ${TOTAL}" + echo "" + echo "Next step: mise run vm:build" + exit 0 +fi + +echo "==> Detecting platform..." + +case "$(uname -s)-$(uname -m)" in + Darwin-arm64) + PLATFORM="darwin-aarch64" + echo " Platform: macOS ARM64" + + # Source priority for libkrun: + # 1. Custom build from build-libkrun-macos.sh (portable, no GPU deps) + # 2. Custom runtime with custom libkrunfw + LIBKRUN_BUILD_DIR="${ROOT}/target/libkrun-build" + CUSTOM_DIR="${ROOT}/target/custom-runtime" + BREW_PREFIX="$(brew --prefix 2>/dev/null || echo /opt/homebrew)" + + if [ -f "${LIBKRUN_BUILD_DIR}/libkrun.dylib" ]; then + echo " Using portable libkrun from ${LIBKRUN_BUILD_DIR}" + cp "${LIBKRUN_BUILD_DIR}/libkrun.dylib" "$WORK_DIR/" + cp "${LIBKRUN_BUILD_DIR}/libkrunfw.dylib" "$WORK_DIR/" + + # Verify portability + if otool -L "${LIBKRUN_BUILD_DIR}/libkrun.dylib" | grep -q "/opt/homebrew"; then + echo " Warning: libkrun has hardcoded Homebrew paths - may not be portable" + else + echo " ✓ libkrun is portable (no hardcoded paths)" + fi + elif [ -f "${CUSTOM_DIR}/provenance.json" ]; then + echo " Using custom runtime from ${CUSTOM_DIR}" + + # libkrun from Homebrew (needs path rewriting for portability) + if [ -f "${CUSTOM_DIR}/libkrun.dylib" ]; then + cp "${CUSTOM_DIR}/libkrun.dylib" "$WORK_DIR/" + else + cp "${BREW_PREFIX}/lib/libkrun.dylib" "$WORK_DIR/" + make_dylib_portable "$WORK_DIR/libkrun.dylib" + fi + + # libkrunfw from custom build + cp "${CUSTOM_DIR}/libkrunfw.dylib" "$WORK_DIR/" + else + echo "Error: No portable libkrun build found." >&2 + echo " Run: FROM_SOURCE=1 mise run vm:setup" >&2 + exit 1 + fi + + # Normalize libkrunfw naming - ensure both names exist for build.rs + # build.rs expects libkrunfw.5.dylib.zst; some builds produce libkrunfw.dylib + if [ ! 
-f "$WORK_DIR/libkrunfw.dylib" ] && [ -f "$WORK_DIR/libkrunfw.5.dylib" ]; then + cp "$WORK_DIR/libkrunfw.5.dylib" "$WORK_DIR/libkrunfw.dylib" + fi + if [ ! -f "$WORK_DIR/libkrunfw.5.dylib" ] && [ -f "$WORK_DIR/libkrunfw.dylib" ]; then + cp "$WORK_DIR/libkrunfw.dylib" "$WORK_DIR/libkrunfw.5.dylib" + fi + + # gvproxy - prefer Podman, fall back to Homebrew + if [ -x /opt/podman/bin/gvproxy ]; then + cp /opt/podman/bin/gvproxy "$WORK_DIR/" + echo " Using gvproxy from Podman" + elif [ -x "${BREW_PREFIX}/bin/gvproxy" ]; then + cp "${BREW_PREFIX}/bin/gvproxy" "$WORK_DIR/" + echo " Using gvproxy from Homebrew" + else + echo "Error: gvproxy not found. Install Podman Desktop or run: brew install gvproxy" >&2 + exit 1 + fi + ;; + + Linux-*) + ARCH="$(uname -m)" + case "$ARCH" in + aarch64) GVPROXY_ARCH="arm64" ;; + x86_64) GVPROXY_ARCH="amd64" ;; + *) + echo "Error: Unsupported Linux architecture: ${ARCH}" >&2 + exit 1 + ;; + esac + PLATFORM="linux-${ARCH}" + echo " Platform: Linux ${ARCH}" + + BUILD_DIR="${ROOT}/target/libkrun-build" + if [ ! -f "${BUILD_DIR}/libkrun.so" ]; then + echo "Error: libkrun not found. Run: FROM_SOURCE=1 mise run vm:setup" >&2 + exit 1 + fi + + cp "${BUILD_DIR}/libkrun.so" "$WORK_DIR/" + + # Copy libkrunfw - find the versioned .so file + for krunfw in "${BUILD_DIR}"/libkrunfw.so*; do + [ -f "$krunfw" ] || continue + cp "$krunfw" "$WORK_DIR/" + done + + # Ensure the soname symlink (libkrunfw.so.5) exists alongside the fully + # versioned file (libkrunfw.so.5.x.y). libloading loads by soname. + if [ ! -f "$WORK_DIR/libkrunfw.so.5" ]; then + versioned=$(ls "$WORK_DIR"/libkrunfw.so.5.* 2>/dev/null | head -n1) + if [ -n "$versioned" ]; then + cp "$versioned" "$WORK_DIR/libkrunfw.so.5" + fi + fi + + # Download gvproxy if not present + if [ ! -f "$WORK_DIR/gvproxy" ]; then + echo " Downloading gvproxy for linux-${GVPROXY_ARCH}..." 
+ curl -fsSL -o "$WORK_DIR/gvproxy" \ + "https://github.com/containers/gvisor-tap-vsock/releases/download/${GVPROXY_VERSION}/gvproxy-linux-${GVPROXY_ARCH}" + chmod +x "$WORK_DIR/gvproxy" + fi + ;; + + *) + echo "Error: Unsupported platform: $(uname -s)-$(uname -m)" >&2 + echo "Supported platforms: Darwin-arm64, Linux-aarch64, Linux-x86_64" >&2 + exit 1 + ;; +esac + +echo "" +echo "==> Collected artifacts:" +ls -lah "$WORK_DIR" + +echo "" +compress_dir "$WORK_DIR" "$OUTPUT_DIR" + +# Check for rootfs tarball (built separately by build-rootfs-tarball.sh) +ROOTFS_TARBALL="${OUTPUT_DIR}/rootfs.tar.zst" +if [ -f "$ROOTFS_TARBALL" ]; then + echo " rootfs.tar.zst: $(du -h "$ROOTFS_TARBALL" | cut -f1) (pre-built)" +else + echo "" + echo "Note: rootfs.tar.zst not found." + echo " To build one, run: mise run vm:rootfs -- --base" + echo " Without it, the binary will still work but require the rootfs" + echo " to be built separately on first run." +fi + +echo "" +echo "==> Compressed artifacts in ${OUTPUT_DIR}:" +ls -lah "$OUTPUT_DIR" + +TOTAL=$(du -sh "$OUTPUT_DIR" | cut -f1) +echo "" +echo "==> Total compressed size: ${TOTAL}" +echo "" +echo "Next step: mise run vm:build" diff --git a/tasks/scripts/vm/download-kernel-runtime.sh b/tasks/scripts/vm/download-kernel-runtime.sh new file mode 100755 index 000000000..8f0427af9 --- /dev/null +++ b/tasks/scripts/vm/download-kernel-runtime.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Download pre-built VM kernel runtime artifacts from the vm-dev GitHub Release +# and stage them for the openshell-vm cargo build. +# +# This script is used by CI (release-vm-dev.yml) and can also be used locally +# to avoid building libkrun/libkrunfw from source. 
+# +# Usage: +# ./download-kernel-runtime.sh [--platform PLATFORM] +# +# Environment: +# VM_RUNTIME_RELEASE_TAG - GitHub Release tag (default: vm-dev) +# GITHUB_REPOSITORY - owner/repo (default: NVIDIA/OpenShell) +# OPENSHELL_VM_RUNTIME_COMPRESSED_DIR - Output directory (default: target/vm-runtime-compressed) +# +# Platforms: linux-aarch64, linux-x86_64, darwin-aarch64 + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/_lib.sh" +ROOT="$(vm_lib_root)" + +RELEASE_TAG="${VM_RUNTIME_RELEASE_TAG:-vm-dev}" +REPO="${GITHUB_REPOSITORY:-NVIDIA/OpenShell}" +OUTPUT_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${ROOT}/target/vm-runtime-compressed}" + +# ── Auto-detect platform (detect_platform from _lib.sh) ───────────────── + +PLATFORM="" +while [[ $# -gt 0 ]]; do + case "$1" in + --platform) + PLATFORM="$2"; shift 2 ;; + --help|-h) + echo "Usage: $0 [--platform PLATFORM]" + echo "" + echo "Download pre-built VM kernel runtime from the vm-dev GitHub Release." + echo "" + echo "Platforms: linux-aarch64, linux-x86_64, darwin-aarch64" + echo "" + echo "Environment:" + echo " VM_RUNTIME_RELEASE_TAG Release tag (default: vm-dev)" + echo " GITHUB_REPOSITORY owner/repo (default: NVIDIA/OpenShell)" + echo " OPENSHELL_VM_RUNTIME_COMPRESSED_DIR Output directory" + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2; exit 1 ;; + esac +done + +if [ -z "$PLATFORM" ]; then + PLATFORM="$(detect_platform)" +fi + +TARBALL_NAME="vm-runtime-${PLATFORM}.tar.zst" + +echo "==> Downloading VM kernel runtime" +echo " Repository: ${REPO}" +echo " Release: ${RELEASE_TAG}" +echo " Platform: ${PLATFORM}" +echo " Artifact: ${TARBALL_NAME}" +echo " Output: ${OUTPUT_DIR}" +echo "" + +# ── Check for gh CLI ──────────────────────────────────────────────────── + +if ! command -v gh &>/dev/null; then + echo "Error: GitHub CLI (gh) is required." 
>&2 + echo " Install: https://cli.github.com/" >&2 + exit 1 +fi + +# ── Download the runtime tarball ──────────────────────────────────────── + +DOWNLOAD_DIR="${ROOT}/target/vm-runtime-download" +mkdir -p "$DOWNLOAD_DIR" "$OUTPUT_DIR" + +echo "==> Downloading ${TARBALL_NAME} from ${RELEASE_TAG}..." +gh release download "${RELEASE_TAG}" \ + --repo "${REPO}" \ + --pattern "${TARBALL_NAME}" \ + --dir "${DOWNLOAD_DIR}" \ + --clobber + +if [ ! -f "${DOWNLOAD_DIR}/${TARBALL_NAME}" ]; then + echo "Error: Download failed — ${TARBALL_NAME} not found." >&2 + echo "" >&2 + echo "The vm-dev release may not have kernel runtime artifacts yet." >&2 + echo "Run the 'Release VM Kernel' workflow first:" >&2 + echo " gh workflow run release-vm-kernel.yml" >&2 + exit 1 +fi + +echo " Downloaded: $(du -sh "${DOWNLOAD_DIR}/${TARBALL_NAME}" | cut -f1)" + +# ── Extract and stage for cargo build ─────────────────────────────────── + +echo "" +echo "==> Extracting runtime artifacts..." + +EXTRACT_DIR="${ROOT}/target/vm-runtime-extracted" +rm -rf "$EXTRACT_DIR" +mkdir -p "$EXTRACT_DIR" + +zstd -d "${DOWNLOAD_DIR}/${TARBALL_NAME}" --stdout | tar -xf - -C "$EXTRACT_DIR" + +echo " Extracted files:" +ls -lah "$EXTRACT_DIR" + +# ── Compress individual files for embedding ───────────────────────────── +# The cargo build expects individual .zst files (libkrun.so.zst, etc.) +# in OPENSHELL_VM_RUNTIME_COMPRESSED_DIR. The downloaded tarball contains +# the raw libraries, so we re-compress each one. + +echo "" +compress_dir "$EXTRACT_DIR" "$OUTPUT_DIR" + +# ── Check for rootfs (may already be present from a separate build step) ── + +if [ -f "${OUTPUT_DIR}/rootfs.tar.zst" ]; then + echo "" + echo " rootfs.tar.zst: $(du -h "${OUTPUT_DIR}/rootfs.tar.zst" | cut -f1) (pre-existing)" +else + echo "" + echo "Note: rootfs.tar.zst not found in ${OUTPUT_DIR}." 
+ echo " Build it with: mise run vm:rootfs -- --base" +fi + +echo "" +echo "==> Staged artifacts in ${OUTPUT_DIR}:" +ls -lah "$OUTPUT_DIR" + +echo "" +echo "==> Done." +echo "" +echo "Next step: mise run vm:build" diff --git a/tasks/scripts/vm/ensure-vm-rootfs.sh b/tasks/scripts/vm/ensure-vm-rootfs.sh new file mode 100755 index 000000000..3cf9ddfc6 --- /dev/null +++ b/tasks/scripts/vm/ensure-vm-rootfs.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)" +GATEWAY_BIN="${ROOT}/target/debug/openshell-vm" + +NAME="default" +ROOTFS_ARGS=() + +while [[ $# -gt 0 ]]; do + case "$1" in + --name) + NAME="$2" + shift 2 + ;; + --name=*) + NAME="${1#--name=}" + shift + ;; + --rootfs) + ROOTFS_ARGS=("$1" "$2") + shift 2 + ;; + --rootfs=*) + ROOTFS_ARGS=("$1") + shift + ;; + *) + echo "Unknown argument: $1" >&2 + exit 1 + ;; + esac +done + +if [ ! -x "${GATEWAY_BIN}" ]; then + echo "ERROR: openshell-vm binary not found at ${GATEWAY_BIN}" >&2 + echo " Run: mise run vm:build" >&2 + exit 1 +fi + +prepare_args=(--name "${NAME}") +if [ "${#ROOTFS_ARGS[@]}" -gt 0 ]; then + prepare_args=("${ROOTFS_ARGS[@]}" "${prepare_args[@]}") +fi +if [ "${OPENSHELL_VM_FORCE_ROOTFS_REBUILD:-}" = "1" ]; then + prepare_args+=(prepare-rootfs --force) +else + prepare_args+=(prepare-rootfs) +fi + +if ROOTFS_PATH="$("${GATEWAY_BIN}" "${prepare_args[@]}" 2>/dev/null)"; then + echo "using openshell-vm rootfs at ${ROOTFS_PATH}" + exit 0 +fi + +# prepare-rootfs failed — no embedded rootfs in the binary. +# Fall back to target/rootfs-build if it exists (rootfs was built separately +# but not yet compressed for embedding via mise run vm:rootfs). 
+if [ "${#ROOTFS_ARGS[@]}" -eq 0 ]; then + FALLBACK_ROOTFS="${ROOT}/target/rootfs-build" + if [ -d "${FALLBACK_ROOTFS}/srv" ]; then + echo "using openshell-vm rootfs at ${FALLBACK_ROOTFS}" + exit 0 + fi +fi + +echo "ERROR: No rootfs available." >&2 +echo " Run: mise run vm:rootfs -- --base # build rootfs (~5-10 min, requires Docker)" >&2 +exit 1 diff --git a/tasks/scripts/vm/package-vm-runtime.sh b/tasks/scripts/vm/package-vm-runtime.sh new file mode 100755 index 000000000..f97eec870 --- /dev/null +++ b/tasks/scripts/vm/package-vm-runtime.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Package VM runtime artifacts into a release tarball. +# +# Used by CI (release-vm-kernel.yml) to bundle libkrun, libkrunfw, and gvproxy +# into a platform-specific tarball for the vm-dev GitHub Release. Handles +# gvproxy download, provenance metadata generation, and tarball creation. 
+# +# Usage: +# ./package-vm-runtime.sh --platform --build-dir --output +# +# Arguments: +# --platform One of: linux-aarch64, linux-x86_64, darwin-aarch64 +# --build-dir Directory containing built libkrun and libkrunfw artifacts +# --output Path for the output .tar.zst file +# +# Environment (optional, for provenance): +# GITHUB_SHA - Git commit SHA +# GITHUB_RUN_ID - GitHub Actions run ID +# CUSTOM_PROVENANCE_DIR - Directory containing provenance.json from custom +# libkrunfw build (macOS only) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/_lib.sh" +ROOT="$(vm_lib_root)" + +# Source pins for gvproxy version +source "${ROOT}/crates/openshell-vm/pins.env" 2>/dev/null || true +GVPROXY_VERSION="${GVPROXY_VERSION:-v0.8.8}" + +PLATFORM="" +BUILD_DIR="" +OUTPUT="" +CUSTOM_PROVENANCE_DIR="${CUSTOM_PROVENANCE_DIR:-}" + +while [[ $# -gt 0 ]]; do + case "$1" in + --platform) PLATFORM="$2"; shift 2 ;; + --build-dir) BUILD_DIR="$2"; shift 2 ;; + --output) OUTPUT="$2"; shift 2 ;; + --help|-h) + echo "Usage: $0 --platform --build-dir --output " + echo "" + echo "Package VM runtime artifacts into a release tarball." 
+ echo "" + echo "Platforms: linux-aarch64, linux-x86_64, darwin-aarch64" + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2; exit 1 ;; + esac +done + +if [ -z "$PLATFORM" ] || [ -z "$BUILD_DIR" ] || [ -z "$OUTPUT" ]; then + echo "Error: --platform, --build-dir, and --output are all required" >&2 + exit 1 +fi + +echo "==> Packaging VM runtime" +echo " Platform: ${PLATFORM}" +echo " Build dir: ${BUILD_DIR}" +echo " Output: ${OUTPUT}" +echo "" + +# ── Create staging directory ──────────────────────────────────────────── + +PACKAGE_DIR="$(mktemp -d)" +trap 'rm -rf "$PACKAGE_DIR"' EXIT + +# ── Copy runtime libraries ────────────────────────────────────────────── + +case "$PLATFORM" in + linux-*) + cp "${BUILD_DIR}/libkrun.so" "${PACKAGE_DIR}/" + # Copy libkrunfw — find versioned .so and create soname symlink + for f in "${BUILD_DIR}"/libkrunfw.so*; do + [ -f "$f" ] && cp "$f" "${PACKAGE_DIR}/" + done + if [ ! -f "${PACKAGE_DIR}/libkrunfw.so.5" ]; then + versioned="$(ls "${PACKAGE_DIR}"/libkrunfw.so.5.* 2>/dev/null | head -n1 || true)" + [ -n "$versioned" ] && cp "$versioned" "${PACKAGE_DIR}/libkrunfw.so.5" + fi + ;; + darwin-aarch64) + cp "${BUILD_DIR}/libkrun.dylib" "${PACKAGE_DIR}/" + # libkrunfw — prefer build dir, fall back to custom runtime dir + candidates=("${BUILD_DIR}/libkrunfw.dylib" "${BUILD_DIR}/libkrunfw.5.dylib") + if [ -n "$CUSTOM_PROVENANCE_DIR" ]; then + candidates+=("${CUSTOM_PROVENANCE_DIR}/libkrunfw.dylib" "${CUSTOM_PROVENANCE_DIR}/libkrunfw.5.dylib") + fi + for candidate in "${candidates[@]}"; do + if [ -f "$candidate" ]; then + cp "$candidate" "${PACKAGE_DIR}/" + fi + done + ;; + *) + echo "Error: Unknown platform: ${PLATFORM}" >&2 + exit 1 + ;; +esac + +# ── Download gvproxy ──────────────────────────────────────────────────── + +echo "==> Downloading gvproxy ${GVPROXY_VERSION} for ${PLATFORM}..." 
+case "$PLATFORM" in + linux-aarch64) GVPROXY_SUFFIX="linux-arm64" ;; + linux-x86_64) GVPROXY_SUFFIX="linux-amd64" ;; + darwin-aarch64) GVPROXY_SUFFIX="darwin" ;; +esac + +curl -fsSL -o "${PACKAGE_DIR}/gvproxy" \ + "https://github.com/containers/gvisor-tap-vsock/releases/download/${GVPROXY_VERSION}/gvproxy-${GVPROXY_SUFFIX}" +chmod +x "${PACKAGE_DIR}/gvproxy" + +# ── Write provenance metadata ─────────────────────────────────────────── + +echo "==> Writing provenance metadata..." + +LIBKRUNFW_COMMIT="unknown" +KERNEL_VERSION="unknown" + +# Try custom provenance first (macOS builds produce this) +if [ -n "$CUSTOM_PROVENANCE_DIR" ] && [ -f "${CUSTOM_PROVENANCE_DIR}/provenance.json" ]; then + LIBKRUNFW_COMMIT="$(jq -r '.libkrunfw_commit // "unknown"' "${CUSTOM_PROVENANCE_DIR}/provenance.json" 2>/dev/null || echo unknown)" + KERNEL_VERSION="$(jq -r '.kernel_version // "unknown"' "${CUSTOM_PROVENANCE_DIR}/provenance.json" 2>/dev/null || echo unknown)" +fi + +# Fall back to inspecting the build directory (Linux builds) +if [ "$LIBKRUNFW_COMMIT" = "unknown" ] && [ -d "${BUILD_DIR}/libkrunfw/.git" ]; then + LIBKRUNFW_COMMIT="$(git -C "${BUILD_DIR}/libkrunfw" rev-parse HEAD 2>/dev/null || echo unknown)" +fi +if [ "$KERNEL_VERSION" = "unknown" ] && [ -f "${BUILD_DIR}/libkrunfw/Makefile" ]; then + KERNEL_VERSION="$(grep -oE 'KERNEL_VERSION\s*=\s*linux-[^\s]+' "${BUILD_DIR}/libkrunfw/Makefile" | head -1 | sed 's/.*= *//' || echo unknown)" +fi + +if ! 
command -v jq &>/dev/null; then + echo "Error: jq is required for provenance generation" >&2 + exit 1 +fi + +jq -n \ + --arg artifact "vm-runtime" \ + --arg platform "$PLATFORM" \ + --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ + --arg kfw_commit "$LIBKRUNFW_COMMIT" \ + --arg kver "$KERNEL_VERSION" \ + --arg sha "${GITHUB_SHA:-unknown}" \ + --arg run "${GITHUB_RUN_ID:-unknown}" \ + '{artifact: $artifact, platform: $platform, build_timestamp: $ts, libkrunfw_commit: $kfw_commit, kernel_version: $kver, github_sha: $sha, github_run_id: $run}' \ + > "${PACKAGE_DIR}/provenance.json" + +# ── Create tarball ────────────────────────────────────────────────────── + +echo "==> Creating tarball..." +mkdir -p "$(dirname "$OUTPUT")" +tar -C "${PACKAGE_DIR}" -cf - . | zstd -19 -T0 -o "$OUTPUT" + +echo "" +echo "==> Packaged ${OUTPUT} ($(du -sh "$OUTPUT" | cut -f1))" +echo " Contents:" +ls -lah "${PACKAGE_DIR}" diff --git a/tasks/scripts/vm/run-vm.sh b/tasks/scripts/vm/run-vm.sh new file mode 100755 index 000000000..630d1eecd --- /dev/null +++ b/tasks/scripts/vm/run-vm.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" +RUNTIME_DIR="${ROOT}/target/debug/openshell-vm.runtime" +GATEWAY_BIN="${ROOT}/target/debug/openshell-vm" + +if [ "$(uname -s)" = "Darwin" ]; then + export DYLD_FALLBACK_LIBRARY_PATH="${RUNTIME_DIR}${DYLD_FALLBACK_LIBRARY_PATH:+:${DYLD_FALLBACK_LIBRARY_PATH}}" +fi + +args=("$@") +name="default" +rootfs_args=() +expect_name=0 +expect_rootfs=0 +subcommand="" +skip_prepare=0 + +for arg in "${args[@]}"; do + if [ "${expect_name}" -eq 1 ]; then + name="${arg}" + expect_name=0 + continue + fi + + if [ "${expect_rootfs}" -eq 1 ]; then + rootfs_args=(--rootfs "${arg}") + expect_rootfs=0 + continue + fi + + case "${arg}" in + --name) + expect_name=1 + ;; + --name=*) + name="${arg#--name=}" + ;; + --rootfs) + expect_rootfs=1 + ;; + --rootfs=*) + rootfs_args=("${arg}") + ;; + --help|-h|--version) + skip_prepare=1 + ;; + exec|prepare-rootfs) + subcommand="${arg}" + break + ;; + esac +done + +if [ "${skip_prepare}" -eq 0 ] && [ -z "${subcommand}" ]; then + prep_args=(--name "${name}") + if [ "${#rootfs_args[@]}" -gt 0 ]; then + prep_args=("${rootfs_args[@]}" "${prep_args[@]}") + fi + resolved_rootfs="$("${ROOT}/tasks/scripts/vm/ensure-vm-rootfs.sh" "${prep_args[@]}" \ + | tail -n 1 | sed 's/^using openshell-vm rootfs at //')" + "${ROOT}/tasks/scripts/vm/sync-vm-rootfs.sh" "${prep_args[@]}" + + # When no --rootfs was supplied by the caller, inject the resolved rootfs path + # so the binary finds the rootfs regardless of whether it is embedded. + if [ "${#rootfs_args[@]}" -eq 0 ] && [ -n "${resolved_rootfs}" ]; then + args=(--rootfs "${resolved_rootfs}" "${args[@]}") + fi +fi + +exec "${GATEWAY_BIN}" "${args[@]}" diff --git a/tasks/scripts/vm/sync-vm-rootfs.sh b/tasks/scripts/vm/sync-vm-rootfs.sh new file mode 100755 index 000000000..fa13ee1e5 --- /dev/null +++ b/tasks/scripts/vm/sync-vm-rootfs.sh @@ -0,0 +1,182 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0
+
+# Sync mutable development artifacts into the existing VM rootfs.
+# Runs on every `mise run vm` so that script changes, helm chart
+# updates, manifest changes, and supervisor binary rebuilds are
+# picked up without a full rootfs rebuild.
+#
+# This is fast (<1s) — it only copies files, no Docker or VM boot.
+
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"
+SCRIPT_DIR="${ROOT}/crates/openshell-vm/scripts"
+IMAGE_REPO_BASE="${IMAGE_REPO_BASE:-openshell}"
+IMAGE_TAG="${IMAGE_TAG:-dev}"
+SERVER_IMAGE="${IMAGE_REPO_BASE}/gateway:${IMAGE_TAG}"
+NAME="default"
+ROOTFS_ARGS=()
+
+# Parse --name[=NAME] (instance selector) and --rootfs[=PATH] (forwarded
+# verbatim to ensure-vm-rootfs.sh); anything else is an error.
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --name)
+      NAME="$2"
+      shift 2
+      ;;
+    --name=*)
+      NAME="${1#--name=}"
+      shift
+      ;;
+    --rootfs)
+      ROOTFS_ARGS=("$1" "$2")
+      shift 2
+      ;;
+    --rootfs=*)
+      ROOTFS_ARGS=("$1")
+      shift
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
+ensure_args=(--name "${NAME}")
+if [ "${#ROOTFS_ARGS[@]}" -gt 0 ]; then
+  ensure_args=("${ROOTFS_ARGS[@]}" "${ensure_args[@]}")
+fi
+
+if ! ROOTFS_DIR="$("${ROOT}/tasks/scripts/vm/ensure-vm-rootfs.sh" "${ensure_args[@]}" | tail -n 1 | sed 's/^using openshell-vm rootfs at //')"; then
+  echo "ERROR: ensure-vm-rootfs.sh failed — no rootfs available." >&2
+  exit 1
+fi
+
+patch_vm_helmchart() {
+  local helmchart="$1"
+  [ -f "${helmchart}" ] || return 0
+
+  sed_in_place() {
+    local expr="$1"
+    sed -i.bak -E "${expr}" "${helmchart}"
+    rm -f "${helmchart}.bak"
+  }
+
+  # Mirror the build-rootfs placeholder patching (image repo/tag, gateway
+  # settings) so the VM keeps using the local openshell/gateway:dev image.
+  sed_in_place 's|__IMAGE_PULL_POLICY__|IfNotPresent|g'
+  sed_in_place 's|__SANDBOX_IMAGE_PULL_POLICY__|"IfNotPresent"|g'
+  sed_in_place 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g'
+  sed_in_place "s|repository:[[:space:]]*[^[:space:]]+|repository: ${SERVER_IMAGE%:*}|"
+  sed_in_place "s|tag:[[:space:]]*\"?[^\"[:space:]]+\"?|tag: \"${IMAGE_TAG}\"|"
+  sed_in_place 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g'
+  sed_in_place 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g'
+  sed_in_place 's|__DISABLE_GATEWAY_AUTH__|false|g'
+  sed_in_place 's|__DISABLE_TLS__|false|g'
+  sed_in_place 's|hostGatewayIP: __HOST_GATEWAY_IP__|hostGatewayIP: ""|g'
+  sed_in_place '/__CHART_CHECKSUM__/d'
+}
+
+if [ ! -d "${ROOTFS_DIR}/srv" ]; then
+  # Rootfs doesn't exist yet — nothing to sync. ensure-vm-rootfs.sh
+  # or build-rootfs.sh will create it.
+  exit 0
+fi
+
+echo "Syncing development artifacts into rootfs..."
+
+# ── Init scripts and utilities ─────────────────────────────────────────
+for script in openshell-vm-init.sh openshell-vm-exec-agent.py check-vm-capabilities.sh; do
+  src="${SCRIPT_DIR}/${script}"
+  dst="${ROOTFS_DIR}/srv/${script}"
+  if [ -f "$src" ]; then
+    if ! cmp -s "$src" "$dst" 2>/dev/null; then
+      cp "$src" "$dst"
+      chmod +x "$dst"
+      echo " updated: /srv/${script}"
+    fi
+  fi
+done
+
+# ── Helm chart ─────────────────────────────────────────────────────────
+HELM_CHART_DIR="${ROOT}/deploy/helm/openshell"
+CHART_STAGING="${ROOTFS_DIR}/opt/openshell/charts"
+if [ -d "${HELM_CHART_DIR}" ]; then
+  mkdir -p "${CHART_STAGING}"
+  # Package into a temp dir and byte-compare — only copy charts that changed;
+  # helm output/errors are suppressed, so a packaging failure is best-effort.
+  TMP_CHART=$(mktemp -d)
+  helm package "${HELM_CHART_DIR}" -d "${TMP_CHART}" >/dev/null 2>&1
+  for tgz in "${TMP_CHART}"/*.tgz; do
+    [ -f "$tgz" ] || continue
+    base=$(basename "$tgz")
+    if ! cmp -s "$tgz" "${CHART_STAGING}/${base}" 2>/dev/null; then
+      cp "$tgz" "${CHART_STAGING}/${base}"
+      echo " updated: /opt/openshell/charts/${base}"
+    fi
+  done
+  rm -rf "${TMP_CHART}"
+fi
+
+# ── Kubernetes manifests ───────────────────────────────────────────────
+MANIFEST_SRC="${ROOT}/deploy/kube/manifests"
+MANIFEST_DST="${ROOTFS_DIR}/opt/openshell/manifests"
+if [ -d "${MANIFEST_SRC}" ]; then
+  mkdir -p "${MANIFEST_DST}"
+  for manifest in "${MANIFEST_SRC}"/*.yaml; do
+    [ -f "$manifest" ] || continue
+    base=$(basename "$manifest")
+    if ! cmp -s "$manifest" "${MANIFEST_DST}/${base}" 2>/dev/null; then
+      cp "$manifest" "${MANIFEST_DST}/${base}"
+      echo " updated: /opt/openshell/manifests/${base}"
+    fi
+  done
+fi
+
+patch_vm_helmchart "${MANIFEST_DST}/openshell-helmchart.yaml"
+patch_vm_helmchart "${ROOTFS_DIR}/var/lib/rancher/k3s/server/manifests/openshell-helmchart.yaml"
+
+# ── Gateway image tarball ──────────────────────────────────────────────
+# The VM rootfs airgap-imports openshell/gateway:dev from k3s/agent/images/.
+# Keep that tarball in sync with the local Docker image so `mise run e2e:vm`
+# validates the current openshell-server code, not whatever image happened to
+# be baked into the rootfs last time it was rebuilt.
+SERVER_IMAGE_TAR="${ROOTFS_DIR}/var/lib/rancher/k3s/agent/images/openshell-server.tar.zst"
+SERVER_IMAGE_ID_FILE="${ROOTFS_DIR}/opt/openshell/.gateway-image-id"
+if command -v docker >/dev/null 2>&1 && docker image inspect "${SERVER_IMAGE}" >/dev/null 2>&1; then
+  current_image_id=$(docker image inspect --format '{{.Id}}' "${SERVER_IMAGE}")
+  previous_image_id=""
+  if [ -f "${SERVER_IMAGE_ID_FILE}" ]; then
+    previous_image_id=$(cat "${SERVER_IMAGE_ID_FILE}")
+  fi
+
+  # Re-export only when the image ID changed or the tarball is missing;
+  # write to a temp file first so a failed save never clobbers the tarball.
+  if [ "${current_image_id}" != "${previous_image_id}" ] || [ ! -f "${SERVER_IMAGE_TAR}" ]; then
+    mkdir -p "$(dirname "${SERVER_IMAGE_TAR}")" "$(dirname "${SERVER_IMAGE_ID_FILE}")"
+    tmp_tar=$(mktemp /tmp/openshell-server-image.XXXXXX)
+    docker save "${SERVER_IMAGE}" | zstd -f -T0 -3 -o "${tmp_tar}" >/dev/null
+    mv "${tmp_tar}" "${SERVER_IMAGE_TAR}"
+    printf '%s\n' "${current_image_id}" > "${SERVER_IMAGE_ID_FILE}"
+    echo " updated: /var/lib/rancher/k3s/agent/images/openshell-server.tar.zst"
+  fi
+fi
+
+# ── Supervisor binary ─────────────────────────────────────────────────
+SUPERVISOR_TARGET="aarch64-unknown-linux-gnu"
+SUPERVISOR_BIN="${ROOT}/target/${SUPERVISOR_TARGET}/release/openshell-sandbox"
+SUPERVISOR_DST="${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox"
+if [ -f "${SUPERVISOR_BIN}" ]; then
+  mkdir -p "$(dirname "${SUPERVISOR_DST}")"
+  if ! cmp -s "${SUPERVISOR_BIN}" "${SUPERVISOR_DST}" 2>/dev/null; then
+    cp "${SUPERVISOR_BIN}" "${SUPERVISOR_DST}"
+    chmod +x "${SUPERVISOR_DST}"
+    echo " updated: /opt/openshell/bin/openshell-sandbox"
+  fi
+fi
+
+# ── Fix execute permissions on k3s data binaries ──────────────────────
+# docker export and macOS virtio-fs can strip execute bits.
+chmod +x "${ROOTFS_DIR}"/var/lib/rancher/k3s/data/*/bin/* 2>/dev/null || true
+chmod +x "${ROOTFS_DIR}"/var/lib/rancher/k3s/data/*/bin/aux/* 2>/dev/null || true
+
+echo "Sync complete."
diff --git a/tasks/scripts/vm/vm-clean.sh b/tasks/scripts/vm/vm-clean.sh
new file mode 100755
index 000000000..c293348d0
--- /dev/null
+++ b/tasks/scripts/vm/vm-clean.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Remove all openshell-vm cached artifacts.
+#
+# Use this when you need a clean slate — after running this, you will need to
+# re-run `mise run vm:setup` before building again.
+#
+# Usage:
+#   ./vm-clean.sh          # clean VM-specific artifacts
+#   ./vm-clean.sh --all    # also remove the compiled binary
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "${SCRIPT_DIR}/_lib.sh"
+ROOT="$(vm_lib_root)"
+
+CLEAN_ALL=0
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --all)
+      CLEAN_ALL=1
+      shift
+      ;;
+    --help|-h)
+      echo "Usage: $0 [--all]"
+      echo ""
+      echo "Remove all openshell-vm cached build artifacts."
+      echo ""
+      echo "Options:"
+      echo " --all Also remove compiled binaries (target/debug/openshell-vm)"
+      exit 0
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
+echo "==> Cleaning openshell-vm artifacts..."
+
+removed=0
+
+# Remove a path if present, reporting its size (best-effort; "?" when du
+# fails) and bumping the shared `removed` counter.
+remove_if_exists() {
+  local path="$1"
+  local label="$2"
+  if [ -e "$path" ]; then
+    local size
+    size="$(du -sh "$path" 2>/dev/null | cut -f1 || echo "?")"
+    rm -rf "$path"
+    echo " Removed ${label} (${size}): ${path}"
+    removed=$((removed + 1))
+  fi
+}
+
+# Build artifacts under target/ (staging, downloads, and source builds)
+remove_if_exists "${ROOT}/target/vm-runtime" "uncompressed staging"
+remove_if_exists "${ROOT}/target/vm-runtime-compressed" "compressed artifacts"
+remove_if_exists "${ROOT}/target/vm-runtime-download" "downloaded tarballs"
+remove_if_exists "${ROOT}/target/vm-runtime-extracted" "extraction temp"
+remove_if_exists "${ROOT}/target/libkrun-build" "libkrun source build"
+remove_if_exists "${ROOT}/target/custom-runtime" "custom libkrunfw"
+remove_if_exists "${ROOT}/target/rootfs-build" "rootfs directory"
+
+# Named instance rootfs directories
+XDG_DATA="${XDG_DATA_HOME:-${HOME}/.local/share}"
+VM_DATA_DIR="${XDG_DATA}/openshell/openshell-vm"
+remove_if_exists "${VM_DATA_DIR}" "named instance rootfs"
+
+# Embedded runtime cache
+VM_RUNTIME_CACHE="${XDG_DATA}/openshell/vm-runtime"
+remove_if_exists "${VM_RUNTIME_CACHE}" "embedded runtime cache"
+
+if [ "$CLEAN_ALL" -eq 1 ]; then
+  # Remove compiled binaries and their sidecar runtime bundles
+  for profile in debug release; do
+    remove_if_exists "${ROOT}/target/${profile}/openshell-vm" "${profile} binary"
+    remove_if_exists "${ROOT}/target/${profile}/openshell-vm.runtime" "${profile} runtime bundle"
+  done
+fi
+
+echo ""
+if [ "$removed" -eq 0 ]; then
+  echo " Nothing to clean."
+else
+  echo " Removed ${removed} item(s)."
+fi
+echo ""
+echo "Next step: mise run vm:setup"
diff --git a/tasks/scripts/vm/vm-setup.sh b/tasks/scripts/vm/vm-setup.sh
new file mode 100755
index 000000000..16eb2aaa9
--- /dev/null
+++ b/tasks/scripts/vm/vm-setup.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# One-time setup for the openshell-vm runtime.
+#
+# Downloads pre-built runtime artifacts (libkrun, libkrunfw, gvproxy) from the
+# vm-dev GitHub Release, or builds them from source when --from-source is set.
+# After obtaining the runtime, compresses the artifacts for embedding into the
+# openshell-vm binary.
+#
+# Usage:
+#   ./vm-setup.sh                 # download pre-built (default, ~30s)
+#   ./vm-setup.sh --from-source   # build from source (~15-45min)
+#
+# Environment:
+#   FROM_SOURCE=1 - Equivalent to --from-source
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "${SCRIPT_DIR}/_lib.sh"
+ROOT="$(vm_lib_root)"
+
+FROM_SOURCE="${FROM_SOURCE:-0}"
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --from-source)
+      FROM_SOURCE=1
+      shift
+      ;;
+    --help|-h)
+      echo "Usage: $0 [--from-source]"
+      echo ""
+      echo "Set up the openshell-vm runtime (libkrun, libkrunfw, gvproxy)."
+      echo ""
+      echo "Options:"
+      echo " --from-source Build runtime from source instead of downloading (~15-45min)"
+      echo ""
+      echo "Environment:"
+      echo " FROM_SOURCE=1 Equivalent to --from-source"
+      exit 0
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      echo "Use --help for usage information" >&2
+      exit 1
+      ;;
+  esac
+done
+
+PLATFORM="$(detect_platform)"
+echo "==> openshell-vm setup"
+echo " Platform: ${PLATFORM}"
+echo " Mode: $([ "$FROM_SOURCE" = "1" ] && echo "build from source" || echo "download pre-built")"
+echo ""
+
+# ── Obtain runtime artifacts ────────────────────────────────────────────
+
+if [ "$FROM_SOURCE" = "1" ]; then
+  echo "==> Building runtime from source..."
+  echo ""
+
+  # NOTE(review): no default case arm — an unrecognized platform builds
+  # nothing here; presumably detect_platform already rejects it (confirm).
+  case "$PLATFORM" in
+    darwin-aarch64)
+      # macOS: build custom libkrunfw (kernel) then portable libkrun
+      "${ROOT}/crates/openshell-vm/runtime/build-custom-libkrunfw.sh"
+      echo ""
+      "${ROOT}/tasks/scripts/vm/build-libkrun-macos.sh"
+      ;;
+    linux-*)
+      # Linux: build both libkrunfw and libkrun in one go
+      "${ROOT}/tasks/scripts/vm/build-libkrun.sh"
+      ;;
+  esac
+  echo ""
+  echo "==> Compressing runtime artifacts for embedding..."
+  "${ROOT}/tasks/scripts/vm/compress-vm-runtime.sh"
+else
+  echo "==> Downloading pre-built runtime..."
+  "${ROOT}/tasks/scripts/vm/download-kernel-runtime.sh"
+fi
+
+# ── Validate ────────────────────────────────────────────────────────────
+
+OUTPUT_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${ROOT}/target/vm-runtime-compressed}"
+
+# Each platform needs its compressed libkrun, libkrunfw, and gvproxy.
+missing=0
+case "$PLATFORM" in
+  darwin-aarch64)
+    for f in libkrun.dylib.zst libkrunfw.5.dylib.zst gvproxy.zst; do
+      if [ ! -f "${OUTPUT_DIR}/${f}" ]; then
+        echo "ERROR: Missing ${OUTPUT_DIR}/${f}" >&2
+        missing=1
+      fi
+    done
+    ;;
+  linux-aarch64|linux-x86_64)
+    for f in libkrun.so.zst libkrunfw.so.5.zst gvproxy.zst; do
+      if [ ! -f "${OUTPUT_DIR}/${f}" ]; then
+        echo "ERROR: Missing ${OUTPUT_DIR}/${f}" >&2
+        missing=1
+      fi
+    done
+    ;;
+esac
+
+if [ "$missing" -eq 1 ]; then
+  echo "" >&2
+  echo "Setup failed: some runtime artifacts are missing." >&2
+  exit 1
+fi
+
+echo ""
+echo "==> Setup complete!"
+echo " Compressed artifacts in: ${OUTPUT_DIR}"
+echo ""
+echo "Next steps:"
+echo " mise run vm:rootfs --base # build rootfs (requires Docker)"
+echo " mise run vm # build and run the VM"
diff --git a/tasks/test.toml b/tasks/test.toml
index c383eafb5..f24ea6f2b 100644
--- a/tasks/test.toml
+++ b/tasks/test.toml
@@ -17,7 +17,7 @@ depends = ["e2e:python:gpu"]
 
 ["test:rust"]
 description = "Run Rust tests"
-run = "cargo test --workspace"
+run = "cargo test --workspace --exclude openshell-vm"
 hide = true
 
 ["test:python"]
@@ -47,3 +47,8 @@ description = "Run Python GPU e2e tests"
 depends = ["python:proto", "CLUSTER_GPU=1 cluster"]
 env = { UV_NO_SYNC = "1", PYTHONPATH = "python" }
 run = "uv run pytest -o python_files='test_*.py' -m gpu -n ${E2E_PARALLEL:-1} e2e/python"
+
+["e2e:vm"]
+description = "Boot openshell-vm and run smoke e2e (macOS ARM64; pass -- --vm-port=N [--vm-name=NAME] to reuse)"
+depends = ["build:docker:gateway", "vm:build"]
+run = "e2e/rust/e2e-vm.sh"
diff --git a/tasks/vm.toml b/tasks/vm.toml
new file mode 100644
index 000000000..ca06b08c1
--- /dev/null
+++ b/tasks/vm.toml
@@ -0,0 +1,44 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# openshell-vm development tasks
+#
+# Workflow:
+#   mise run vm:setup   # one-time: download pre-built runtime (~30s)
+#   mise run vm         # build + run the VM
+#   mise run vm:clean   # wipe everything and start over
+#
+# See crates/openshell-vm/README.md for full documentation.
+
+# ═══════════════════════════════════════════════════════════════════════════
+# Main Commands (workflow overview at the top of this file)
+# ═══════════════════════════════════════════════════════════════════════════
+
+[vm]
+description = "Build and run the openshell-vm microVM"
+depends = ["build:docker:gateway"]
+run = [
+  "mise run vm:build",
+  "tasks/scripts/vm/run-vm.sh",
+]
+
+["vm:build"]
+description = "Build the openshell-vm binary with embedded runtime"
+run = [
+  "tasks/scripts/vm/compress-vm-runtime.sh",
+  "OPENSHELL_VM_RUNTIME_COMPRESSED_DIR=$PWD/target/vm-runtime-compressed cargo build -p openshell-vm",
+  "tasks/scripts/vm/codesign-openshell-vm.sh",
+  "tasks/scripts/vm/bundle-vm-runtime.sh",
+]
+
+["vm:setup"]
+description = "One-time setup: download (or build) the VM runtime"
+run = "tasks/scripts/vm/vm-setup.sh"
+
+["vm:rootfs"]
+description = "Build the VM rootfs tarball (use -- --base for lightweight)"
+run = "tasks/scripts/vm/build-rootfs-tarball.sh"
+
+["vm:clean"]
+description = "Remove all VM cached artifacts (runtime, rootfs, builds)"
+run = "tasks/scripts/vm/vm-clean.sh"
diff --git a/uv.lock b/uv.lock
index 38a03ce29..3869daf05 100644
--- a/uv.lock
+++ b/uv.lock
@@ -537,6 +537,7 @@ dependencies = [
 dev = [
     { name = "grpcio-tools" },
     { name = "maturin" },
+    { name = "pyelftools" },
     { name = "pytest" },
     { name = "pytest-asyncio" },
     { name = "pytest-cov" },
@@ -566,6 +567,7 @@ requires-dist = [
 dev = [
     { name = "grpcio-tools", specifier = ">=1.60" },
     { name = "maturin", specifier = ">=1.5,<2.0" },
+    { name = "pyelftools", specifier = ">=0.30" },
     { name = "pytest", specifier = ">=8.0" },
     { name = "pytest-asyncio", specifier = ">=0.23" },
     { name = "pytest-cov", specifier = ">=4.0" },
@@ -635,6 +637,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e2/0d/8ba33fa83a7dcde13eb3c1c2a0c1cc29950a048bfed6d9b0d8b6bd710b4c/pydata_sphinx_theme-0.16.1-py3-none-any.whl", hash = "sha256:225331e8ac4b32682c18fcac5a57a6f717c4e632cea5dd0e247b55155faeccde", size = 6723264, upload-time = "2024-12-17T10:53:35.645Z" },
 ]
 
+[[package]]
+name = "pyelftools"
+version = "0.32"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b9/ab/33968940b2deb3d92f5b146bc6d4009a5f95d1d06c148ea2f9ee965071af/pyelftools-0.32.tar.gz", hash = "sha256:6de90ee7b8263e740c8715a925382d4099b354f29ac48ea40d840cf7aa14ace5", size = 15047199, upload-time = "2025-02-19T14:20:05.549Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/af/43/700932c4f0638c3421177144a2e86448c0d75dbaee2c7936bda3f9fd0878/pyelftools-0.32-py3-none-any.whl", hash = "sha256:013df952a006db5e138b1edf6d8a68ecc50630adbd0d83a2d41e7f846163d738", size = 188525, upload-time = "2025-02-19T14:19:59.919Z" },
+]
+
 [[package]]
 name = "pygments"
 version = "2.20.0"