diff --git a/.github/workflows/release-vm-dev.yml b/.github/workflows/release-vm-dev.yml new file mode 100644 index 000000000..d08a208bd --- /dev/null +++ b/.github/workflows/release-vm-dev.yml @@ -0,0 +1,518 @@ +name: Release VM Dev + +# Build openshell-vm binaries for all supported platforms and upload them to +# the rolling "vm-dev" GitHub Release. Each binary is self-extracting: it +# embeds pre-built kernel runtime artifacts (from release-vm-kernel.yml) and a +# base rootfs tarball. +# +# Prerequisites: the vm-dev release must already contain kernel runtime +# tarballs. Run the "Release VM Kernel" workflow first if they are missing. + +on: + push: + branches: [main] + workflow_dispatch: + +permissions: + contents: write + packages: read + +# Serialize with release-vm-kernel.yml — both update the vm-dev release. +concurrency: + group: vm-dev-release + cancel-in-progress: false + +defaults: + run: + shell: bash + +jobs: + # --------------------------------------------------------------------------- + # Compute versions (reuse the same logic as release-dev.yml) + # --------------------------------------------------------------------------- + compute-versions: + name: Compute Versions + runs-on: build-amd64 + timeout-minutes: 5 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + outputs: + cargo_version: ${{ steps.v.outputs.cargo }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Fetch tags + run: git fetch --tags --force + + - name: Compute versions + id: v + run: | + set -euo pipefail + echo "cargo=$(uv run python tasks/scripts/release.py get-version --cargo)" >> "$GITHUB_OUTPUT" + + # --------------------------------------------------------------------------- + # Download kernel runtime tarballs from the vm-dev release + # 
--------------------------------------------------------------------------- + download-kernel-runtime: + name: Download Kernel Runtime + runs-on: build-amd64 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + + - name: Download all runtime tarballs + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + mkdir -p runtime-artifacts + + for platform in linux-aarch64 linux-x86_64 darwin-aarch64; do + echo "Downloading vm-runtime-${platform}.tar.zst..." + gh release download vm-dev \ + --repo "${GITHUB_REPOSITORY}" \ + --pattern "vm-runtime-${platform}.tar.zst" \ + --dir runtime-artifacts \ + --clobber + done + + echo "Downloaded runtime artifacts:" + ls -lah runtime-artifacts/ + + - name: Verify downloads + run: | + set -euo pipefail + for platform in linux-aarch64 linux-x86_64 darwin-aarch64; do + file="runtime-artifacts/vm-runtime-${platform}.tar.zst" + if [ ! -f "$file" ]; then + echo "ERROR: Missing ${file}" >&2 + echo "" >&2 + echo "The vm-dev release does not have kernel runtime artifacts." 
>&2 + echo "Run the 'Release VM Kernel' workflow first:" >&2 + echo " gh workflow run release-vm-kernel.yml" >&2 + exit 1 + fi + echo "OK: ${file} ($(du -sh "$file" | cut -f1))" + done + + - name: Upload as workflow artifact + uses: actions/upload-artifact@v4 + with: + name: kernel-runtime-tarballs + path: runtime-artifacts/vm-runtime-*.tar.zst + retention-days: 1 + + # --------------------------------------------------------------------------- + # Build base rootfs tarballs (architecture-specific) + # --------------------------------------------------------------------------- + build-rootfs: + name: Build Rootfs (${{ matrix.arch }}) + needs: [compute-versions] + strategy: + matrix: + include: + - arch: arm64 + runner: build-arm64 + guest_arch: aarch64 + - arch: amd64 + runner: build-amd64 + guest_arch: x86_64 + runs-on: ${{ matrix.runner }} + timeout-minutes: 30 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --privileged + volumes: + - /var/run/docker.sock:/var/run/docker.sock + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OPENSHELL_IMAGE_TAG: dev + steps: + - uses: actions/checkout@v4 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Log in to GHCR + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + + - name: Install tools + run: mise install + + - name: Build base rootfs tarball + run: | + set -euo pipefail + crates/openshell-vm/scripts/build-rootfs.sh \ + --base \ + --arch ${{ matrix.guest_arch }} \ + target/rootfs-build + + mkdir -p target/vm-runtime-compressed + tar -C target/rootfs-build -cf - . 
\ + | zstd -19 -T0 -o target/vm-runtime-compressed/rootfs.tar.zst + + echo "Rootfs tarball: $(du -sh target/vm-runtime-compressed/rootfs.tar.zst | cut -f1)" + + - name: Upload rootfs artifact + uses: actions/upload-artifact@v4 + with: + name: rootfs-${{ matrix.arch }} + path: target/vm-runtime-compressed/rootfs.tar.zst + retention-days: 1 + + # --------------------------------------------------------------------------- + # Build openshell-vm binary (Linux — native on each arch) + # --------------------------------------------------------------------------- + build-vm-linux: + name: Build VM (Linux ${{ matrix.arch }}) + needs: [compute-versions, download-kernel-runtime, build-rootfs] + strategy: + matrix: + include: + - arch: arm64 + runner: build-arm64 + target: aarch64-unknown-linux-gnu + platform: linux-aarch64 + guest_arch: aarch64 + - arch: amd64 + runner: build-amd64 + target: x86_64-unknown-linux-gnu + platform: linux-x86_64 + guest_arch: x86_64 + runs-on: ${{ matrix.runner }} + timeout-minutes: 30 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --privileged + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }} + OPENSHELL_IMAGE_TAG: dev + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Fetch tags + run: git fetch --tags --force + + - name: Install tools + run: mise install + + - name: Cache Rust target and registry + uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2 + with: + shared-key: vm-linux-${{ matrix.arch }} + cache-directories: .cache/sccache + cache-targets: "true" + + - name: Download kernel runtime tarball + uses: actions/download-artifact@v4 + with: + name: kernel-runtime-tarballs + path: runtime-download/ + + - name: 
Download rootfs tarball + uses: actions/download-artifact@v4 + with: + name: rootfs-${{ matrix.arch }} + path: rootfs-download/ + + - name: Stage compressed runtime for embedding + run: | + set -euo pipefail + COMPRESSED_DIR="${PWD}/target/vm-runtime-compressed" + mkdir -p "$COMPRESSED_DIR" + + # Extract kernel runtime tarball and re-compress individual files + EXTRACT_DIR=$(mktemp -d) + zstd -d "runtime-download/vm-runtime-${{ matrix.platform }}.tar.zst" --stdout \ + | tar -xf - -C "$EXTRACT_DIR" + + echo "Extracted runtime files:" + ls -lah "$EXTRACT_DIR" + + for file in "$EXTRACT_DIR"/*; do + [ -f "$file" ] || continue + name=$(basename "$file") + [ "$name" = "provenance.json" ] && continue + zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file" + done + + # Copy rootfs tarball (already zstd-compressed) + cp rootfs-download/rootfs.tar.zst "${COMPRESSED_DIR}/rootfs.tar.zst" + + echo "Staged compressed artifacts:" + ls -lah "$COMPRESSED_DIR" + + - name: Scope workspace to VM crates + run: | + set -euo pipefail + sed -i 's|members = \["crates/\*"\]|members = ["crates/openshell-vm", "crates/openshell-core", "crates/openshell-bootstrap", "crates/openshell-policy"]|' Cargo.toml + + - name: Patch workspace version + if: needs.compute-versions.outputs.cargo_version != '' + run: | + set -euo pipefail + sed -i -E '/^\[workspace\.package\]/,/^\[/{s/^version[[:space:]]*=[[:space:]]*".*"/version = "'"${{ needs.compute-versions.outputs.cargo_version }}"'"/}' Cargo.toml + + - name: Build openshell-vm + run: | + set -euo pipefail + OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${PWD}/target/vm-runtime-compressed" \ + mise x -- cargo build --release -p openshell-vm + + - name: sccache stats + if: always() + run: mise x -- sccache --show-stats + + - name: Package binary + run: | + set -euo pipefail + mkdir -p artifacts + tar -czf "artifacts/openshell-vm-${{ matrix.target }}.tar.gz" \ + -C target/release openshell-vm + ls -lh artifacts/ + + - name: Upload artifact + uses: 
actions/upload-artifact@v4 + with: + name: vm-linux-${{ matrix.arch }} + path: artifacts/*.tar.gz + retention-days: 5 + + # --------------------------------------------------------------------------- + # Build openshell-vm binary (macOS ARM64 via osxcross) + # --------------------------------------------------------------------------- + build-vm-macos: + name: Build VM (macOS) + needs: [compute-versions, download-kernel-runtime, build-rootfs] + runs-on: build-amd64 + timeout-minutes: 60 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --privileged + volumes: + - /var/run/docker.sock:/var/run/docker.sock + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Fetch tags + run: git fetch --tags --force + + - name: Log in to GHCR + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + + - name: Set up Docker Buildx + uses: ./.github/actions/setup-buildx + + - name: Download kernel runtime tarball + uses: actions/download-artifact@v4 + with: + name: kernel-runtime-tarballs + path: runtime-download/ + + - name: Download rootfs tarball (arm64) + uses: actions/download-artifact@v4 + with: + name: rootfs-arm64 + path: rootfs-download/ + + - name: Prepare compressed runtime directory + run: | + set -euo pipefail + COMPRESSED_DIR="${PWD}/target/vm-runtime-compressed-macos" + mkdir -p "$COMPRESSED_DIR" + + # Extract the darwin runtime tarball and re-compress for embedding. 
+ # The macOS embedded.rs expects: libkrun.dylib.zst, libkrunfw.5.dylib.zst, gvproxy.zst + EXTRACT_DIR=$(mktemp -d) + zstd -d "runtime-download/vm-runtime-darwin-aarch64.tar.zst" --stdout \ + | tar -xf - -C "$EXTRACT_DIR" + + echo "Extracted darwin runtime files:" + ls -lah "$EXTRACT_DIR" + + for file in "$EXTRACT_DIR"/*; do + [ -f "$file" ] || continue + name=$(basename "$file") + [ "$name" = "provenance.json" ] && continue + zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file" + done + + # The macOS VM guest is always Linux ARM64, so use the arm64 rootfs + cp rootfs-download/rootfs.tar.zst "${COMPRESSED_DIR}/rootfs.tar.zst" + + echo "Staged macOS compressed artifacts:" + ls -lah "$COMPRESSED_DIR" + + - name: Build macOS binary via Docker (osxcross) + run: | + set -euo pipefail + docker buildx build \ + --file deploy/docker/Dockerfile.vm-macos \ + --build-arg OPENSHELL_CARGO_VERSION="${{ needs.compute-versions.outputs.cargo_version }}" \ + --build-arg OPENSHELL_IMAGE_TAG=dev \ + --build-arg CARGO_TARGET_CACHE_SCOPE="${{ github.sha }}" \ + --build-context vm-runtime-compressed="${PWD}/target/vm-runtime-compressed-macos" \ + --target binary \ + --output type=local,dest=out/ \ + . 
+ + - name: Package binary + run: | + set -euo pipefail + mkdir -p artifacts + tar -czf artifacts/openshell-vm-aarch64-apple-darwin.tar.gz \ + -C out openshell-vm + ls -lh artifacts/ + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: vm-macos + path: artifacts/*.tar.gz + retention-days: 5 + + # --------------------------------------------------------------------------- + # Upload all VM binaries to the vm-dev rolling release + # --------------------------------------------------------------------------- + release-vm-dev: + name: Release VM Dev + needs: [build-vm-linux, build-vm-macos] + runs-on: build-amd64 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + + - name: Download all VM binary artifacts + uses: actions/download-artifact@v4 + with: + pattern: vm-* + path: release/ + merge-multiple: true + + - name: Filter to only binary tarballs + run: | + set -euo pipefail + mkdir -p release-final + # Only include the openshell-vm binary tarballs, not kernel runtime + cp release/openshell-vm-*.tar.gz release-final/ + count=$(ls release-final/openshell-vm-*.tar.gz 2>/dev/null | wc -l) + if [ "$count" -eq 0 ]; then + echo "ERROR: No VM binary tarballs found in release/" >&2 + exit 1 + fi + echo "Release artifacts (${count} binaries):" + ls -lh release-final/ + + - name: Generate checksums + run: | + set -euo pipefail + cd release-final + sha256sum openshell-vm-*.tar.gz > vm-binary-checksums-sha256.txt + cat vm-binary-checksums-sha256.txt + + - name: Ensure vm-dev tag exists + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git tag -fa vm-dev -m "VM Development Build" "${GITHUB_SHA}" + git push --force origin vm-dev + + - name: Prune stale VM binary assets from vm-dev release + uses: actions/github-script@v7 + with: + script: | + const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/'); + let release; + try { + release = await 
github.rest.repos.getReleaseByTag({ owner, repo, tag: 'vm-dev' }); + } catch (err) { + if (err.status === 404) { + core.info('No existing vm-dev release; will create fresh.'); + return; + } + throw err; + } + // Delete old VM binary assets (keep kernel runtime assets) + for (const asset of release.data.assets) { + if (asset.name.startsWith('openshell-vm-') || asset.name === 'vm-binary-checksums-sha256.txt') { + core.info(`Deleting stale asset: ${asset.name}`); + await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: asset.id }); + } + } + + - name: Upload to vm-dev GitHub Release + uses: softprops/action-gh-release@v2 + with: + name: OpenShell VM Development Build + prerelease: true + tag_name: vm-dev + target_commitish: ${{ github.sha }} + body: | + Rolling development build of **openshell-vm** — the MicroVM runtime for OpenShell. + + > **NOTE**: This is a development build, not a tagged release, and may be unstable. + + ### Kernel Runtime Artifacts + + Pre-built kernel runtime (libkrunfw + libkrun + gvproxy) for embedding into + the openshell-vm binary. These are rebuilt when the kernel config or pinned + dependency versions change. + + | Platform | Artifact | + |----------|----------| + | Linux ARM64 | `vm-runtime-linux-aarch64.tar.zst` | + | Linux x86_64 | `vm-runtime-linux-x86_64.tar.zst` | + | macOS ARM64 | `vm-runtime-darwin-aarch64.tar.zst` | + + ### VM Binaries + + Self-extracting openshell-vm binaries with embedded kernel runtime and base + rootfs. These are rebuilt on every push to main. 
+ + | Platform | Artifact | + |----------|----------| + | Linux ARM64 | `openshell-vm-aarch64-unknown-linux-gnu.tar.gz` | + | Linux x86_64 | `openshell-vm-x86_64-unknown-linux-gnu.tar.gz` | + | macOS ARM64 | `openshell-vm-aarch64-apple-darwin.tar.gz` | + + **macOS users:** The binary must be codesigned with the Hypervisor entitlement: + ```bash + codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - ./openshell-vm + ``` + + files: | + release-final/openshell-vm-aarch64-unknown-linux-gnu.tar.gz + release-final/openshell-vm-x86_64-unknown-linux-gnu.tar.gz + release-final/openshell-vm-aarch64-apple-darwin.tar.gz + release-final/vm-binary-checksums-sha256.txt diff --git a/.github/workflows/release-vm-kernel.yml b/.github/workflows/release-vm-kernel.yml new file mode 100644 index 000000000..d461cd166 --- /dev/null +++ b/.github/workflows/release-vm-kernel.yml @@ -0,0 +1,247 @@ +name: Release VM Kernel + +# Build custom libkrunfw (kernel firmware) + libkrun (VMM) + gvproxy for all +# supported openshell-vm platforms. Artifacts are uploaded to the rolling +# "vm-dev" GitHub Release and consumed by release-vm-dev.yml when building the +# openshell-vm binary. +# +# This workflow runs on-demand (or when kernel config / pins change). It is +# intentionally decoupled from the per-commit VM binary build because the +# kernel rarely changes and takes 15-45 minutes to compile. + +on: + workflow_dispatch: + +permissions: + contents: write + +# Serialize with release-vm-dev.yml — both update the vm-dev release. 
+concurrency: + group: vm-dev-release + cancel-in-progress: false + +defaults: + run: + shell: bash + +jobs: + # --------------------------------------------------------------------------- + # Linux ARM64 — native kernel + libkrun build + # --------------------------------------------------------------------------- + build-runtime-linux-arm64: + name: Build Runtime (Linux ARM64) + runs-on: build-arm64 + timeout-minutes: 60 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --privileged + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Build libkrunfw + libkrun from source + run: tasks/scripts/vm/build-libkrun.sh + + - name: Package runtime tarball + run: | + tasks/scripts/vm/package-vm-runtime.sh \ + --platform linux-aarch64 \ + --build-dir target/libkrun-build \ + --output artifacts/vm-runtime-linux-aarch64.tar.zst + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: vm-runtime-linux-arm64 + path: artifacts/vm-runtime-linux-aarch64.tar.zst + retention-days: 5 + + # --------------------------------------------------------------------------- + # Linux AMD64 — native kernel + libkrun build + # --------------------------------------------------------------------------- + build-runtime-linux-amd64: + name: Build Runtime (Linux AMD64) + runs-on: build-amd64 + timeout-minutes: 60 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --privileged + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Build libkrunfw + libkrun 
from source + run: tasks/scripts/vm/build-libkrun.sh + + - name: Package runtime tarball + run: | + tasks/scripts/vm/package-vm-runtime.sh \ + --platform linux-x86_64 \ + --build-dir target/libkrun-build \ + --output artifacts/vm-runtime-linux-x86_64.tar.zst + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: vm-runtime-linux-amd64 + path: artifacts/vm-runtime-linux-x86_64.tar.zst + retention-days: 5 + + # --------------------------------------------------------------------------- + # macOS ARM64 — kernel built via krunvm, libkrun built natively + # --------------------------------------------------------------------------- + build-runtime-macos-arm64: + name: Build Runtime (macOS ARM64) + runs-on: macos-latest-xlarge + timeout-minutes: 90 + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: | + set -euo pipefail + brew install rust lld dtc xz + # libkrunfw from Homebrew (used as a fallback/reference by build scripts) + brew install libkrunfw + # krunvm is needed to build the Linux kernel inside a Fedora VM + brew tap slp/krun + brew install krunvm + + - name: Build custom libkrunfw (kernel) + run: crates/openshell-vm/runtime/build-custom-libkrunfw.sh + + - name: Build portable libkrun + run: tasks/scripts/vm/build-libkrun-macos.sh + + - name: Package runtime tarball + env: + CUSTOM_PROVENANCE_DIR: target/custom-runtime + run: | + tasks/scripts/vm/package-vm-runtime.sh \ + --platform darwin-aarch64 \ + --build-dir target/libkrun-build \ + --output artifacts/vm-runtime-darwin-aarch64.tar.zst + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: vm-runtime-macos-arm64 + path: artifacts/vm-runtime-darwin-aarch64.tar.zst + retention-days: 5 + + # --------------------------------------------------------------------------- + # Upload all runtime tarballs to the vm-dev rolling release + # --------------------------------------------------------------------------- + release-kernel: + name: 
Release Kernel Runtime + needs: [build-runtime-linux-arm64, build-runtime-linux-amd64, build-runtime-macos-arm64] + runs-on: build-amd64 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + + - name: Download all runtime artifacts + uses: actions/download-artifact@v4 + with: + pattern: vm-runtime-* + path: release/ + merge-multiple: true + + - name: Generate checksums + run: | + set -euo pipefail + cd release + sha256sum vm-runtime-*.tar.zst > vm-runtime-checksums-sha256.txt + cat vm-runtime-checksums-sha256.txt + + - name: Ensure vm-dev tag exists + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git tag -fa vm-dev -m "VM Development Build" "${GITHUB_SHA}" + git push --force origin vm-dev + + - name: Prune stale runtime assets from vm-dev release + uses: actions/github-script@v7 + with: + script: | + const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/'); + let release; + try { + release = await github.rest.repos.getReleaseByTag({ owner, repo, tag: 'vm-dev' }); + } catch (err) { + if (err.status === 404) { + core.info('No existing vm-dev release; will create fresh.'); + return; + } + throw err; + } + // Delete old runtime tarballs and checksums (keep vm binary assets) + for (const asset of release.data.assets) { + if (asset.name.startsWith('vm-runtime-')) { + core.info(`Deleting stale asset: ${asset.name}`); + await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: asset.id }); + } + } + + - name: Create / update vm-dev GitHub Release + uses: softprops/action-gh-release@v2 + with: + name: OpenShell VM Development Build + prerelease: true + tag_name: vm-dev + target_commitish: ${{ github.sha }} + body: | + Rolling development build of **openshell-vm** — the MicroVM runtime for OpenShell. + + > **NOTE**: This is a development build, not a tagged release, and may be unstable. 
+ > The VM implementation itself is also experimental and may change or break without + > notice. + + ### Kernel Runtime Artifacts + + Pre-built kernel runtime (libkrunfw + libkrun + gvproxy) for embedding into + the openshell-vm binary. These are rebuilt when the kernel config or pinned + dependency versions change. + + | Platform | Artifact | + |----------|----------| + | Linux ARM64 | `vm-runtime-linux-aarch64.tar.zst` | + | Linux x86_64 | `vm-runtime-linux-x86_64.tar.zst` | + | macOS ARM64 | `vm-runtime-darwin-aarch64.tar.zst` | + + ### VM Binaries + + Self-extracting openshell-vm binaries with embedded kernel runtime and base + rootfs. These are rebuilt on every push to main. + + | Platform | Artifact | + |----------|----------| + | Linux ARM64 | `openshell-vm-aarch64-unknown-linux-gnu.tar.gz` | + | Linux x86_64 | `openshell-vm-x86_64-unknown-linux-gnu.tar.gz` | + | macOS ARM64 | `openshell-vm-aarch64-apple-darwin.tar.gz` | + + **macOS users:** The binary must be codesigned with the Hypervisor entitlement: + ```bash + codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - ./openshell-vm + ``` + + files: | + release/vm-runtime-linux-aarch64.tar.zst + release/vm-runtime-linux-x86_64.tar.zst + release/vm-runtime-darwin-aarch64.tar.zst + release/vm-runtime-checksums-sha256.txt diff --git a/.gitignore b/.gitignore index 32610f714..145c30695 100644 --- a/.gitignore +++ b/.gitignore @@ -181,6 +181,9 @@ kubeconfig # Documentation build output _build/ +# Gateway microVM rootfs build artifacts +rootfs/ + # Docker build artifacts (image tarballs, packaged helm charts) deploy/docker/.build/ diff --git a/AGENTS.md b/AGENTS.md index 79dc29d1b..0972d1d6a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -38,6 +38,7 @@ These pipelines connect skills into end-to-end workflows. 
Individual skill files | `crates/openshell-core/` | Shared core | Common types, configuration, error handling | | `crates/openshell-providers/` | Provider management | Credential provider backends | | `crates/openshell-tui/` | Terminal UI | Ratatui-based dashboard for monitoring | +| `crates/openshell-vm/` | MicroVM runtime | Experimental, work-in-progress libkrun-based VM execution | | `python/openshell/` | Python SDK | Python bindings and CLI packaging | | `proto/` | Protobuf definitions | gRPC service contracts | | `deploy/` | Docker, Helm, K8s | Dockerfiles, Helm chart, manifests | diff --git a/Cargo.lock b/Cargo.lock index 852d97a0c..dfc75fdc0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -106,9 +106,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" @@ -488,9 +488,9 @@ dependencies = [ [[package]] name = "bollard" -version = "0.20.1" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "227aa051deec8d16bd9c34605e7aaf153f240e35483dd42f6f78903847934738" +checksum = "ee04c4c84f1f811b017f2fbb7dd8815c976e7ca98593de9c1e2afad0f636bff4" dependencies = [ "base64 0.22.1", "bollard-stubs", @@ -584,9 +584,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.56" +version = "1.2.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", "jobserver", @@ -710,9 +710,9 @@ checksum = "5417da527aa9bf6a1e10a781231effd1edd3ee82f27d5f8529ac9b279babce96" [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "compact_str" @@ -1125,9 +1125,9 @@ dependencies = [ [[package]] name = "digest" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "285743a676ccb6b3e116bc14cc69319b957867930ae9c4822f8e0f54509d7243" +checksum = "4850db49bf08e663084f7fb5c87d202ef91a3907271aff24a94eb97ff039153c" dependencies = [ "block-buffer 0.12.0", "const-oid 0.10.2", @@ -2112,7 +2112,7 @@ checksum = "fe44f2bbd99fcb302e246e2d6bcf51aeda346d02a365f80296a07a8c711b6da6" dependencies = [ "argon2", "bcrypt-pbkdf", - "digest 0.11.1", + "digest 0.11.2", "ecdsa", "ed25519-dalek", "hex", @@ -2143,9 +2143,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb" dependencies = [ "memchr", "serde", @@ -2201,9 +2201,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jobserver" @@ -2468,6 +2468,16 @@ dependencies = [ "rand 0.9.2", ] +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libm" version = "0.2.16" @@ 
-2476,9 +2486,9 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.14" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" +checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08" dependencies = [ "bitflags", "libc", @@ -2734,9 +2744,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-integer" @@ -3060,6 +3070,33 @@ dependencies = [ "url", ] +[[package]] +name = "openshell-vm" +version = "0.0.0" +dependencies = [ + "base64 0.22.1", + "clap", + "indicatif", + "libc", + "libloading", + "miette", + "nix", + "openshell-bootstrap", + "openshell-core", + "rustls", + "rustls-pemfile", + "serde", + "serde_json", + "tar", + "thiserror 2.0.18", + "tokio", + "tokio-rustls", + "tonic", + "tracing", + "tracing-subscriber", + "zstd", +] + [[package]] name = "openssh" version = "0.11.6" @@ -3913,7 +3950,7 @@ dependencies = [ "const-oid 0.10.2", "crypto-bigint 0.7.0-rc.18", "crypto-primes", - "digest 0.11.1", + "digest 0.11.2", "pkcs1 0.8.0-rc.4", "pkcs8 0.11.0-rc.11", "rand_core 0.10.0-rc-3", @@ -4105,9 +4142,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.9" +version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "ring", "rustls-pki-types", @@ -4402,7 +4439,7 @@ checksum = "3b167252f3c126be0d8926639c4c4706950f01445900c4b3db0fd7e89fcb750a" dependencies = [ "cfg-if", 
"cpufeatures", - "digest 0.11.1", + "digest 0.11.2", ] [[package]] @@ -4424,7 +4461,7 @@ checksum = "7c5f3b1e2dc8aad28310d8410bd4d7e180eca65fca176c52ab00d364475d0024" dependencies = [ "cfg-if", "cpufeatures", - "digest 0.11.1", + "digest 0.11.2", ] [[package]] @@ -4502,7 +4539,7 @@ version = "3.0.0-rc.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a96996ccff7dfa16f052bd995b4cecc72af22c35138738dc029f0ead6608d" dependencies = [ - "digest 0.11.1", + "digest 0.11.2", "rand_core 0.10.0-rc-3", ] @@ -4986,12 +5023,12 @@ dependencies = [ [[package]] name = "terminal_size" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" +checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" dependencies = [ "rustix 1.1.4", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -5096,9 +5133,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -5422,9 +5459,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "matchers", "nu-ansi-term", @@ -5530,9 +5567,9 @@ checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "da36089a805484bcccfffe0739803392c8298778a2d2f09febf76fac5ad9025b" [[package]] name = "unicode-truncate" @@ -6360,18 +6397,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.42" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.42" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" dependencies = [ "proc-macro2", "quote", @@ -6457,3 +6494,31 @@ name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md new file mode 100644 index 000000000..c2e9b57b7 --- /dev/null +++ 
b/architecture/custom-vm-runtime.md @@ -0,0 +1,269 @@ +# Custom libkrunfw VM Runtime + +> Status: Experimental and work in progress (WIP). VM support is under active development and may change. + +## Overview + +The OpenShell gateway VM uses [libkrun](https://github.com/containers/libkrun) to boot a +lightweight microVM with Apple Hypervisor.framework (macOS) or KVM (Linux). The kernel +is embedded inside `libkrunfw`, a companion library that packages a pre-built Linux kernel. + +The stock `libkrunfw` from Homebrew ships a minimal kernel without bridge, netfilter, or +conntrack support. This is insufficient for Kubernetes pod networking. + +The custom libkrunfw runtime adds bridge CNI, iptables/nftables, and conntrack support to +the VM kernel, enabling standard Kubernetes networking. + +## Architecture + +```mermaid +graph TD + subgraph Host["Host (macOS / Linux)"] + BIN[openshell-vm binary] + EMB["Embedded runtime (zstd-compressed)\nlibkrun · libkrunfw · gvproxy"] + CACHE["~/.local/share/openshell/vm-runtime/{version}/"] + PROV[Runtime provenance logging] + GVP[gvproxy networking proxy] + + BIN --> EMB + BIN -->|extracts to| CACHE + BIN --> PROV + BIN -->|spawns| GVP + end + + subgraph Guest["Guest VM"] + INIT["openshell-vm-init.sh (PID 1)"] + VAL[Validates kernel capabilities] + CNI[Configures bridge CNI] + EXECA["Starts exec agent\nvsock port 10777"] + PKI[Generates mTLS PKI] + K3S[Execs k3s server] + EXECPY["openshell-vm-exec-agent.py"] + CHK["check-vm-capabilities.sh"] + + INIT --> VAL --> CNI --> EXECA --> PKI --> K3S + end + + BIN -- "fork + krun_start_enter" --> INIT + GVP -- "virtio-net" --> Guest +``` + +## Embedded Runtime + +The openshell-vm binary is fully self-contained, embedding both the VM runtime libraries +and a minimal rootfs as zstd-compressed byte arrays. 
On first use, the binary extracts +these to XDG cache directories with progress bars: + +``` +~/.local/share/openshell/vm-runtime/{version}/ +├── libkrun.{dylib,so} +├── libkrunfw.{5.dylib,so.5} +└── gvproxy + +~/.local/share/openshell/openshell-vm/{version}/instances/<name>/rootfs/ +├── usr/local/bin/k3s +├── opt/openshell/bin/openshell-sandbox +├── opt/openshell/manifests/ +└── ... +``` + +This eliminates the need for separate bundles or downloads - a single ~120MB binary +provides everything needed to run the VM. Old cache versions are automatically +cleaned up when a new version is extracted. + +### Hybrid Approach + +The embedded rootfs uses a "minimal" configuration: +- Includes: Base Ubuntu, k3s binary, supervisor binary, helm charts, manifests +- Excludes: Pre-loaded container images (~1GB savings) + +Container images are pulled on demand when sandboxes are created. First boot takes +~30-60s as k3s initializes; subsequent boots use cached state for ~3-5s startup. + +For fully air-gapped environments requiring pre-loaded images, build with: +```bash +mise run vm:rootfs # Full rootfs (~2GB, includes images) +mise run vm:build # Rebuild binary with full rootfs +``` + +## Network Profile + +The VM uses the bridge CNI profile, which requires a custom libkrunfw with bridge and +netfilter kernel support. The init script validates these capabilities at boot and fails +fast with an actionable error if they are missing.
+ +### Bridge Profile + +- CNI: bridge plugin with `cni0` interface +- IP masquerade: enabled (iptables-legacy via CNI bridge plugin) +- kube-proxy: enabled (nftables mode) +- Service VIPs: functional (ClusterIP, NodePort) +- hostNetwork workarounds: not required + +## Runtime Provenance + +At boot, the openshell-vm binary logs provenance metadata about the loaded runtime bundle: + +- Library paths and SHA-256 hashes +- Whether the runtime is custom-built or stock +- For custom runtimes: libkrunfw commit, kernel version, build timestamp + +This information is sourced from `provenance.json` (generated by the build script) +and makes it straightforward to correlate VM behavior with a specific runtime artifact. + +## Build Pipeline + +```mermaid +graph LR + subgraph Source["crates/openshell-vm/runtime/"] + BUILD["build-custom-libkrunfw.sh\nClones libkrunfw, applies config, builds"] + KCONF["kernel/openshell.kconfig\nKernel config fragment"] + README["README.md\nOperator documentation"] + end + + subgraph Output["target/custom-runtime/"] + LIB["libkrunfw.dylib\nCustom library"] + META["provenance.json\nBuild metadata"] + FRAG["openshell.kconfig\nConfig fragment used"] + FULL["kernel.config\nFull kernel .config"] + end + + KCONF --> BUILD + BUILD --> LIB + BUILD --> META + BUILD --> FRAG + BUILD --> FULL +``` + +## Kernel Config Fragment + +The `openshell.kconfig` fragment enables these kernel features on top of the stock +libkrunfw kernel: + +| Feature | Key Configs | Purpose | +|---------|-------------|---------| +| Network namespaces | `CONFIG_NET_NS`, `CONFIG_NAMESPACES` | Pod isolation | +| veth | `CONFIG_VETH` | Pod network namespace pairs | +| Bridge device | `CONFIG_BRIDGE`, `CONFIG_BRIDGE_NETFILTER` | cni0 bridge for pod networking, kube-proxy bridge traffic visibility | +| Netfilter framework | `CONFIG_NETFILTER`, `CONFIG_NETFILTER_ADVANCED`, `CONFIG_NETFILTER_XTABLES` | iptables/nftables framework | +| xtables match modules | 
`CONFIG_NETFILTER_XT_MATCH_CONNTRACK`, `_COMMENT`, `_MULTIPORT`, `_MARK`, `_STATISTIC`, `_ADDRTYPE`, `_RECENT`, `_LIMIT` | kube-proxy and kubelet iptables rules | +| Connection tracking | `CONFIG_NF_CONNTRACK`, `CONFIG_NF_CT_NETLINK` | NAT state tracking | +| NAT | `CONFIG_NF_NAT` | Service VIP DNAT/SNAT | +| iptables | `CONFIG_IP_NF_IPTABLES`, `CONFIG_IP_NF_FILTER`, `CONFIG_IP_NF_NAT`, `CONFIG_IP_NF_MANGLE` | CNI bridge masquerade and compat | +| nftables | `CONFIG_NF_TABLES`, `CONFIG_NFT_CT`, `CONFIG_NFT_NAT`, `CONFIG_NFT_MASQ`, `CONFIG_NFT_NUMGEN`, `CONFIG_NFT_FIB_IPV4` | kube-proxy nftables mode (primary) | +| IP forwarding | `CONFIG_IP_ADVANCED_ROUTER`, `CONFIG_IP_MULTIPLE_TABLES` | Pod-to-pod routing | +| IPVS | `CONFIG_IP_VS`, `CONFIG_IP_VS_RR`, `CONFIG_IP_VS_NFCT` | kube-proxy IPVS mode (optional) | +| Traffic control | `CONFIG_NET_SCH_HTB`, `CONFIG_NET_CLS_CGROUP` | Kubernetes QoS | +| Cgroups | `CONFIG_CGROUPS`, `CONFIG_CGROUP_DEVICE`, `CONFIG_MEMCG`, `CONFIG_CGROUP_PIDS` | Container resource limits | +| TUN/TAP | `CONFIG_TUN` | CNI plugin support | +| Dummy interface | `CONFIG_DUMMY` | Fallback networking | +| Landlock | `CONFIG_SECURITY_LANDLOCK` | Filesystem sandboxing support | +| Seccomp filter | `CONFIG_SECCOMP_FILTER` | Syscall filtering support | + +See `crates/openshell-vm/runtime/kernel/openshell.kconfig` for the full fragment with +inline comments explaining why each option is needed. + +## Verification + +One verification tool is provided: + +1. **Capability checker** (`check-vm-capabilities.sh`): Runs inside the VM to verify + kernel capabilities. Produces pass/fail results for each required feature. + +## Running Commands In A Live VM + +The standalone `openshell-vm` binary supports `openshell-vm exec -- ` for a running VM. 
+ +- Each VM instance stores local runtime state next to its instance rootfs +- libkrun maps a per-instance host Unix socket into the guest on vsock port `10777` +- `openshell-vm-init.sh` starts `openshell-vm-exec-agent.py` during boot +- `openshell-vm exec` connects to the host socket, which libkrun forwards into the guest exec agent +- The guest exec agent spawns the command, then streams stdout, stderr, and exit status back +- The host-side bootstrap also uses the exec agent to read PKI cert files from the guest + (via `cat /opt/openshell/pki/<file>`) instead of requiring a separate vsock server + +`openshell-vm exec` also injects `KUBECONFIG=/etc/rancher/k3s/k3s.yaml` by default so kubectl-style +commands work the same way they would inside the VM shell. + +## Build Commands + +```bash +# One-time setup: download pre-built runtime (~30s) +mise run vm:setup + +# Build and run +mise run vm + +# Build embedded binary with base rootfs (~120MB, recommended) +mise run vm:rootfs -- --base # Build base rootfs tarball +mise run vm:build # Build binary with embedded rootfs + +# Build with full rootfs (air-gapped, ~2GB+) +mise run vm:rootfs # Build full rootfs tarball +mise run vm:build # Rebuild binary + +# With custom kernel (optional, adds ~20 min) +FROM_SOURCE=1 mise run vm:setup # Build runtime from source +mise run vm:build # Then build embedded binary + +# Wipe everything and start over +mise run vm:clean +``` + +## CI/CD + +The openshell-vm build is split into two GitHub Actions workflows that publish to a +rolling `vm-dev` GitHub Release: + +### Kernel Runtime (`release-vm-kernel.yml`) + +Builds the custom libkrunfw (kernel firmware), libkrun (VMM), and gvproxy for all +supported platforms. Runs on-demand or when the kernel config / pinned versions change.
+ +| Platform | Runner | Build Method | +|----------|--------|-------------| +| Linux ARM64 | `build-arm64` (self-hosted) | Native `build-libkrun.sh` | +| Linux x86_64 | `build-amd64` (self-hosted) | Native `build-libkrun.sh` | +| macOS ARM64 | `macos-latest-xlarge` (GitHub-hosted) | `build-custom-libkrunfw.sh` (krunvm) + `build-libkrun-macos.sh` | + +Artifacts: `vm-runtime-{platform}.tar.zst` containing libkrun, libkrunfw, gvproxy, and +provenance metadata. + +The macOS kernel build requires a real macOS ARM64 runner because it uses `krunvm` to +compile the Linux kernel inside a Fedora VM (Hypervisor.framework). The kernel inside +libkrunfw is always Linux regardless of host platform. + +### VM Binary (`release-vm-dev.yml`) + +Builds the self-extracting openshell-vm binary for all platforms. Runs on every push +to `main` that touches VM-related crates. + +```mermaid +graph TD + CV[compute-versions] --> DL[download-kernel-runtime\nfrom vm-dev release] + DL --> RFS_ARM[build-rootfs arm64] + DL --> RFS_AMD[build-rootfs amd64] + RFS_ARM --> VM_ARM[build-vm linux-arm64] + RFS_AMD --> VM_AMD[build-vm linux-amd64] + RFS_ARM --> VM_MAC["build-vm-macos\n(osxcross, reuses arm64 rootfs)"] + VM_ARM --> REL[release-vm-dev\nupload to rolling release] + VM_AMD --> REL + VM_MAC --> REL +``` + +The macOS binary is cross-compiled via osxcross (no macOS runner needed for the binary +build — only for the kernel build). The macOS VM guest is always Linux ARM64, so it +reuses the arm64 rootfs. + +macOS binaries produced via osxcross are not codesigned. Users must self-sign: +```bash +codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - ./openshell-vm +``` + +## Rollout Strategy + +1. Custom runtime is embedded by default when building with `mise run vm:build`. +2. The init script validates kernel capabilities at boot and fails fast if missing. +3. For development, override with `OPENSHELL_VM_RUNTIME_DIR` to use a local directory. +4. 
In CI, kernel runtime is pre-built and cached in the `vm-dev` release. The binary + build downloads it via `download-kernel-runtime.sh`. diff --git a/crates/openshell-bootstrap/src/lib.rs b/crates/openshell-bootstrap/src/lib.rs index 8ce10703e..71d223d66 100644 --- a/crates/openshell-bootstrap/src/lib.rs +++ b/crates/openshell-bootstrap/src/lib.rs @@ -6,12 +6,12 @@ pub mod edge_token; pub mod errors; pub mod image; -mod constants; +pub mod constants; mod docker; mod metadata; -mod mtls; -mod paths; -mod pki; +pub mod mtls; +pub mod paths; +pub mod pki; pub(crate) mod push; mod runtime; diff --git a/crates/openshell-bootstrap/src/paths.rs b/crates/openshell-bootstrap/src/paths.rs index cd3cb7693..1c514f370 100644 --- a/crates/openshell-bootstrap/src/paths.rs +++ b/crates/openshell-bootstrap/src/paths.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use miette::Result; -use openshell_core::paths::xdg_config_dir; +use openshell_core::paths::{xdg_config_dir, xdg_data_dir}; use std::path::PathBuf; /// Path to the file that stores the active gateway name. @@ -26,6 +26,13 @@ pub fn last_sandbox_path(gateway: &str) -> Result { Ok(gateways_dir()?.join(gateway).join("last_sandbox")) } +/// Base directory for openshell-vm data (without version). +/// +/// Location: `$XDG_DATA_HOME/openshell/openshell-vm/` +pub fn openshell_vm_base_dir() -> Result { + Ok(xdg_data_dir()?.join("openshell").join("openshell-vm")) +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 87d377b39..1d9305ff6 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -62,9 +62,13 @@ fn resolve_gateway( gateway_endpoint: &Option, ) -> Result { if let Some(endpoint) = gateway_endpoint { + // When a gateway name is explicitly provided (via flag or env var), + // trust it directly — don't require metadata to exist yet. 
This + // avoids a race condition where mTLS certs are stored under the + // real gateway name but the CLI falls back to using the raw + // endpoint URL (producing a mangled path like `https___...`). let name = gateway_flag .clone() - .filter(|name| get_gateway_metadata(name).is_some()) .or_else(|| find_gateway_by_endpoint(endpoint)) .unwrap_or_else(|| endpoint.clone()); return Ok(GatewayContext { diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index c40640c30..218d04e99 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -2118,7 +2118,12 @@ pub async fn sandbox_create( // Track whether we have seen a non-Ready phase during the watch. let mut saw_non_ready = SandboxPhase::try_from(sandbox.phase) != Ok(SandboxPhase::Ready); let start_time = Instant::now(); - let provision_timeout = Duration::from_secs(300); + let provision_timeout = Duration::from_secs( + std::env::var("OPENSHELL_PROVISION_TIMEOUT") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(300), + ); // Track whether we saw the gateway become ready (from log messages). let mut saw_gateway_ready = false; diff --git a/crates/openshell-core/src/paths.rs b/crates/openshell-core/src/paths.rs index bd9ce23d4..fd0a141b3 100644 --- a/crates/openshell-core/src/paths.rs +++ b/crates/openshell-core/src/paths.rs @@ -29,6 +29,19 @@ pub fn openshell_config_dir() -> Result { Ok(xdg_config_dir()?.join("openshell")) } +/// Resolve the XDG data base directory. +/// +/// Returns `$XDG_DATA_HOME` if set, otherwise `$HOME/.local/share`. +pub fn xdg_data_dir() -> Result { + if let Ok(path) = std::env::var("XDG_DATA_HOME") { + return Ok(PathBuf::from(path)); + } + let home = std::env::var("HOME") + .into_diagnostic() + .wrap_err("HOME is not set")?; + Ok(PathBuf::from(home).join(".local").join("share")) +} + /// Create a directory (and parents) with owner-only permissions (`0o700`) on /// Unix. On non-Unix platforms, falls back to default permissions. 
/// diff --git a/crates/openshell-server/src/sandbox/mod.rs b/crates/openshell-server/src/sandbox/mod.rs index c5e9a8335..a5d7dc071 100644 --- a/crates/openshell-server/src/sandbox/mod.rs +++ b/crates/openshell-server/src/sandbox/mod.rs @@ -786,7 +786,11 @@ fn apply_supervisor_sideload(pod_template: &mut serde_json::Value) { /// The init container mounts the PVC at a temporary path so it can still see /// the image's `/sandbox` directory. It checks for a sentinel file and skips /// the copy if the PVC was already initialised. -fn apply_workspace_persistence(pod_template: &mut serde_json::Value, image: &str) { +fn apply_workspace_persistence( + pod_template: &mut serde_json::Value, + image: &str, + image_pull_policy: &str, +) { let Some(spec) = pod_template.get_mut("spec").and_then(|v| v.as_object_mut()) else { return; }; @@ -827,19 +831,24 @@ fn apply_workspace_persistence(pod_template: &mut serde_json::Value, image: &str // read the image's original /sandbox contents. It copies them into // the PVC only when the sentinel file is absent. // + // Prefer a tar stream over `cp -a`: some sandbox images contain + // self-referential symlinks under `/sandbox/.uv`, and GNU cp can + // fail while seeding the PVC even though preserving the symlink as-is + // is valid. `tar` copies the tree without dereferencing those links. + // // The inner `[ -d ... ]` guard handles custom images that don't have // a /sandbox directory — the copy is skipped but the sentinel is // still written so subsequent starts are instant. let copy_cmd = format!( "if [ ! -f {WORKSPACE_INIT_MOUNT_PATH}/{WORKSPACE_SENTINEL} ]; then \ if [ -d {WORKSPACE_MOUNT_PATH} ]; then \ - cp -a {WORKSPACE_MOUNT_PATH}/. {WORKSPACE_INIT_MOUNT_PATH}/; \ + tar -C {WORKSPACE_MOUNT_PATH} -cf - . 
| tar -C {WORKSPACE_INIT_MOUNT_PATH} -xpf -; \ fi && \ touch {WORKSPACE_INIT_MOUNT_PATH}/{WORKSPACE_SENTINEL}; \ fi" ); - init_containers.push(serde_json::json!({ + let mut init_spec = serde_json::json!({ "name": WORKSPACE_INIT_CONTAINER_NAME, "image": image, "command": ["sh", "-c", copy_cmd], @@ -848,7 +857,11 @@ fn apply_workspace_persistence(pod_template: &mut serde_json::Value, image: &str "name": WORKSPACE_VOLUME_NAME, "mountPath": WORKSPACE_INIT_MOUNT_PATH }] - })); + }); + if !image_pull_policy.is_empty() { + init_spec["imagePullPolicy"] = serde_json::json!(image_pull_policy); + } + init_containers.push(init_spec); } } @@ -1126,7 +1139,7 @@ fn sandbox_template_to_k8s( // that /sandbox data survives pod rescheduling. Skipped when the user // provides custom volumeClaimTemplates to avoid conflicts. if inject_workspace { - apply_workspace_persistence(&mut result, image); + apply_workspace_persistence(&mut result, image, image_pull_policy); } result @@ -2024,7 +2037,11 @@ mod tests { } }); - apply_workspace_persistence(&mut pod_template, "openshell/sandbox:latest"); + apply_workspace_persistence( + &mut pod_template, + "openshell/sandbox:latest", + "IfNotPresent", + ); // Init container let init_containers = pod_template["spec"]["initContainers"] @@ -2033,6 +2050,7 @@ mod tests { assert_eq!(init_containers.len(), 1); assert_eq!(init_containers[0]["name"], WORKSPACE_INIT_CONTAINER_NAME); assert_eq!(init_containers[0]["image"], "openshell/sandbox:latest"); + assert_eq!(init_containers[0]["imagePullPolicy"], "IfNotPresent"); assert_eq!(init_containers[0]["securityContext"]["runAsUser"], 0); // Init container mounts PVC at temp path, not /sandbox @@ -2078,7 +2096,7 @@ mod tests { } }); - apply_workspace_persistence(&mut pod_template, "my-custom-image:v2"); + apply_workspace_persistence(&mut pod_template, "my-custom-image:v2", "IfNotPresent"); let init_image = pod_template["spec"]["initContainers"][0]["image"] .as_str() @@ -2100,7 +2118,7 @@ mod tests { } }); - 
apply_workspace_persistence(&mut pod_template, "img:latest"); + apply_workspace_persistence(&mut pod_template, "img:latest", "Always"); let cmd = pod_template["spec"]["initContainers"][0]["command"] .as_array() @@ -2111,8 +2129,8 @@ mod tests { "init script must check for sentinel file" ); assert!( - script.contains("cp -a"), - "init script must copy image contents" + script.contains("tar -C"), + "init script must seed image contents with a tar stream" ); } diff --git a/crates/openshell-vm/Cargo.toml b/crates/openshell-vm/Cargo.toml new file mode 100644 index 000000000..7d74b3139 --- /dev/null +++ b/crates/openshell-vm/Cargo.toml @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "openshell-vm" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +repository.workspace = true +description = "MicroVM runtime using libkrun for hardware-isolated execution" + +[lib] +name = "openshell_vm" +path = "src/lib.rs" + +[[bin]] +name = "openshell-vm" +path = "src/main.rs" + +[dependencies] +base64 = "0.22" +clap = { workspace = true } +indicatif = "0.17" +libc = "0.2" +libloading = "0.8" +miette = { workspace = true } +nix = { workspace = true } +openshell-bootstrap = { path = "../openshell-bootstrap" } +openshell-core = { path = "../openshell-core" } +serde = { workspace = true } +serde_json = "1" +tar = "0.4" +thiserror = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +zstd = "0.13" + +# Async runtime and gRPC for health check +tokio = { workspace = true } +tonic = { workspace = true, features = ["tls", "tls-native-roots"] } +rustls = { workspace = true } +rustls-pemfile = { workspace = true } +tokio-rustls = { workspace = true } + +[build-dependencies] +zstd = "0.13" + +[lints] +workspace = true diff --git a/crates/openshell-vm/README.md 
b/crates/openshell-vm/README.md new file mode 100644 index 000000000..fcca20d5b --- /dev/null +++ b/crates/openshell-vm/README.md @@ -0,0 +1,244 @@ +# openshell-vm + +> Status: Experimental and work in progress (WIP). VM support is under active development and may change. + +MicroVM runtime for OpenShell, powered by [libkrun](https://github.com/containers/libkrun). Boots a lightweight ARM64 Linux VM on macOS (Apple Hypervisor.framework) or Linux (KVM) running a single-node k3s cluster with the OpenShell control plane. + +## Quick Start + +```bash +# One-time setup: download pre-built runtime (~30s) +mise run vm:setup + +# Build and run the VM +mise run vm +``` + +## Prerequisites + +- **macOS (Apple Silicon)** or **Linux (aarch64 or x86_64 with KVM)** +- Rust toolchain +- [mise](https://mise.jdx.dev/) task runner +- Docker (for rootfs builds) +- `gh` CLI (for downloading pre-built runtime) + +### macOS-Specific + +The binary must be codesigned with the Hypervisor.framework entitlement. The `mise run vm` flow handles this automatically. To codesign manually: + +```bash +codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - target/debug/openshell-vm +``` + +## Setup + +### Download Pre-Built Runtime (Default) + +Downloads libkrun, libkrunfw, and gvproxy from the `vm-dev` GitHub Release: + +```bash +mise run vm:setup +``` + +### Build from Source + +Compiles the runtime from source (15-45 minutes, needed for custom kernel work): + +```bash +FROM_SOURCE=1 mise run vm:setup +``` + +On macOS this builds a custom libkrunfw (kernel firmware with bridge/netfilter support) via `krunvm`, then builds a portable libkrun. On Linux it builds both natively. + +## Build + +Build the openshell-vm binary with embedded runtime: + +```bash +mise run vm:build +``` + +This compresses runtime artifacts, compiles the Rust binary with `include_bytes!()` embedding, codesigns it (macOS), and stages the sidecar runtime bundle. 
+ +## Rootfs + +The rootfs is an Ubuntu filesystem containing k3s, pre-loaded container images, and the OpenShell binaries. Build it with: + +```bash +# Base rootfs (~200-300MB, cold starts in ~30-60s) +mise run vm:rootfs -- --base + +# Full rootfs (~2GB+, pre-initialized, boots in ~3-5s) +mise run vm:rootfs +``` + +## Run + +### Default (Gateway Mode) + +Boots the full OpenShell gateway -- k3s + openshell-server + openshell-sandbox: + +```bash +mise run vm +``` + +Or run the binary directly: + +```bash +./target/debug/openshell-vm +``` + +### Custom Process + +Run an arbitrary process inside a fresh VM instead of k3s: + +```bash +./target/debug/openshell-vm --exec /bin/sh --vcpus 2 --mem 2048 +``` + +### Execute in a Running VM + +Attach to a running VM and run a command: + +```bash +./target/debug/openshell-vm exec -- ls / +./target/debug/openshell-vm exec -- sh # interactive shell +``` + +### Named Instances + +Run multiple isolated VM instances side-by-side: + +```bash +./target/debug/openshell-vm --name dev +./target/debug/openshell-vm --name staging +``` + +Each instance gets its own extracted rootfs under `~/.local/share/openshell/openshell-vm/<version>/instances/<name>/rootfs`. + +## CLI Reference + +``` +openshell-vm [OPTIONS] [COMMAND] + +Options: + --rootfs <path> Path to aarch64 Linux rootfs directory + --name <name> Named VM instance (auto-clones rootfs) + --exec <path> Run a custom process instead of k3s + --args <arg>... Arguments to the executable + --env <key=val>... Environment variables + --workdir <dir> Working directory inside the VM [default: /] + -p, --port <host:guest>... 
Port mappings (host_port:guest_port) + --vcpus Virtual CPUs [default: 4 gateway, 2 exec] + --mem RAM in MiB [default: 8192 gateway, 2048 exec] + --krun-log-level <0-5> libkrun log level [default: 1] + --net Networking: gvproxy, tsi, none [default: gvproxy] + --reset Wipe runtime state before booting + +Subcommands: + prepare-rootfs Ensure the target rootfs exists + exec Execute a command inside a running VM +``` + +## mise Tasks Reference + +| Task | Description | +|------|-------------| +| `vm` | Build and run the VM | +| `vm:build` | Build openshell-vm binary with embedded runtime | +| `vm:setup` | One-time setup: download (or build) the VM runtime | +| `vm:rootfs` | Build the VM rootfs tarball (`-- --base` for lightweight) | +| `vm:clean` | Remove all VM cached artifacts | +| `e2e:vm` | Boot VM and run smoke e2e tests | + +### Common Workflows + +```bash +# First time setup +mise run vm:setup # download pre-built runtime (~30s) +mise run vm # build + run + +# Day-to-day iteration +mise run vm # incremental build + run + +# Need fresh rootfs +mise run vm:rootfs -- --base # rebuild base rootfs +mise run vm:build # rebuild binary with new rootfs + +# Something broken, start over +mise run vm:clean # wipe everything +mise run vm:setup # re-download runtime +mise run vm # full rebuild + run + +# Custom kernel work (rare) +FROM_SOURCE=1 mise run vm:setup +``` + +## Architecture + +``` +Host (macOS / Linux) + openshell-vm binary + |-- Embedded runtime (libkrun, libkrunfw, gvproxy, rootfs.tar.zst) + |-- FFI: loads libkrun at runtime via dlopen + |-- gvproxy: virtio-net networking (real eth0 + DHCP) + |-- virtio-fs: shares rootfs with guest + \-- vsock: host-to-guest command execution (port 10777) + +Guest VM (aarch64 Linux) + PID 1: openshell-vm-init.sh + |-- Mounts filesystems, configures networking + |-- Sets up bridge CNI, generates PKI + \-- Execs k3s server + |-- openshell-server (gateway control plane) + \-- openshell-sandbox (pod supervisor) +``` + +## 
Environment Variables + +| Variable | When | Purpose | +|----------|------|---------| +| `OPENSHELL_VM_RUNTIME_COMPRESSED_DIR` | Build time | Path to compressed runtime artifacts | +| `OPENSHELL_VM_RUNTIME_DIR` | Runtime | Override the runtime bundle directory | +| `OPENSHELL_VM_DIAG=1` | Runtime | Enable diagnostic output inside the VM | +| `FROM_SOURCE=1` | `vm:setup` | Build runtime from source instead of downloading | + +## Custom Kernel (libkrunfw) + +The stock libkrunfw (e.g. from Homebrew) lacks bridge, netfilter, and conntrack support needed for pod networking. OpenShell builds a custom libkrunfw with these enabled. + +Build it via the setup command: + +```bash +FROM_SOURCE=1 mise run vm:setup +``` + +See [`runtime/README.md`](runtime/README.md) for details on the kernel config and troubleshooting. + +## Testing + +Integration tests require a built rootfs and macOS ARM64 with libkrun: + +```bash +cargo test -p openshell-vm -- --ignored +``` + +Individual tests: + +```bash +# Full gateway boot test (boots VM, waits for gRPC on port 30051) +cargo test -p openshell-vm gateway_boots -- --ignored + +# Run a command inside the VM +cargo test -p openshell-vm gateway_exec_runs -- --ignored + +# Exec into a running VM +cargo test -p openshell-vm gateway_exec_attaches -- --ignored +``` + +Verify kernel capabilities inside a running VM: + +```bash +./target/debug/openshell-vm exec -- /srv/check-vm-capabilities.sh +./target/debug/openshell-vm exec -- /srv/check-vm-capabilities.sh --json +``` diff --git a/crates/openshell-vm/build.rs b/crates/openshell-vm/build.rs new file mode 100644 index 000000000..33fab9a78 --- /dev/null +++ b/crates/openshell-vm/build.rs @@ -0,0 +1,142 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Build script for openshell-vm. +//! +//! This script copies pre-compressed VM runtime artifacts (libkrun, libkrunfw, +//! 
gvproxy) to `OUT_DIR` for embedding via `include_bytes!()`. +//! +//! The compressed artifacts are expected to be prepared by: +//! `mise run vm:setup` (one-time) then `mise run vm:build` +//! +//! Environment: +//! `OPENSHELL_VM_RUNTIME_COMPRESSED_DIR` - Path to compressed artifacts + +use std::path::PathBuf; +use std::{env, fs}; + +fn main() { + println!("cargo:rerun-if-env-changed=OPENSHELL_VM_RUNTIME_COMPRESSED_DIR"); + + // Re-run if any compressed artifact changes. + if let Ok(dir) = env::var("OPENSHELL_VM_RUNTIME_COMPRESSED_DIR") { + println!("cargo:rerun-if-changed={dir}"); + for name in &[ + "libkrun.so.zst", + "libkrunfw.so.5.zst", + "libkrun.dylib.zst", + "libkrunfw.5.dylib.zst", + "gvproxy.zst", + "rootfs.tar.zst", + ] { + println!("cargo:rerun-if-changed={dir}/{name}"); + } + } + + let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set")); + let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); + + // Determine platform-specific file names + let (libkrun_name, libkrunfw_name) = match target_os.as_str() { + "macos" => ("libkrun.dylib", "libkrunfw.5.dylib"), + "linux" => ("libkrun.so", "libkrunfw.so.5"), + _ => { + println!("cargo:warning=VM runtime not available for {target_os}-{target_arch}"); + generate_stub_resources(&out_dir); + return; + } + }; + + // Check for pre-compressed artifacts from mise task + let compressed_dir = if let Ok(dir) = env::var("OPENSHELL_VM_RUNTIME_COMPRESSED_DIR") { + PathBuf::from(dir) + } else { + println!("cargo:warning=OPENSHELL_VM_RUNTIME_COMPRESSED_DIR not set"); + println!("cargo:warning=Run: mise run vm:setup"); + generate_stub_resources(&out_dir); + return; + }; + + if !compressed_dir.is_dir() { + println!( + "cargo:warning=Compressed runtime dir not found: {}", + compressed_dir.display() + ); + println!("cargo:warning=Run: mise run vm:setup"); + generate_stub_resources(&out_dir); + return; + } + + // Copy 
compressed files to OUT_DIR + let files = [ + (format!("{libkrun_name}.zst"), format!("{libkrun_name}.zst")), + ( + format!("{libkrunfw_name}.zst"), + format!("{libkrunfw_name}.zst"), + ), + ("gvproxy.zst".to_string(), "gvproxy.zst".to_string()), + ("rootfs.tar.zst".to_string(), "rootfs.tar.zst".to_string()), + ]; + + let mut all_found = true; + for (src_name, dst_name) in &files { + let src_path = compressed_dir.join(src_name); + let dst_path = out_dir.join(dst_name); + + if src_path.exists() { + // Remove existing file first (may be read-only from previous build) + if dst_path.exists() { + let _ = fs::remove_file(&dst_path); + } + fs::copy(&src_path, &dst_path).unwrap_or_else(|e| { + panic!( + "Failed to copy {} to {}: {}", + src_path.display(), + dst_path.display(), + e + ) + }); + let size = fs::metadata(&dst_path).map(|m| m.len()).unwrap_or(0); + println!("cargo:warning=Embedded {src_name}: {size} bytes"); + } else { + println!( + "cargo:warning=Missing compressed artifact: {}", + src_path.display() + ); + all_found = false; + } + } + + if !all_found { + println!("cargo:warning=Some artifacts missing. Run: mise run vm:setup"); + generate_stub_resources(&out_dir); + } +} + +/// Generate stub (empty) resource files so the build can complete. +/// The embedded module will fail at runtime if these stubs are used. 
+fn generate_stub_resources(out_dir: &PathBuf) { + let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); + + let (libkrun_name, libkrunfw_name) = match target_os.as_str() { + "macos" => ("libkrun.dylib", "libkrunfw.5.dylib"), + _ => ("libkrun.so", "libkrunfw.so.5"), + }; + + let stubs = [ + format!("{libkrun_name}.zst"), + format!("{libkrunfw_name}.zst"), + "gvproxy.zst".to_string(), + "rootfs.tar.zst".to_string(), + ]; + + for name in &stubs { + let path = out_dir.join(name); + if !path.exists() { + // Write an empty file as a stub + fs::write(&path, b"") + .unwrap_or_else(|e| panic!("Failed to write stub {}: {}", path.display(), e)); + } + } +} diff --git a/crates/openshell-vm/entitlements.plist b/crates/openshell-vm/entitlements.plist new file mode 100644 index 000000000..154f3308e --- /dev/null +++ b/crates/openshell-vm/entitlements.plist @@ -0,0 +1,8 @@ + + + + + com.apple.security.hypervisor + + + diff --git a/crates/openshell-vm/pins.env b/crates/openshell-vm/pins.env new file mode 100644 index 000000000..3c34a4af2 --- /dev/null +++ b/crates/openshell-vm/pins.env @@ -0,0 +1,44 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Pinned dependency versions for openshell-vm builds. +# +# This file is sourced by build-rootfs.sh and +# build-custom-libkrunfw.sh. It centralises version pins and content-addressed +# digests so that builds are reproducible and auditable. +# +# Environment variables override these defaults — CI and local dev workflows +# can still set IMAGE_TAG, K3S_VERSION, etc. as before. +# +# To update a dependency: +# 1. Change the version/digest below. +# 2. Run the relevant build script to verify. +# 3. Commit pins.env alongside any script changes. 
+ +# ── k3s binary ───────────────────────────────────────────────────────── +K3S_VERSION="${K3S_VERSION:-v1.35.2+k3s1}" +K3S_ARM64_SHA256="${K3S_ARM64_SHA256:-228809a7ef47d25c1bdbe746944931ec2fd2edf842b9cf50f1dd4f9ec2505b0e}" +K3S_AMD64_SHA256="${K3S_AMD64_SHA256:-3ae8e35a62ac83e8e197c117858a564134057a7b8703cf73e67ce60d19f4a22b}" + +# ── Base Docker image (digest-pinned) ────────────────────────────────── +# Tag: nvcr.io/nvidia/base/ubuntu:noble-20251013 +VM_BASE_IMAGE="${VM_BASE_IMAGE:-nvcr.io/nvidia/base/ubuntu@sha256:43fa5063e80fbbc533892af3ccca190868ce48db5a8928b19d7815c40436af8e}" + +# ── Container images for rootfs pre-loading (digest-pinned) ──────────── +# Tag: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0 +AGENT_SANDBOX_IMAGE="${AGENT_SANDBOX_IMAGE:-registry.k8s.io/agent-sandbox/agent-sandbox-controller@sha256:ba71ea40ae0872791197badf2ab84f3f482df3902f1fce7ca9e076b1de9b57f6}" +# Tag: ghcr.io/nvidia/openshell-community/sandboxes/base:latest +COMMUNITY_SANDBOX_IMAGE="${COMMUNITY_SANDBOX_IMAGE:-ghcr.io/nvidia/openshell-community/sandboxes/base@sha256:d446c17105e7448e602238a8a5a4ddd0233c071082406522f81c31f8b1309525}" + +# SERVER_IMAGE is intentionally NOT pinned here — it changes frequently +# during local development. Override via IMAGE_REPO_BASE and IMAGE_TAG +# environment variables (defaults: openshell/gateway:dev). 
+ +# ── gvproxy (networking proxy) ────────────────────────────────────────── +# Repo: https://github.com/containers/gvisor-tap-vsock +GVPROXY_VERSION="${GVPROXY_VERSION:-v0.8.8}" + +# ── libkrunfw upstream (commit-pinned) ───────────────────────────────── +# Repo: https://github.com/containers/libkrunfw +# Pinned: 2026-03-27 (main branch HEAD at time of pinning) +LIBKRUNFW_REF="${LIBKRUNFW_REF:-463f717bbdd916e1352a025b6fb2456e882b0b39}" diff --git a/crates/openshell-vm/runtime/README.md b/crates/openshell-vm/runtime/README.md new file mode 100644 index 000000000..c30308e3a --- /dev/null +++ b/crates/openshell-vm/runtime/README.md @@ -0,0 +1,172 @@ +# Custom libkrunfw Runtime + +> Status: Experimental and work in progress (WIP). VM support is under active development and may change. + +This directory contains the build infrastructure for a custom `libkrunfw` runtime +that enables bridge CNI and netfilter support in the OpenShell gateway VM. + +## Why + +The stock `libkrunfw` (from Homebrew) ships a kernel without bridge, netfilter, +or conntrack support. This means the VM cannot: + +- Create `cni0` bridge interfaces (required by the bridge CNI plugin) +- Run kube-proxy (requires nftables) +- Route service VIP traffic (requires NAT/conntrack) + +The custom runtime builds libkrunfw with an additional kernel config fragment +that enables these networking and sandboxing features. 
+
+## Directory Structure
+
+```
+runtime/
+  build-custom-libkrunfw.sh    # Build script for custom libkrunfw
+  kernel/
+    openshell.kconfig          # Kernel config fragment (networking + sandboxing)
+```
+
+## Building
+
+### Prerequisites
+
+- Rust toolchain
+- make, git, curl
+- On macOS: Xcode command line tools and cross-compilation tools for aarch64
+
+### Quick Build
+
+```bash
+# Build custom libkrunfw (clones libkrunfw repo, applies config, builds)
+./crates/openshell-vm/runtime/build-custom-libkrunfw.sh
+
+# Or build the full runtime from source via mise:
+FROM_SOURCE=1 mise run vm:setup
+```
+
+### Output
+
+Build artifacts are placed in `target/custom-runtime/`:
+
+```
+target/custom-runtime/
+  libkrunfw.dylib       # The custom library
+  libkrunfw.5.dylib     # ABI-version-suffixed copy
+  provenance.json       # Build metadata (commit, hash, timestamp)
+  openshell.kconfig     # The config fragment used
+  kernel.config         # Full kernel .config (for debugging)
+```
+
+### Using the Custom Runtime
+
+```bash
+# Point the bundle script at the custom build and rebuild:
+export OPENSHELL_VM_RUNTIME_SOURCE_DIR=target/custom-runtime
+mise run vm:build
+
+# Then boot the VM as usual:
+mise run vm
+```
+
+## Networking
+
+The VM uses bridge CNI for pod networking with nftables-mode kube-proxy for
+service VIP / ClusterIP support. The kernel config fragment enables both
+iptables (for CNI bridge masquerade) and nftables (for kube-proxy).
+
+k3s is started with `--kube-proxy-arg=proxy-mode=nftables` because the
+bundled iptables binaries in k3s have revision-negotiation issues with the
+libkrun kernel's xt_MARK module. nftables mode uses the kernel's nf_tables
+subsystem directly and avoids this entirely.
+
+## Runtime Provenance
+
+At VM boot, the openshell-vm binary logs provenance information about the loaded
+runtime:
+
+```
+runtime: /path/to/openshell-vm.runtime
+  libkrunfw: libkrunfw.dylib
+  sha256: a1b2c3d4e5f6...
+ type: custom (OpenShell-built) + libkrunfw-commit: abc1234 + kernel-version: 6.6.30 + build-timestamp: 2026-03-23T10:00:00Z +``` + +For stock runtimes: +``` +runtime: /path/to/openshell-vm.runtime + libkrunfw: libkrunfw.dylib + sha256: f6e5d4c3b2a1... + type: stock (system/homebrew) +``` + +## Verification + +### Capability Check (inside VM) + +```bash +# Run inside the VM to verify kernel capabilities: +/srv/check-vm-capabilities.sh + +# JSON output for CI: +/srv/check-vm-capabilities.sh --json +``` + +### Rollback + +To revert to the stock runtime: + +```bash +# Unset the custom runtime source: +unset OPENSHELL_VM_RUNTIME_SOURCE_DIR + +# Re-download pre-built runtime and rebuild: +mise run vm:setup +mise run vm:build + +# Boot: +mise run vm +``` + +## Troubleshooting + +### "FailedCreatePodSandBox" bridge errors + +The kernel does not have bridge support. Verify: +```bash +# Inside VM: +ip link add test0 type bridge && echo "bridge OK" && ip link del test0 +``` + +If this fails, you are running the stock runtime. Build and use the custom one. + +### kube-proxy CrashLoopBackOff + +kube-proxy runs in nftables mode. If it crashes, verify nftables support: +```bash +# Inside VM: +nft list ruleset +``` + +If this fails, the kernel may lack `CONFIG_NF_TABLES`. Use the custom runtime. + +Common errors: +- `unknown option "--xor-mark"`: kube-proxy is running in iptables mode instead + of nftables. Verify `--kube-proxy-arg=proxy-mode=nftables` is in the k3s args. + +### Runtime mismatch after upgrade + +If libkrunfw is updated (e.g., via `brew upgrade`), the stock runtime may +change. 
Check provenance: +```bash +# Look for provenance info in VM boot output +grep "runtime:" ~/.local/share/openshell/openshell-vm/console.log +``` + +Re-build the custom runtime if needed: +```bash +FROM_SOURCE=1 mise run vm:setup +mise run vm:build +``` diff --git a/crates/openshell-vm/runtime/build-custom-libkrunfw.sh b/crates/openshell-vm/runtime/build-custom-libkrunfw.sh new file mode 100755 index 000000000..5d50c9153 --- /dev/null +++ b/crates/openshell-vm/runtime/build-custom-libkrunfw.sh @@ -0,0 +1,401 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build a custom libkrunfw with bridge/netfilter kernel support. +# +# This script clones libkrunfw, applies the OpenShell kernel config +# fragment (bridge CNI, iptables, conntrack), builds the library, and +# stages the artifact with provenance metadata. +# +# Prerequisites: +# - Rust toolchain (cargo) +# - make, git, curl +# - Cross-compilation toolchain for aarch64 (if building on x86_64) +# - On macOS: Xcode command line tools +# +# Usage: +# ./build-custom-libkrunfw.sh [--output-dir DIR] [--libkrunfw-ref REF] +# +# Environment: +# LIBKRUNFW_REF - git ref to check out (default: main) +# LIBKRUNFW_REPO - git repo URL (default: github.com/containers/libkrunfw) +# OPENSHELL_RUNTIME_OUTPUT_DIR - output directory for built artifacts + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" +KERNEL_CONFIG_FRAGMENT="${SCRIPT_DIR}/kernel/openshell.kconfig" + +# Source pinned dependency versions (digests, checksums, commit SHAs). +# Environment variables override pins — see pins.env for details. 
+PINS_FILE="${SCRIPT_DIR}/../pins.env" +if [ -f "$PINS_FILE" ]; then + # shellcheck source=../pins.env + source "$PINS_FILE" +fi + +# Defaults (LIBKRUNFW_REF is commit-pinned in pins.env; falls back to main +# only if pins.env is missing and no env var is set). +LIBKRUNFW_REPO="${LIBKRUNFW_REPO:-https://github.com/containers/libkrunfw.git}" +LIBKRUNFW_REF="${LIBKRUNFW_REF:-main}" +OUTPUT_DIR="${OPENSHELL_RUNTIME_OUTPUT_DIR:-${PROJECT_ROOT}/target/custom-runtime}" +BUILD_DIR="${PROJECT_ROOT}/target/libkrunfw-build" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --output-dir) + OUTPUT_DIR="$2"; shift 2 ;; + --libkrunfw-ref) + LIBKRUNFW_REF="$2"; shift 2 ;; + --help|-h) + echo "Usage: $0 [--output-dir DIR] [--libkrunfw-ref REF]" + echo "" + echo "Build a custom libkrunfw with bridge/netfilter kernel support." + echo "" + echo "Options:" + echo " --output-dir DIR Output directory for built artifacts" + echo " --libkrunfw-ref REF Git ref to check out (default: main)" + echo "" + echo "Environment:" + echo " LIBKRUNFW_REPO Git repo URL" + echo " LIBKRUNFW_REF Git ref (branch/tag/commit)" + echo " OPENSHELL_RUNTIME_OUTPUT_DIR Output directory" + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2; exit 1 ;; + esac +done + +echo "==> Building custom libkrunfw" +echo " Repo: ${LIBKRUNFW_REPO}" +echo " Ref: ${LIBKRUNFW_REF}" +echo " Config fragment: ${KERNEL_CONFIG_FRAGMENT}" +echo " Output: ${OUTPUT_DIR}" +echo "" + +# ── Clone / update libkrunfw ──────────────────────────────────────────── + +if [ -d "${BUILD_DIR}/libkrunfw/.git" ]; then + echo "==> Updating existing libkrunfw checkout..." + git -C "${BUILD_DIR}/libkrunfw" fetch origin + git -C "${BUILD_DIR}/libkrunfw" checkout "${LIBKRUNFW_REF}" + git -C "${BUILD_DIR}/libkrunfw" pull --ff-only 2>/dev/null || true +else + echo "==> Cloning libkrunfw..." 
+ mkdir -p "${BUILD_DIR}" + git clone "${LIBKRUNFW_REPO}" "${BUILD_DIR}/libkrunfw" + git -C "${BUILD_DIR}/libkrunfw" checkout "${LIBKRUNFW_REF}" +fi + +LIBKRUNFW_DIR="${BUILD_DIR}/libkrunfw" +LIBKRUNFW_COMMIT=$(git -C "${LIBKRUNFW_DIR}" rev-parse HEAD) +LIBKRUNFW_SHORT=$(git -C "${LIBKRUNFW_DIR}" rev-parse --short HEAD) + +echo " Commit: ${LIBKRUNFW_COMMIT}" + +# ── Detect the kernel version libkrunfw targets ──────────────────────── + +# libkrunfw's Makefile typically sets KERNEL_VERSION or has it in a +# config file. Try to detect it. +KERNEL_VERSION="" +if [ -f "${LIBKRUNFW_DIR}/Makefile" ]; then + KERNEL_VERSION=$(grep -oE 'KERNEL_VERSION\s*=\s*linux-[^\s]+' "${LIBKRUNFW_DIR}/Makefile" 2>/dev/null | head -1 | sed 's/.*= *//' || true) +fi +if [ -z "$KERNEL_VERSION" ] && [ -f "${LIBKRUNFW_DIR}/kernel_version" ]; then + KERNEL_VERSION=$(cat "${LIBKRUNFW_DIR}/kernel_version") +fi +echo " Kernel version: ${KERNEL_VERSION:-unknown}" + +# ── Apply kernel config fragment ──────────────────────────────────────── + +echo "==> Applying OpenShell kernel config fragment..." + +# libkrunfw builds the kernel with a config generated from its own +# sources. The config merge happens after `make olddefconfig` runs +# on the base config. We use the kernel's scripts/kconfig/merge_config.sh +# when available, otherwise do a simple append+olddefconfig. + +MERGE_HOOK="${LIBKRUNFW_DIR}/openshell-kconfig-hook.sh" +cat > "${MERGE_HOOK}" << 'HOOKEOF' +#!/usr/bin/env bash +# Hook called by the libkrunfw build after extracting the kernel source. +# Merges the OpenShell kernel config fragment into .config. +set -euo pipefail + +KERNEL_DIR="$1" +FRAGMENT="$2" + +if [ ! -d "$KERNEL_DIR" ]; then + echo "ERROR: kernel source dir not found: $KERNEL_DIR" >&2 + exit 1 +fi + +if [ ! 
-f "$FRAGMENT" ]; then + echo "ERROR: config fragment not found: $FRAGMENT" >&2 + exit 1 +fi + +cd "$KERNEL_DIR" + +if [ -f scripts/kconfig/merge_config.sh ]; then + echo " Using kernel merge_config.sh" + KCONFIG_CONFIG=.config ./scripts/kconfig/merge_config.sh -m .config "$FRAGMENT" +else + echo " Appending fragment and running olddefconfig" + cat "$FRAGMENT" >> .config +fi + +# Detect the kernel ARCH value from the host (or krunvm guest) architecture. +case "$(uname -m)" in + aarch64) KARCH="arm64" ;; + x86_64) KARCH="x86_64" ;; + *) KARCH="$(uname -m)" ;; +esac +echo " Kernel ARCH: ${KARCH}" +make ARCH="${KARCH}" olddefconfig + +# Verify critical configs are set +REQUIRED=( + CONFIG_BRIDGE + CONFIG_BRIDGE_NETFILTER + CONFIG_NETFILTER + CONFIG_NF_CONNTRACK + CONFIG_NF_NAT + CONFIG_IP_NF_IPTABLES + CONFIG_IP_NF_FILTER + CONFIG_IP_NF_NAT + CONFIG_NF_TABLES + CONFIG_NFT_NUMGEN + CONFIG_NFT_FIB_IPV4 + CONFIG_NFT_FIB_IPV6 + CONFIG_NFT_CT + CONFIG_NFT_NAT + CONFIG_NFT_MASQ + CONFIG_VETH + CONFIG_NET_NS +) + +MISSING=() +for cfg in "${REQUIRED[@]}"; do + if ! grep -q "^${cfg}=[ym]" .config; then + MISSING+=("$cfg") + fi +done + +if [ ${#MISSING[@]} -gt 0 ]; then + echo "ERROR: Required kernel configs not set after merge:" >&2 + printf " %s\n" "${MISSING[@]}" >&2 + exit 1 +fi + +echo " All required kernel configs verified." +HOOKEOF +chmod +x "${MERGE_HOOK}" + +# ── Build libkrunfw ──────────────────────────────────────────────────── + +echo "==> Building libkrunfw (this may take 10-30 minutes)..." 
+ +cd "${LIBKRUNFW_DIR}" + +# Detect macOS vs Linux and pick the right library extension / target +if [ "$(uname -s)" = "Darwin" ]; then + LIB_EXT="dylib" +else + LIB_EXT="so" +fi + +# Detect the kernel source directory name from the Makefile +KERNEL_DIR_NAME=$(grep -oE 'KERNEL_VERSION\s*=\s*linux-[^\s]+' Makefile | head -1 | sed 's/KERNEL_VERSION *= *//') +if [ -z "$KERNEL_DIR_NAME" ]; then + echo "ERROR: Could not detect KERNEL_VERSION from Makefile" >&2 + exit 1 +fi +echo " Kernel source dir: ${KERNEL_DIR_NAME}" + +if [ "$(uname -s)" = "Darwin" ]; then + # On macOS, use krunvm to build the kernel inside a lightweight Linux VM. + # This matches the upstream libkrunfw build approach and avoids all the + # issues with Docker emulation and APFS filesystem limitations. + # + # Prerequisites: brew tap slp/krun && brew install krunvm + + if ! command -v krunvm &>/dev/null; then + echo "ERROR: krunvm is required to build the kernel on macOS" >&2 + echo " Install with: brew tap slp/krun && brew install krunvm" >&2 + exit 1 + fi + + echo "==> Building kernel inside krunvm (macOS detected)..." + + VM_NAME="libkrunfw-openshell" + + # Clean up any leftover VM from a previous failed run + krunvm delete "${VM_NAME}" 2>/dev/null || true + + # Copy the config fragment into the libkrunfw tree so the VM can see it. + # The merge hook (MERGE_HOOK) is already written there by the cat above. + cp -f "${KERNEL_CONFIG_FRAGMENT}" "${LIBKRUNFW_DIR}/openshell.kconfig" + + echo " Creating VM..." + # krunvm may print "The volume has been configured" on first use of a + # volume path and exit non-zero. Retry once if that happens. + if ! krunvm create fedora \ + --name "${VM_NAME}" \ + --cpus 4 \ + --mem 4096 \ + -v "${LIBKRUNFW_DIR}:/work" \ + -w /work; then + echo " Retrying VM creation..." + krunvm create fedora \ + --name "${VM_NAME}" \ + --cpus 4 \ + --mem 4096 \ + -v "${LIBKRUNFW_DIR}:/work" \ + -w /work + fi + + echo " Installing build dependencies..." 
+ krunvm start "${VM_NAME}" /usr/bin/dnf -- install -y \ + 'dnf-command(builddep)' python3-pyelftools + + krunvm start "${VM_NAME}" /usr/bin/dnf -- builddep -y kernel + + # Step 1: prepare kernel sources (download, extract, patch, base config) + echo " Preparing kernel sources..." + krunvm start "${VM_NAME}" /usr/bin/make -- "${KERNEL_DIR_NAME}" + + # Step 2: merge the OpenShell config fragment + echo " Merging OpenShell kernel config fragment..." + krunvm start "${VM_NAME}" /usr/bin/bash -- \ + /work/openshell-kconfig-hook.sh "/work/${KERNEL_DIR_NAME}" /work/openshell.kconfig + + # Step 3: build the kernel and generate the C bundle + echo " Building kernel (this is the slow part)..." + krunvm start "${VM_NAME}" /usr/bin/make -- -j4 + + echo " Cleaning up VM..." + krunvm delete "${VM_NAME}" + + # Clean up temp files from the libkrunfw tree + rm -f "${LIBKRUNFW_DIR}/openshell.kconfig" + + if [ ! -f "${LIBKRUNFW_DIR}/kernel.c" ]; then + echo "ERROR: kernel.c was not produced — build failed" >&2 + exit 1 + fi + + # Compile the shared library on the host (uses host cc for a .dylib) + echo "==> Compiling libkrunfw.dylib on host..." + ABI_VERSION=$(grep -oE 'ABI_VERSION\s*=\s*[0-9]+' Makefile | head -1 | sed 's/[^0-9]//g') + cc -fPIC -DABI_VERSION="${ABI_VERSION}" -shared -o "libkrunfw.${ABI_VERSION}.dylib" kernel.c +else + # On Linux, we can do everything natively in three steps: + + # Step 1: prepare kernel sources + echo " Preparing kernel sources..." + make "${KERNEL_DIR_NAME}" + + # Step 2: merge config fragment + echo "==> Merging OpenShell kernel config fragment..." + bash "${MERGE_HOOK}" "${LIBKRUNFW_DIR}/${KERNEL_DIR_NAME}" "${KERNEL_CONFIG_FRAGMENT}" + + # Step 3: build the kernel and shared library + make -j"$(nproc)" "$(grep -oE 'KRUNFW_BINARY_Linux\s*=\s*\S+' Makefile | head -1 | sed 's/[^=]*= *//')" || \ + make -j"$(nproc)" libkrunfw.so +fi + +# ── Stage output artifacts ────────────────────────────────────────────── + +echo "==> Staging artifacts..." 
+mkdir -p "${OUTPUT_DIR}"
+
+# Find the built library — check versioned names first. On macOS the
+# versioned name is libkrunfw.5.dylib (matched by libkrunfw*.dylib), but on
+# Linux it is libkrunfw.so.5 — that name does NOT end in ".so", so it needs
+# its own pattern (libkrunfw.so.*) or staging silently fails on Linux when
+# only the versioned library was produced.
+BUILT_LIB=""
+for candidate in \
+  "${LIBKRUNFW_DIR}"/libkrunfw*.${LIB_EXT} \
+  "${LIBKRUNFW_DIR}"/libkrunfw.${LIB_EXT}.* \
+  "${LIBKRUNFW_DIR}/libkrunfw.${LIB_EXT}" \
+  "${LIBKRUNFW_DIR}/target/release/libkrunfw.${LIB_EXT}" \
+  "${LIBKRUNFW_DIR}/build/libkrunfw.${LIB_EXT}"; do
+  if [ -f "$candidate" ]; then
+    BUILT_LIB="$candidate"
+    break
+  fi
+done
+
+if [ -z "$BUILT_LIB" ]; then
+  # Both lines go to stderr so the full error survives stdout redirection.
+  echo "ERROR: Could not find built libkrunfw.${LIB_EXT}" >&2
+  echo "       Searched in ${LIBKRUNFW_DIR}/ for libkrunfw*.${LIB_EXT} and libkrunfw.${LIB_EXT}.*" >&2
+  exit 1
+fi
+
+echo "   Found library: ${BUILT_LIB}"
+
+# Compute SHA-256 (sha256sum on Linux, shasum fallback on macOS)
+if command -v sha256sum &>/dev/null; then
+  ARTIFACT_HASH=$(sha256sum "${BUILT_LIB}" | cut -d' ' -f1)
+else
+  ARTIFACT_HASH=$(shasum -a 256 "${BUILT_LIB}" | cut -d' ' -f1)
+fi
+ARTIFACT_HASH_SHORT="${ARTIFACT_HASH:0:12}"
+
+# Copy the library — always stage as libkrunfw.dylib / libkrunfw.so
+# (the base name the runtime loader expects) plus the original name
+cp "${BUILT_LIB}" "${OUTPUT_DIR}/libkrunfw.${LIB_EXT}"
+BUILT_BASENAME="$(basename "${BUILT_LIB}")"
+if [ "${BUILT_BASENAME}" != "libkrunfw.${LIB_EXT}" ]; then
+  cp "${BUILT_LIB}" "${OUTPUT_DIR}/${BUILT_BASENAME}"
+fi
+
+# Copy the kernel config that was actually used (for reproducibility)
+KERNEL_SRC_DIR=""
+for candidate in \
+  "${LIBKRUNFW_DIR}/linux-"* \
+  "${LIBKRUNFW_DIR}/build/linux-"* \
+  "${LIBKRUNFW_DIR}/kernel/linux-"*; do
+  if [ -d "$candidate" ] && [ -f "${candidate}/.config" ]; then
+    KERNEL_SRC_DIR="$candidate"
+    break
+  fi
+done
+
+if [ -n "$KERNEL_SRC_DIR" ] && [ -f "${KERNEL_SRC_DIR}/.config" ]; then
+  cp "${KERNEL_SRC_DIR}/.config" "${OUTPUT_DIR}/kernel.config"
+fi
+
+# Copy our fragment for reference
+cp "${KERNEL_CONFIG_FRAGMENT}" "${OUTPUT_DIR}/openshell.kconfig"
+
+# ── Write provenance metadata ──────────────────────────────────────────
+
+cat > "${OUTPUT_DIR}/provenance.json" << EOF
+{
+  "artifact": 
"libkrunfw-custom", + "version": "0.1.0-openshell", + "build_timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)", + "libkrunfw_repo": "${LIBKRUNFW_REPO}", + "libkrunfw_ref": "${LIBKRUNFW_REF}", + "libkrunfw_commit": "${LIBKRUNFW_COMMIT}", + "kernel_version": "${KERNEL_VERSION:-unknown}", + "kernel_config_fragment": "openshell.kconfig", + "artifact_sha256": "${ARTIFACT_HASH}", + "host_os": "$(uname -s)", + "host_arch": "$(uname -m)", + "builder": "build-custom-libkrunfw.sh" +} +EOF + +echo "" +echo "==> Build complete" +echo " Library: ${OUTPUT_DIR}/libkrunfw.${LIB_EXT}" +echo " SHA256: ${ARTIFACT_HASH_SHORT}..." +echo " Provenance: ${OUTPUT_DIR}/provenance.json" +echo " Commit: ${LIBKRUNFW_SHORT}" +echo "" +echo "To use this runtime:" +echo " export OPENSHELL_VM_RUNTIME_SOURCE_DIR=${OUTPUT_DIR}" +echo " mise run vm:build" diff --git a/crates/openshell-vm/runtime/kernel/openshell.kconfig b/crates/openshell-vm/runtime/kernel/openshell.kconfig new file mode 100644 index 000000000..cc219f50d --- /dev/null +++ b/crates/openshell-vm/runtime/kernel/openshell.kconfig @@ -0,0 +1,121 @@ +# Custom kernel config fragment for libkrunfw (OpenShell VM) +# +# This fragment is applied on top of libkrunfw's base kernel config +# to enable bridge CNI, netfilter/iptables, and conntrack support +# required for Kubernetes pod networking in the VM. +# +# Apply with: scripts/merge-kconfig.sh +# +# See also: check-vm-capabilities.sh for runtime verification. 
+
+# ── Network Namespaces (required for pod isolation) ─────────────────────
+CONFIG_NET_NS=y
+CONFIG_NAMESPACES=y
+
+# ── Virtual Ethernet (veth pairs for pod networking) ────────────────────
+CONFIG_VETH=y
+
+# ── Linux Bridge (required for bridge CNI plugin) ──────────────────────
+CONFIG_BRIDGE=y
+CONFIG_BRIDGE_NETFILTER=y
+CONFIG_BRIDGE_IGMP_SNOOPING=y
+
+# ── Netfilter framework ────────────────────────────────────────────────
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_INGRESS=y
+CONFIG_NETFILTER_NETLINK=y
+CONFIG_NETFILTER_NETLINK_QUEUE=y
+CONFIG_NETFILTER_NETLINK_LOG=y
+
+# ── Connection tracking (required for NAT and kube-proxy) ──────────────
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+
+# ── Netfilter xtables match modules (required by kube-proxy & kubelet) ─
+# kube-proxy uses xt_conntrack for stateful rules and xt_comment for
+# labeling chains. Without these, iptables fails with:
+#   "Couldn't load match 'conntrack': No such file or directory"
+CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y
+CONFIG_NETFILTER_XT_MATCH_RECENT=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_TARGET_LOG=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+
+# ── NAT (required for service VIP / DNAT / SNAT) ──────────────────────
+CONFIG_NF_NAT=y
+# NOTE(review): CONFIG_NF_NAT_MASQUERADE_IPV4 was folded into CONFIG_NF_NAT
+# in kernel 5.1; on a 6.x kernel this symbol no longer exists and will be
+# silently dropped by olddefconfig (the merge hook's REQUIRED check does not
+# include it, so the build still passes). Confirm against the pinned kernel
+# version and drop the line if obsolete.
+CONFIG_NF_NAT_MASQUERADE_IPV4=y
+
+# ── iptables (CNI bridge masquerade + compat) ──────────────────────────
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_REJECT=y
+
+# ── nftables (kube-proxy nftables mode — primary proxy backend)
───────── +# kube-proxy nftables proxier requires: numgen (random LB), fib (local +# address detection), counter, ct, nat, masq, reject, limit, redir. +CONFIG_NF_TABLES=y +CONFIG_NF_TABLES_INET=y +CONFIG_NFT_CT=y +CONFIG_NFT_NAT=y +CONFIG_NFT_MASQ=y +CONFIG_NFT_REJECT=y +CONFIG_NFT_COMPAT=y +CONFIG_NFT_NUMGEN=y +CONFIG_NFT_FIB_IPV4=y +CONFIG_NFT_FIB_IPV6=y +CONFIG_NFT_LIMIT=y +CONFIG_NFT_REDIR=y +CONFIG_NFT_TPROXY=y + +# ── IP forwarding and routing (required for pod-to-pod) ──────────────── +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_NET_IP_TUNNEL=y + +# ── IPVS (optional: kube-proxy IPVS mode) ───────────────────────────── +CONFIG_IP_VS=y +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_RR=y +CONFIG_IP_VS_WRR=y +CONFIG_IP_VS_SH=y +CONFIG_IP_VS_NFCT=y + +# ── Misc networking required by Kubernetes ───────────────────────────── +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_CGROUP=y +CONFIG_CGROUP_NET_PRIO=y +CONFIG_CGROUP_NET_CLASSID=y + +# ── Dummy interface (fallback networking) ────────────────────────────── +CONFIG_DUMMY=y + +# ── TUN/TAP (used by some CNI plugins) ──────────────────────────────── +CONFIG_TUN=y + +# ── Cgroups (already in base, ensure v2 is available) ────────────────── +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PIDS=y +CONFIG_MEMCG=y + +# ── Security features required by the sandbox runtime ─────────────────── +CONFIG_SECURITY_LANDLOCK=y +CONFIG_SECCOMP_FILTER=y diff --git a/crates/openshell-vm/scripts/build-rootfs.sh b/crates/openshell-vm/scripts/build-rootfs.sh new file mode 100755 index 000000000..16a0a23de --- /dev/null +++ b/crates/openshell-vm/scripts/build-rootfs.sh @@ -0,0 +1,763 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build a Ubuntu rootfs for the openshell-vm microVM. 
+# +# By default, produces a fully pre-initialized rootfs with k3s pre-installed, +# the OpenShell helm chart and manifests baked in, container images pre-loaded, +# AND a fully initialized k3s cluster state (database, TLS, images imported, +# all services deployed). On first VM boot, k3s resumes from this pre-baked +# state instead of cold-starting, achieving ~3-5s startup times. +# +# With --base, produces a lightweight rootfs (~200-300MB) with: +# - Base Ubuntu with k3s binary +# - OpenShell supervisor binary +# - Helm charts and Kubernetes manifests +# - NO pre-loaded container images (pulled on demand) +# - NO pre-initialized k3s state (cold start on first boot) +# First boot will be slower (~30-60s) as k3s initializes and pulls images. +# +# Supports aarch64 and x86_64 guest architectures. The target architecture +# is auto-detected from the host but can be overridden with --arch. +# +# Usage: +# ./build-rootfs.sh [--base] [--arch aarch64|x86_64] [output_dir] +# +# If output_dir is omitted, the rootfs is built under target/rootfs-build. +# +# Requires: Docker (or compatible container runtime), curl, helm +# Full mode (default) also requires: zstd, sqlite3, a built openshell-vm binary + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source pinned dependency versions (digests, checksums, commit SHAs). +# Environment variables override pins — see pins.env for details. 
+PINS_FILE="${SCRIPT_DIR}/../pins.env" +if [ -f "$PINS_FILE" ]; then + # shellcheck source=../pins.env + source "$PINS_FILE" +fi + +# ── Argument parsing ─────────────────────────────────────────────────── +BASE_ONLY=false +GUEST_ARCH="" +POSITIONAL_ARGS=() +while [[ $# -gt 0 ]]; do + case "$1" in + --base) + BASE_ONLY=true; shift ;; + --arch) + GUEST_ARCH="$2"; shift 2 ;; + *) + POSITIONAL_ARGS+=("$1"); shift ;; + esac +done + +# ── Architecture detection ───────────────────────────────────────────── +# Allow override via --arch flag; default to host architecture. +if [ -z "$GUEST_ARCH" ]; then + case "$(uname -m)" in + aarch64|arm64) GUEST_ARCH="aarch64" ;; + x86_64) GUEST_ARCH="x86_64" ;; + *) + echo "ERROR: Unsupported host architecture: $(uname -m)" >&2 + echo " Use --arch aarch64 or --arch x86_64 to override." >&2 + exit 1 + ;; + esac +fi + +case "$GUEST_ARCH" in + aarch64) + DOCKER_PLATFORM="linux/arm64" + K3S_BINARY_SUFFIX="-arm64" + K3S_CHECKSUM_VAR="K3S_ARM64_SHA256" + RUST_TARGET="aarch64-unknown-linux-gnu" + ;; + x86_64) + DOCKER_PLATFORM="linux/amd64" + K3S_BINARY_SUFFIX="" # x86_64 binary has no suffix + K3S_CHECKSUM_VAR="K3S_AMD64_SHA256" + RUST_TARGET="x86_64-unknown-linux-gnu" + ;; + *) + echo "ERROR: Unsupported guest architecture: ${GUEST_ARCH}" >&2 + echo " Supported: aarch64, x86_64" >&2 + exit 1 + ;; +esac + +# Project root (two levels up from crates/openshell-vm/scripts/) +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" +DEFAULT_ROOTFS="${PROJECT_ROOT}/target/rootfs-build" +ROOTFS_DIR="${POSITIONAL_ARGS[0]:-${DEFAULT_ROOTFS}}" +CONTAINER_NAME="krun-rootfs-builder" +BASE_IMAGE_TAG="krun-rootfs:openshell-vm" +# K3S_VERSION uses the semver "+" form for GitHub releases. +# The mise env may provide the Docker-tag form with "-" instead of "+"; +# normalise to "+" so the GitHub download URL works. +K3S_VERSION="${K3S_VERSION:-v1.35.2+k3s1}" +K3S_VERSION="${K3S_VERSION//-k3s/+k3s}" + +# Container images to pre-load into k3s (full mode only). 
+# AGENT_SANDBOX_IMAGE and COMMUNITY_SANDBOX_IMAGE are digest-pinned in pins.env. +# SERVER_IMAGE is intentionally unpinned (local dev artifact). +IMAGE_REPO_BASE="${IMAGE_REPO_BASE:-openshell}" +IMAGE_TAG="${IMAGE_TAG:-dev}" +SERVER_IMAGE="${IMAGE_REPO_BASE}/gateway:${IMAGE_TAG}" + +# Cross-platform checksum helper +verify_checksum() { + local expected="$1" file="$2" + if command -v sha256sum &>/dev/null; then + echo "${expected} ${file}" | sha256sum -c - + else + echo "${expected} ${file}" | shasum -a 256 -c - + fi +} + +if [ "$BASE_ONLY" = true ]; then + echo "==> Building base openshell-vm rootfs" + echo " Guest arch: ${GUEST_ARCH}" + echo " k3s version: ${K3S_VERSION}" + echo " Output: ${ROOTFS_DIR}" + echo " Mode: base (no pre-loaded images, cold start)" +else + echo "==> Building openshell-vm rootfs" + echo " Guest arch: ${GUEST_ARCH}" + echo " k3s version: ${K3S_VERSION}" + echo " Images: ${SERVER_IMAGE}, ${COMMUNITY_SANDBOX_IMAGE}" + echo " Output: ${ROOTFS_DIR}" + echo " Mode: full (pre-loaded images, pre-initialized)" +fi +echo "" + +# ── Check for running VM ──────────────────────────────────────────────── +# If an openshell-vm is using this rootfs via virtio-fs, wiping the rootfs +# corrupts the VM's filesystem (e.g. /var disappears) causing cascading +# k3s failures. We use two checks: +# +# 1. flock: The Rust openshell-vm process holds an exclusive flock on the lock +# file for its entire lifetime. This is the primary guard — it works +# even if the state file was deleted, and the OS releases the lock +# automatically when the process dies (including SIGKILL). +# +# 2. State file: Fallback check for the PID in the state file. This +# catches VMs launched before the flock guard was added. + +VM_LOCK_FILE="$(dirname "${ROOTFS_DIR}")/$(basename "${ROOTFS_DIR}")-vm.lock" +if [ -f "${VM_LOCK_FILE}" ]; then + # Try to acquire the lock non-blocking. Use Python's fcntl.flock() + # because the `flock` CLI tool is not available on macOS. + if ! 
python3 -c " +import fcntl, os, sys +fd = os.open(sys.argv[1], os.O_RDONLY) +try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + fcntl.flock(fd, fcntl.LOCK_UN) +except BlockingIOError: + sys.exit(1) +finally: + os.close(fd) +" "${VM_LOCK_FILE}" 2>/dev/null; then + HOLDER_PID=$(cat "${VM_LOCK_FILE}" 2>/dev/null | tr -d '[:space:]') + echo "" + echo "ERROR: An openshell-vm (pid ${HOLDER_PID:-unknown}) holds a lock on this rootfs." + echo " Wiping the rootfs while the VM is running will corrupt its" + echo " filesystem and cause k3s failures." + echo "" + echo " Stop the VM first: kill ${HOLDER_PID:-}" + echo " Then re-run this script." + echo "" + exit 1 + fi +fi + +VM_STATE_FILE="$(dirname "${ROOTFS_DIR}")/$(basename "${ROOTFS_DIR}")-vm-state.json" +if [ -f "${VM_STATE_FILE}" ]; then + VM_PID=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1]))['pid'])" "${VM_STATE_FILE}" 2>/dev/null || echo "") + if [ -n "${VM_PID}" ] && kill -0 "${VM_PID}" 2>/dev/null; then + echo "" + echo "ERROR: An openshell-vm is running (pid ${VM_PID}) using this rootfs." + echo " Wiping the rootfs while the VM is running will corrupt its" + echo " filesystem and cause k3s failures." + echo "" + echo " Stop the VM first: kill ${VM_PID}" + echo " Then re-run this script." + echo "" + exit 1 + else + # Stale state file — VM is no longer running. Clean it up. + rm -f "${VM_STATE_FILE}" + fi +fi + +# ── Download k3s binary (outside Docker — much faster) ───────────────── + +K3S_BIN="/tmp/k3s-${GUEST_ARCH}-${K3S_VERSION}" +if [ -f "${K3S_BIN}" ]; then + echo "==> Using cached k3s binary: ${K3S_BIN}" +else + echo "==> Downloading k3s ${K3S_VERSION} for ${GUEST_ARCH}..." + curl -fSL "https://github.com/k3s-io/k3s/releases/download/${K3S_VERSION}/k3s${K3S_BINARY_SUFFIX}" \ + -o "${K3S_BIN}" + chmod +x "${K3S_BIN}" +fi + +# Verify k3s binary integrity. +K3S_CHECKSUM="${!K3S_CHECKSUM_VAR:-}" +if [ -n "${K3S_CHECKSUM}" ]; then + echo "==> Verifying k3s binary checksum..." 
+ verify_checksum "${K3S_CHECKSUM}" "${K3S_BIN}" +else + echo "WARNING: ${K3S_CHECKSUM_VAR} not set, skipping checksum verification" +fi + +# ── Build base image with dependencies ───────────────────────────────── + +# Clean up any previous run +docker rm -f "${CONTAINER_NAME}" 2>/dev/null || true + +echo "==> Building base image..." +docker build --platform "${DOCKER_PLATFORM}" -t "${BASE_IMAGE_TAG}" \ + --build-arg "BASE_IMAGE=${VM_BASE_IMAGE}" -f - . <<'DOCKERFILE' +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + e2fsprogs \ + iptables \ + iproute2 \ + python3 \ + busybox-static \ + sqlite3 \ + util-linux \ + zstd \ + && rm -rf /var/lib/apt/lists/* +# busybox-static provides udhcpc for DHCP inside the VM. +RUN mkdir -p /usr/share/udhcpc && \ + ln -sf /bin/busybox /sbin/udhcpc +RUN mkdir -p /var/lib/rancher/k3s /etc/rancher/k3s +DOCKERFILE + +# Create a container and export the filesystem +echo "==> Creating container..." +docker create --platform "${DOCKER_PLATFORM}" --name "${CONTAINER_NAME}" "${BASE_IMAGE_TAG}" /bin/true + +echo "==> Exporting filesystem..." +# Previous builds may leave overlayfs work/ dirs with permissions that +# prevent rm on macOS. Force-fix permissions before removing. +if [ -d "${ROOTFS_DIR}" ]; then + chmod -R u+rwx "${ROOTFS_DIR}" 2>/dev/null || true + rm -rf "${ROOTFS_DIR}" +fi +mkdir -p "${ROOTFS_DIR}" +docker export "${CONTAINER_NAME}" | tar -C "${ROOTFS_DIR}" -xf - + +docker rm "${CONTAINER_NAME}" + +# ── Inject k3s binary ──────────────────────────────────────────────── + +echo "==> Injecting k3s binary..." +cp "${K3S_BIN}" "${ROOTFS_DIR}/usr/local/bin/k3s" +chmod +x "${ROOTFS_DIR}/usr/local/bin/k3s" +ln -sf /usr/local/bin/k3s "${ROOTFS_DIR}/usr/local/bin/kubectl" + +# k3s self-extracts runtime binaries (containerd, runc, CNI plugins, +# coreutils, etc.) into a versioned data directory the first time it +# runs. 
On the pre-initialized rootfs these were extracted during the +# Docker build or VM pre-init phase. docker export and macOS virtio-fs +# can strip execute bits from Linux ELF binaries, so fix them here. +echo " Fixing execute permissions on k3s data binaries..." +chmod +x "${ROOTFS_DIR}"/var/lib/rancher/k3s/data/*/bin/* 2>/dev/null || true +chmod +x "${ROOTFS_DIR}"/var/lib/rancher/k3s/data/*/bin/aux/* 2>/dev/null || true + +# ── Inject scripts ──────────────────────────────────────────────────── + +echo "==> Injecting scripts..." +mkdir -p "${ROOTFS_DIR}/srv" +cp "${SCRIPT_DIR}/openshell-vm-init.sh" "${ROOTFS_DIR}/srv/openshell-vm-init.sh" +chmod +x "${ROOTFS_DIR}/srv/openshell-vm-init.sh" + +# Inject VM capability checker for runtime diagnostics. +cp "${SCRIPT_DIR}/check-vm-capabilities.sh" "${ROOTFS_DIR}/srv/check-vm-capabilities.sh" +chmod +x "${ROOTFS_DIR}/srv/check-vm-capabilities.sh" + +# Inject the openshell-vm exec agent used by `openshell-vm exec`. +cp "${SCRIPT_DIR}/openshell-vm-exec-agent.py" "${ROOTFS_DIR}/srv/openshell-vm-exec-agent.py" +chmod +x "${ROOTFS_DIR}/srv/openshell-vm-exec-agent.py" + +# ── Build and inject openshell-sandbox supervisor binary ───────────── +# The supervisor binary runs inside every sandbox pod. It is side-loaded +# from the node filesystem via a read-only hostPath volume mount at +# /opt/openshell/bin. In the Docker-based gateway this is built in the +# Dockerfile.cluster supervisor-builder stage; here we cross-compile +# from the host using cargo-zigbuild. + +SUPERVISOR_TARGET="${RUST_TARGET}" +SUPERVISOR_BIN="${PROJECT_ROOT}/target/${SUPERVISOR_TARGET}/release/openshell-sandbox" + +echo "==> Building openshell-sandbox supervisor binary (${SUPERVISOR_TARGET})..." +if ! command -v cargo-zigbuild >/dev/null 2>&1; then + echo "ERROR: cargo-zigbuild is not installed." 
+ echo " Install it with: cargo install cargo-zigbuild" + echo " Also requires: zig (brew install zig)" + exit 1 +fi + +cargo zigbuild --release -p openshell-sandbox --target "${SUPERVISOR_TARGET}" \ + --manifest-path "${PROJECT_ROOT}/Cargo.toml" 2>&1 | tail -5 + +if [ ! -f "${SUPERVISOR_BIN}" ]; then + echo "ERROR: supervisor binary not found at ${SUPERVISOR_BIN}" + exit 1 +fi + +echo " Injecting supervisor binary into rootfs..." +mkdir -p "${ROOTFS_DIR}/opt/openshell/bin" +cp "${SUPERVISOR_BIN}" "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" +chmod +x "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" +echo " Size: $(du -h "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" | cut -f1)" + +# ── Package and inject helm chart ──────────────────────────────────── + +HELM_CHART_DIR="${PROJECT_ROOT}/deploy/helm/openshell" +CHART_DEST="${ROOTFS_DIR}/var/lib/rancher/k3s/server/static/charts" + +if [ -d "${HELM_CHART_DIR}" ]; then + echo "==> Packaging helm chart..." + mkdir -p "${CHART_DEST}" + helm package "${HELM_CHART_DIR}" -d "${CHART_DEST}" + echo " $(ls "${CHART_DEST}"/*.tgz 2>/dev/null | xargs -I{} basename {})" + # Also stage to /opt/openshell/charts/ so the init script can + # restore them after a --reset wipes server/static/charts/. + mkdir -p "${ROOTFS_DIR}/opt/openshell/charts" + cp "${CHART_DEST}"/*.tgz "${ROOTFS_DIR}/opt/openshell/charts/" +else + echo "WARNING: Helm chart not found at ${HELM_CHART_DIR}, skipping" +fi + +# ── Inject Kubernetes manifests ────────────────────────────────────── +# These are copied to /opt/openshell/manifests/ (staging). openshell-vm-init.sh +# moves them to /var/lib/rancher/k3s/server/manifests/ at boot so the +# k3s Helm Controller auto-deploys them. + +MANIFEST_SRC="${PROJECT_ROOT}/deploy/kube/manifests" +MANIFEST_DEST="${ROOTFS_DIR}/opt/openshell/manifests" + +echo "==> Injecting Kubernetes manifests..." 
+mkdir -p "${MANIFEST_DEST}" + +for manifest in openshell-helmchart.yaml agent-sandbox.yaml; do + if [ -f "${MANIFEST_SRC}/${manifest}" ]; then + cp "${MANIFEST_SRC}/${manifest}" "${MANIFEST_DEST}/" + echo " ${manifest}" + else + echo "WARNING: ${manifest} not found in ${MANIFEST_SRC}" + fi +done + +# ── Base mode: mark rootfs type and skip pre-loading ─────────────────── + +if [ "$BASE_ONLY" = true ]; then + # k3s expects this directory to exist for airgap image loading. + mkdir -p "${ROOTFS_DIR}/var/lib/rancher/k3s/agent/images" + + # Mark as base (not pre-initialized). The init script checks for + # this file to determine if cold start is expected. + echo "base" > "${ROOTFS_DIR}/opt/openshell/.rootfs-type" + + # ── Verify ───────────────────────────────────────────────────────── + if [ ! -f "${ROOTFS_DIR}/usr/local/bin/k3s" ]; then + echo "ERROR: k3s binary not found in rootfs." + exit 1 + fi + + if [ ! -x "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" ]; then + echo "ERROR: openshell-sandbox supervisor binary not found in rootfs." + exit 1 + fi + + echo "" + echo "==> Base rootfs ready at: ${ROOTFS_DIR}" + echo " Size: $(du -sh "${ROOTFS_DIR}" | cut -f1)" + echo " Type: base (cold start, images pulled on demand)" + echo "" + echo "Note: First boot will take ~30-60s as k3s initializes." + echo " Container images will be pulled from registries on first use." + exit 0 +fi + +# ══════════════════════════════════════════════════════════════════════════ +# Full mode: pre-load images and pre-initialize k3s cluster state +# ══════════════════════════════════════════════════════════════════════════ + +# ── Pre-load container images ──────────────────────────────────────── +# Pull images for the target architecture and save as tarballs in the +# k3s airgap images directory. k3s auto-imports from +# /var/lib/rancher/k3s/agent/images/ on startup, so no internet access +# is needed at boot time. 
+# +# Tarballs are cached in a persistent directory outside the rootfs so +# they survive rebuilds. This avoids re-pulling and re-saving ~1 GiB +# of images each time. + +IMAGES_DIR="${ROOTFS_DIR}/var/lib/rancher/k3s/agent/images" +IMAGE_CACHE_DIR="${XDG_CACHE_HOME:-${HOME}/.cache}/openshell/openshell-vm/images" +mkdir -p "${IMAGES_DIR}" "${IMAGE_CACHE_DIR}" + +echo "==> Pre-loading container images (${GUEST_ARCH})..." + +pull_and_save() { + local image="$1" + local output="$2" + local cache="${IMAGE_CACHE_DIR}/$(basename "${output}")" + + # Use cached tarball if available. + if [ -f "${cache}" ]; then + echo " cached: $(basename "${output}")" + cp "${cache}" "${output}" + return 0 + fi + + # Try to pull; if the registry is unavailable, fall back to the + # local Docker image cache (image may exist from a previous pull). + echo " pulling: ${image}..." + if ! docker pull --platform "${DOCKER_PLATFORM}" "${image}" --quiet 2>/dev/null; then + echo " pull failed, checking local Docker cache..." + if ! docker image inspect "${image}" >/dev/null 2>&1; then + echo "ERROR: image ${image} not available locally or from registry" + exit 1 + fi + echo " using locally cached image" + fi + + echo " saving: $(basename "${output}")..." + # Pipe through zstd for faster decompression and smaller tarballs. + # k3s auto-imports .tar.zst files from the airgap images directory. + # -T0 uses all CPU cores; -3 is a good speed/ratio tradeoff. + docker save "${image}" | zstd -T0 -3 -o "${output}" + # Cache for next rebuild. + cp "${output}" "${cache}" +} + +pull_and_save "${SERVER_IMAGE}" "${IMAGES_DIR}/openshell-server.tar.zst" +pull_and_save "${AGENT_SANDBOX_IMAGE}" "${IMAGES_DIR}/agent-sandbox-controller.tar.zst" +pull_and_save "${COMMUNITY_SANDBOX_IMAGE}" "${IMAGES_DIR}/community-sandbox-base.tar.zst" + +# ── Pre-initialize k3s cluster state ───────────────────────────────── +# Boot k3s inside a Docker container using the rootfs we just built. 
+# Wait for it to fully initialize (import images, deploy manifests, +# create database), then capture the state back into the rootfs. +# +# This eliminates cold-start latency: on VM boot, k3s finds existing +# state and resumes in ~3-5 seconds instead of 30-60s. + +echo "" +echo "==> Pre-initializing k3s cluster state..." +echo " This boots k3s in a container, waits for full readiness," +echo " then captures the initialized state into the rootfs." + +# Patch the HelmChart manifest for the init container (same patches +# openshell-vm-init.sh applies at runtime). +INIT_MANIFESTS="${ROOTFS_DIR}/var/lib/rancher/k3s/server/manifests" +mkdir -p "${INIT_MANIFESTS}" + +# Copy manifests from staging to the k3s manifest directory. +for manifest in "${MANIFEST_DEST}"/*.yaml; do + [ -f "$manifest" ] || continue + cp "$manifest" "${INIT_MANIFESTS}/" +done + +# Patch HelmChart for local images and VM settings. +HELMCHART="${INIT_MANIFESTS}/openshell-helmchart.yaml" +if [ -f "$HELMCHART" ]; then + # Use local images — explicitly imported into containerd. + sed -i '' 's|__IMAGE_PULL_POLICY__|IfNotPresent|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__IMAGE_PULL_POLICY__|IfNotPresent|g' "$HELMCHART" + sed -i '' 's|__SANDBOX_IMAGE_PULL_POLICY__|"IfNotPresent"|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__SANDBOX_IMAGE_PULL_POLICY__|"IfNotPresent"|g' "$HELMCHART" + sed -i '' 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g' "$HELMCHART" + # Use the locally imported image references. 
+ sed -i '' -E "s|repository:[[:space:]]*[^[:space:]]+|repository: ${SERVER_IMAGE%:*}|" "$HELMCHART" 2>/dev/null \ + || sed -i -E "s|repository:[[:space:]]*[^[:space:]]+|repository: ${SERVER_IMAGE%:*}|" "$HELMCHART" + sed -i '' -E "s|tag:[[:space:]]*\"?[^\"[:space:]]+\"?|tag: \"${IMAGE_TAG}\"|" "$HELMCHART" 2>/dev/null \ + || sed -i -E "s|tag:[[:space:]]*\"?[^\"[:space:]]+\"?|tag: \"${IMAGE_TAG}\"|" "$HELMCHART" + # Clear SSH gateway placeholders. + sed -i '' 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" + sed -i '' 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" + sed -i '' 's|__DISABLE_GATEWAY_AUTH__|false|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__DISABLE_GATEWAY_AUTH__|false|g' "$HELMCHART" + sed -i '' 's|__DISABLE_TLS__|false|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__DISABLE_TLS__|false|g' "$HELMCHART" + sed -i '' 's|hostGatewayIP: __HOST_GATEWAY_IP__|hostGatewayIP: ""|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|hostGatewayIP: __HOST_GATEWAY_IP__|hostGatewayIP: ""|g' "$HELMCHART" + sed -i '' '/__CHART_CHECKSUM__/d' "$HELMCHART" 2>/dev/null \ + || sed -i '/__CHART_CHECKSUM__/d' "$HELMCHART" +fi + +# Patch agent-sandbox manifest for VM networking constraints. +AGENT_MANIFEST="${INIT_MANIFESTS}/agent-sandbox.yaml" +if [ -f "$AGENT_MANIFEST" ]; then + # Keep agent-sandbox on pod networking to avoid host port clashes. + # Point in-cluster client traffic at the API server node IP because + # kube-proxy is disabled in VM mode. 
+ sed -i '' '/hostNetwork: true/d' "$AGENT_MANIFEST" 2>/dev/null \ + || sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST" + sed -i '' '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" 2>/dev/null \ + || sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" + sed -i '' 's|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0\ + args:\ + - -metrics-bind-address=:8082\ + env:\ + - name: KUBERNETES_SERVICE_HOST\ + value: 192.168.127.2\ + - name: KUBERNETES_SERVICE_PORT\ + value: "6443"|g' "$AGENT_MANIFEST" 2>/dev/null \ + || sed -i 's|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0\ + args:\ + - -metrics-bind-address=:8082\ + env:\ + - name: KUBERNETES_SERVICE_HOST\ + value: 192.168.127.2\ + - name: KUBERNETES_SERVICE_PORT\ + value: "6443"|g' "$AGENT_MANIFEST" + if grep -q 'hostNetwork: true' "$AGENT_MANIFEST" \ + || grep -q 'ClusterFirstWithHostNet' "$AGENT_MANIFEST" \ + || ! grep -q 'KUBERNETES_SERVICE_HOST' "$AGENT_MANIFEST" \ + || ! grep -q 'metrics-bind-address=:8082' "$AGENT_MANIFEST"; then + echo "ERROR: failed to patch agent-sandbox manifest for VM networking constraints: $AGENT_MANIFEST" >&2 + exit 1 + fi +fi + +# local-path-provisioner (deployed by k3s from local-storage.yaml) provides +# PVC storage for sandbox workspace volumes. It requires CNI bridge +# networking, which is now available in the VM kernel. + +# ── Pre-initialize using the actual libkrun VM ────────────────────────── +# Boot the real VM with the rootfs we just built. This uses the same +# kernel, networking, and kube-proxy config as production — eliminating +# Docker IP mismatches, snapshotter mismatches, and the Docker volume +# copy-back dance. The VM writes state directly into the rootfs via +# virtio-fs. +# +# Requirements: the openshell-vm binary must be built and codesigned. 
+# mise run vm:build handles this. + +GATEWAY_BIN="${PROJECT_ROOT}/target/debug/openshell-vm" +RUNTIME_DIR="${PROJECT_ROOT}/target/debug/openshell-vm.runtime" + +if [ ! -x "${GATEWAY_BIN}" ]; then + echo "ERROR: openshell-vm binary not found at ${GATEWAY_BIN}" + echo " Run: mise run vm:build" + exit 1 +fi + +if [ ! -d "${RUNTIME_DIR}" ]; then + echo "ERROR: VM runtime bundle not found at ${RUNTIME_DIR}" + echo " Run: mise run vm:build" + exit 1 +fi + +# Helper: run a command inside the VM via the exec agent. +vm_exec() { + if [ "$(uname -s)" = "Darwin" ]; then + DYLD_FALLBACK_LIBRARY_PATH="${RUNTIME_DIR}${DYLD_FALLBACK_LIBRARY_PATH:+:${DYLD_FALLBACK_LIBRARY_PATH}}" \ + "${GATEWAY_BIN}" --rootfs "${ROOTFS_DIR}" exec -- "$@" 2>&1 + else + LD_LIBRARY_PATH="${RUNTIME_DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" \ + "${GATEWAY_BIN}" --rootfs "${ROOTFS_DIR}" exec -- "$@" 2>&1 + fi +} + +# Ensure no stale VM is using this rootfs. +echo " Starting VM for pre-initialization..." +if [ "$(uname -s)" = "Darwin" ]; then + export DYLD_FALLBACK_LIBRARY_PATH="${RUNTIME_DIR}${DYLD_FALLBACK_LIBRARY_PATH:+:${DYLD_FALLBACK_LIBRARY_PATH}}" +else + export LD_LIBRARY_PATH="${RUNTIME_DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" +fi +# Pre-initialize directly on virtio-fs. Runtime boots attach a separate +# block-backed state disk and seed it from the rootfs on first launch. +OPENSHELL_VM_DISABLE_STATE_DISK=1 "${GATEWAY_BIN}" --rootfs "${ROOTFS_DIR}" --reset & +VM_PID=$! + +# Ensure the VM is cleaned up on script exit. +cleanup_vm() { + if kill -0 "${VM_PID}" 2>/dev/null; then + echo " Stopping VM (pid ${VM_PID})..." + kill "${VM_PID}" 2>/dev/null || true + wait "${VM_PID}" 2>/dev/null || true + fi +} +trap cleanup_vm EXIT + +# Wait for the exec agent to become reachable. +echo " Waiting for VM exec agent..." 
+for i in $(seq 1 120); do + if vm_exec true >/dev/null 2>&1; then + echo " Exec agent ready (${i}s)" + break + fi + if [ "$i" -eq 120 ]; then + echo "ERROR: VM exec agent did not become reachable in 120s" + exit 1 + fi + sleep 1 +done + +# Wait for containerd to be ready. +echo " Waiting for containerd..." +for i in $(seq 1 60); do + if vm_exec k3s ctr version >/dev/null 2>&1; then + echo " Containerd ready (${i}s)" + break + fi + if [ "$i" -eq 60 ]; then + echo "ERROR: containerd did not become ready in 60s" + exit 1 + fi + sleep 1 +done + +# Wait for the openshell namespace (Helm controller creates it). +echo " Waiting for openshell namespace..." +for i in $(seq 1 180); do + if vm_exec kubectl get namespace openshell -o name 2>/dev/null | grep -q openshell; then + echo " Namespace ready (${i}s)" + break + fi + if [ "$i" -eq 180 ]; then + echo "ERROR: openshell namespace did not appear in 180s" + exit 1 + fi + sleep 1 +done + +# Wait for the openshell StatefulSet to have a ready replica. +# The VM init script generates PKI and writes TLS secrets manifests +# automatically — no host-side PKI generation needed. +echo " Waiting for openshell pod to be ready..." +for i in $(seq 1 180); do + ready=$(vm_exec kubectl -n openshell get statefulset openshell \ + -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") + if [ "$ready" = "1" ]; then + echo " OpenShell pod ready (${i}s)" + break + fi + if [ "$i" -eq 180 ]; then + echo "WARNING: openshell pod not ready after 180s, continuing anyway" + vm_exec kubectl -n openshell get pods 2>/dev/null | sed 's/^/ /' || true + break + fi + sleep 1 +done + +# Pre-unpack container images so the overlayfs snapshotter has ready-to-use +# snapshots on first boot. The snapshotter now runs directly on virtio-fs, +# so these unpacked layers persist across VM restarts — eliminating the +# per-boot layer extraction that previously added ~3-5s per container. +echo " Pre-unpacking container images..." 
+for img in \
+  "${SERVER_IMAGE}" \
+  "${COMMUNITY_SANDBOX_IMAGE}"; do
+  if vm_exec k3s ctr -n k8s.io images ls -q 2>/dev/null | grep -qF "$img"; then
+    echo "   unpacking: $img"
+    vm_exec k3s ctr -n k8s.io run --rm "$img" "pre-unpack-$(date +%s)" true 2>/dev/null || true
+  fi
+done  # unpack the refs pre-loaded via pull_and_save above, not hard-coded :latest names
+echo "   Image pre-unpack complete."
+
+# Stop the VM so the kine SQLite DB is flushed.
+echo "   Stopping VM..."
+kill "${VM_PID}" 2>/dev/null || true
+wait "${VM_PID}" 2>/dev/null || true
+
+# Surgically clean the kine SQLite DB. Runtime objects (pods, events,
+# leases) created during pre-initialization would cause the VM's kubelet
+# to reconcile against an empty containerd on first real boot.
+#
+# NOTE: This is build-time cleanup only — it produces a clean rootfs
+# image. At runtime, state.db is preserved across VM restarts so that
+# pods and other cluster objects persist. The init script
+# (openshell-vm-init.sh) handles stale bootstrap lock cleanup via
+# sqlite3, and the host-side Rust code (exec.rs) handles actual DB
+# corruption by removing the file.
+echo "   Cleaning runtime objects from kine DB..."
+DB="${ROOTFS_DIR}/var/lib/rancher/k3s/server/db/state.db"
+if [ -f "$DB" ]; then
+  echo "   Before: $(sqlite3 "$DB" "SELECT COUNT(*) FROM kine;") kine records"
+  sqlite3 "$DB" <<'EOSQL'
+DELETE FROM kine WHERE name LIKE '/registry/pods/%';
+DELETE FROM kine WHERE name LIKE '/registry/events/%';
+DELETE FROM kine WHERE name LIKE '/registry/leases/%';
+DELETE FROM kine WHERE name LIKE '/registry/endpointslices/%';
+DELETE FROM kine WHERE name LIKE '/registry/masterleases/%';
+PRAGMA wal_checkpoint(TRUNCATE);
+VACUUM;
+EOSQL
+  echo "   After: $(sqlite3 "$DB" "SELECT COUNT(*) FROM kine;") kine records"
+else
+  echo "WARNING: state.db not found at ${DB}"
+fi
+
+# Clean up runtime artifacts that shouldn't persist.
+echo "   Cleaning runtime artifacts..."
+rm -rf "${ROOTFS_DIR}/var/lib/rancher/k3s/server/tls/temporary-certs" 2>/dev/null || true +rm -f "${ROOTFS_DIR}/var/lib/rancher/k3s/server/kine.sock" 2>/dev/null || true +find "${ROOTFS_DIR}/var/lib/rancher/k3s" -name '*.sock' -delete 2>/dev/null || true +find "${ROOTFS_DIR}/run" -name '*.sock' -delete 2>/dev/null || true + +# Write sentinel file so openshell-vm-init.sh and the host-side bootstrap +# know this rootfs has pre-initialized state. +echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "${ROOTFS_DIR}/opt/openshell/.initialized" + +echo " Pre-initialization complete." + +# ── Verify ──────────────────────────────────────────────────────────── + +if [ ! -f "${ROOTFS_DIR}/usr/local/bin/k3s" ]; then + echo "ERROR: k3s binary not found in rootfs. Something went wrong." + exit 1 +fi + +if [ ! -f "${ROOTFS_DIR}/opt/openshell/.initialized" ]; then + echo "WARNING: Pre-initialization sentinel not found. Cold starts will be slow." +fi + +if [ ! -x "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" ]; then + echo "ERROR: openshell-sandbox supervisor binary not found in rootfs." + echo " Sandbox pods will fail with CreateContainerError." + exit 1 +fi + +echo "" +echo "==> Rootfs ready at: ${ROOTFS_DIR}" +echo " Size: $(du -sh "${ROOTFS_DIR}" | cut -f1)" +echo " Pre-initialized: $(cat "${ROOTFS_DIR}/opt/openshell/.initialized" 2>/dev/null || echo 'no')" + +# Show k3s data size +K3S_DATA="${ROOTFS_DIR}/var/lib/rancher/k3s" +if [ -d "${K3S_DATA}" ]; then + echo " k3s state: $(du -sh "${K3S_DATA}" | cut -f1)" +fi + +# PKI is generated at first VM boot by the init script — not baked. + +# Show supervisor binary +if [ -x "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" ]; then + echo " Supervisor: $(du -h "${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox" | cut -f1)" +fi + +echo "" +echo "Next steps:" +echo " 1. 
Run: openshell-vm" +echo " Expected startup time: ~3-5 seconds (pre-initialized)" diff --git a/crates/openshell-vm/scripts/check-vm-capabilities.sh b/crates/openshell-vm/scripts/check-vm-capabilities.sh new file mode 100755 index 000000000..2e758f5e0 --- /dev/null +++ b/crates/openshell-vm/scripts/check-vm-capabilities.sh @@ -0,0 +1,234 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# VM Kernel Capability Checker +# +# Runs inside the guest VM (or a container with the same rootfs) to +# verify that the kernel has the capabilities required for bridge CNI +# networking, kube-proxy, and Kubernetes pod networking. +# +# Usage: +# ./check-vm-capabilities.sh [--json] +# +# Exit codes: +# 0 = all required capabilities present +# 1 = one or more required capabilities missing +# 2 = script error + +set -euo pipefail + +JSON_OUTPUT=false +if [ "${1:-}" = "--json" ]; then + JSON_OUTPUT=true +fi + +PASS=0 +FAIL=0 +WARN=0 +RESULTS=() + +# ── Helpers ───────────────────────────────────────────────────────────── + +check() { + local name="$1" + local category="$2" + local required="$3" # "required" or "optional" + local description="$4" + shift 4 + local cmd=("$@") + + if eval "${cmd[@]}" >/dev/null 2>&1; then + RESULTS+=("{\"name\":\"$name\",\"category\":\"$category\",\"status\":\"pass\",\"required\":\"$required\",\"description\":\"$description\"}") + PASS=$((PASS + 1)) + if [ "$JSON_OUTPUT" = false ]; then + printf " ✓ %-40s %s\n" "$name" "$description" + fi + else + if [ "$required" = "required" ]; then + RESULTS+=("{\"name\":\"$name\",\"category\":\"$category\",\"status\":\"fail\",\"required\":\"$required\",\"description\":\"$description\"}") + FAIL=$((FAIL + 1)) + if [ "$JSON_OUTPUT" = false ]; then + printf " ✗ %-40s %s (REQUIRED)\n" "$name" "$description" + fi + else + 
RESULTS+=("{\"name\":\"$name\",\"category\":\"$category\",\"status\":\"warn\",\"required\":\"$required\",\"description\":\"$description\"}") + WARN=$((WARN + 1)) + if [ "$JSON_OUTPUT" = false ]; then + printf " ~ %-40s %s (optional)\n" "$name" "$description" + fi + fi + fi +} + +check_module() { + local module="$1" + # Check /proc/modules (loaded), /proc/config.gz (builtin), or /sys/module + if [ -d "/sys/module/$module" ]; then + return 0 + fi + if grep -q "^${module} " /proc/modules 2>/dev/null; then + return 0 + fi + # Check if compiled in via /proc/config.gz or /boot/config + local config_key + config_key="CONFIG_$(echo "$module" | tr '[:lower:]-' '[:upper:]_')" + if [ -f /proc/config.gz ]; then + if zcat /proc/config.gz 2>/dev/null | grep -q "^${config_key}=[ym]"; then + return 0 + fi + fi + return 1 +} + +# ── Capability Checks ────────────────────────────────────────────────── + +if [ "$JSON_OUTPUT" = false ]; then + echo "VM Kernel Capability Check" + echo "==========================" + echo "" + echo "Kernel: $(uname -r)" + echo "" +fi + +# --- Network Namespaces --- +if [ "$JSON_OUTPUT" = false ]; then echo "[Network Namespaces]"; fi + +check "net_namespace" "netns" "required" \ + "network namespace support (CONFIG_NET_NS)" \ + "test -d /proc/self/ns && ls /proc/self/ns/net" + +check "veth_pair" "netns" "required" \ + "veth pair creation (CONFIG_VETH)" \ + "ip link add _chk0 type veth peer name _chk1 && ip link del _chk0" + +# --- Linux Bridge --- +if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[Linux Bridge]"; fi + +check "bridge_module" "bridge" "required" \ + "bridge device support (CONFIG_BRIDGE)" \ + "ip link add _chkbr0 type bridge && ip link del _chkbr0" + +check "bridge_nf_call" "bridge" "required" \ + "bridge netfilter (CONFIG_BRIDGE_NETFILTER)" \ + "check_module bridge && test -f /proc/sys/net/bridge/bridge-nf-call-iptables 2>/dev/null || check_module br_netfilter" + +# --- Netfilter / iptables --- +if [ "$JSON_OUTPUT" = false ]; then echo 
""; echo "[Netfilter / iptables]"; fi + +check "netfilter" "netfilter" "required" \ + "netfilter framework (CONFIG_NETFILTER)" \ + "check_module nf_conntrack || check_module ip_tables || test -d /proc/sys/net/netfilter" + +check "nf_conntrack" "netfilter" "required" \ + "connection tracking (CONFIG_NF_CONNTRACK)" \ + "check_module nf_conntrack" + +check "nf_nat" "netfilter" "required" \ + "NAT support (CONFIG_NF_NAT)" \ + "check_module nf_nat" + +check "iptables_filter" "netfilter" "required" \ + "iptables filter (CONFIG_IP_NF_FILTER)" \ + "check_module ip_tables || iptables -L -n >/dev/null 2>&1" + +check "iptables_nat" "netfilter" "required" \ + "iptables NAT (CONFIG_IP_NF_NAT)" \ + "check_module iptable_nat || iptables -t nat -L -n >/dev/null 2>&1" + +check "iptables_mangle" "netfilter" "optional" \ + "iptables mangle (CONFIG_IP_NF_MANGLE)" \ + "check_module iptable_mangle || iptables -t mangle -L -n >/dev/null 2>&1" + +check "nf_conntrack_netlink" "netfilter" "optional" \ + "conntrack netlink (CONFIG_NF_CT_NETLINK)" \ + "check_module nf_conntrack_netlink" + +check "nftables" "netfilter" "optional" \ + "nftables (CONFIG_NF_TABLES)" \ + "check_module nf_tables || nft list ruleset >/dev/null 2>&1" + +# --- IP Forwarding / Routing --- +if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[IP Forwarding]"; fi + +check "ip_forward" "routing" "required" \ + "IP forwarding (sysctl)" \ + "test -f /proc/sys/net/ipv4/ip_forward" + +check "ip_route" "routing" "required" \ + "IP routing" \ + "ip route show >/dev/null 2>&1" + +# --- CNI Plugin Dependencies --- +if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[CNI Plugins]"; fi + +check "cni_bridge_bin" "cni" "required" \ + "bridge CNI plugin binary" \ + "test -x /opt/cni/bin/bridge || find /var/lib/rancher/k3s/data -name bridge -type f 2>/dev/null | head -1 | grep -q ." 
+
+check "cni_host_local_bin" "cni" "required" \
+  "host-local IPAM plugin binary" \
+  "test -x /opt/cni/bin/host-local || find /var/lib/rancher/k3s/data -name host-local -type f 2>/dev/null | head -1 | grep -q ."
+
+check "cni_loopback_bin" "cni" "required" \
+  "loopback CNI plugin binary" \
+  "test -x /opt/cni/bin/loopback || find /var/lib/rancher/k3s/data -name loopback -type f 2>/dev/null | head -1 | grep -q ."
+
+check "cni_portmap_bin" "cni" "optional" \
+  "portmap CNI plugin binary (needs iptables)" \
+  "test -x /opt/cni/bin/portmap || find /var/lib/rancher/k3s/data -name portmap -type f 2>/dev/null | head -1 | grep -q ."
+
+# --- Userspace Tools ---
+if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[Userspace Tools]"; fi
+
+check "iptables_bin" "userspace" "required" \
+  "iptables binary" \
+  "command -v iptables"
+
+check "conntrack_bin" "userspace" "optional" \
+  "conntrack binary" \
+  "command -v conntrack"
+
+check "ip_bin" "userspace" "required" \
+  "iproute2 (ip command)" \
+  "command -v ip"
+
+# ── Summary ──────────────────────────────────────────────────────────
+
+if [ "$JSON_OUTPUT" = true ]; then
+  echo "{"
+  echo "  \"kernel\": \"$(uname -r)\","
+  echo "  \"timestamp\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\","
+  echo "  \"pass\": $PASS,"
+  echo "  \"fail\": $FAIL,"
+  echo "  \"warn\": $WARN,"
+  echo "  \"results\": ["
+  local_first=true  # JSON wants commas between elements, none after the last
+  for r in "${RESULTS[@]}"; do
+    if [ "$local_first" = true ]; then
+      local_first=false
+    else
+      echo ","
+    fi
+    printf "    %s" "$r"
+  done
+  echo ""
+  echo "  ]"
+  echo "}"; exit $((FAIL > 0 ? 1 : 0))  # honor documented exit codes (1 = required missing) in --json mode too
+else
+  echo ""
+  echo "─────────────────────────────────────────"
+  printf "Results: %d passed, %d failed, %d warnings\n" "$PASS" "$FAIL" "$WARN"
+
+  if [ "$FAIL" -gt 0 ]; then
+    echo ""
+    echo "FAIL: $FAIL required capabilities missing."
+    echo "The VM kernel needs to be rebuilt with the missing features."
+    echo "See: crates/openshell-vm/runtime/kernel/README.md"
+    exit 1
+  else
+    echo ""
+    echo "PASS: All required capabilities present."
+ exit 0 + fi +fi diff --git a/crates/openshell-vm/scripts/openshell-vm-exec-agent.py b/crates/openshell-vm/scripts/openshell-vm-exec-agent.py new file mode 100644 index 000000000..d7ffd81df --- /dev/null +++ b/crates/openshell-vm/scripts/openshell-vm-exec-agent.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import base64 +import json +import os +import socket +import subprocess +import sys +import threading + + +PORT = 10777 + + +def recv_line(sock_file): + line = sock_file.readline() + if not line: + return None + return json.loads(line.decode("utf-8")) + + +def send_frame(sock_file, lock, frame): + data = (json.dumps(frame, separators=(",", ":")) + "\n").encode("utf-8") + with lock: + sock_file.write(data) + sock_file.flush() + + +def validate_env(env_items): + env = {} + for item in env_items: + if "=" not in item: + raise ValueError(f"invalid env item: {item}") + key, value = item.split("=", 1) + if not key or not (key[0] == "_" or key[0].isalpha()): + raise ValueError(f"invalid env key: {key}") + if not all(ch == "_" or ch.isalnum() for ch in key): + raise ValueError(f"invalid env key: {key}") + env[key] = value + return env + + +def stream_reader(pipe, frame_type, sock_file, lock): + try: + while True: + chunk = pipe.read(8192) + if not chunk: + break + send_frame( + sock_file, + lock, + {"type": frame_type, "data": base64.b64encode(chunk).decode("ascii")}, + ) + finally: + pipe.close() + + +def stdin_writer(proc, sock_file, sock, lock): + """Forward stdin frames from the client to the subprocess. + + When the client sends ``stdin_close`` (or the connection drops), we + close the subprocess's stdin pipe so it sees EOF. 
We must NOT + terminate the subprocess or shut down the socket here — the main + thread needs the process to finish naturally and the stdout/stderr + reader threads still need to flush their data back to the client. + """ + try: + while True: + frame = recv_line(sock_file) + if frame is None: + break + kind = frame.get("type") + if kind == "stdin": + payload = base64.b64decode(frame.get("data", "")) + if proc.stdin is not None: + proc.stdin.write(payload) + proc.stdin.flush() + elif kind == "stdin_close": + break + else: + send_frame( + sock_file, + lock, + {"type": "error", "message": f"unknown frame type: {kind}"}, + ) + break + except BrokenPipeError: + pass + finally: + try: + if proc.stdin is not None: + proc.stdin.close() + except OSError: + pass + + +def handle_client(conn): + sock_file = conn.makefile("rwb", buffering=0) + lock = threading.Lock() + try: + request = recv_line(sock_file) + if request is None: + return + + argv = request.get("argv") or ["sh"] + cwd = request.get("cwd") + env = os.environ.copy() + env.update(validate_env(request.get("env") or [])) + + proc = subprocess.Popen( + argv, + cwd=cwd or "/", + env=env, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + stdout_thread = threading.Thread( + target=stream_reader, + args=(proc.stdout, "stdout", sock_file, lock), + daemon=True, + ) + stderr_thread = threading.Thread( + target=stream_reader, + args=(proc.stderr, "stderr", sock_file, lock), + daemon=True, + ) + stdin_thread = threading.Thread( + target=stdin_writer, args=(proc, sock_file, conn, lock), daemon=True + ) + + stdout_thread.start() + stderr_thread.start() + stdin_thread.start() + + code = proc.wait() + stdout_thread.join() + stderr_thread.join() + send_frame(sock_file, lock, {"type": "exit", "code": code}) + except Exception as exc: + try: + send_frame(sock_file, lock, {"type": "error", "message": str(exc)}) + except Exception: + pass + finally: + try: + sock_file.close() + except Exception: + pass 
+            conn.close()
+
+
+def main():
+    """Accept vsock connections forever, one handler thread per client.
+
+    Returns 1 (used as the process exit status via SystemExit below) when
+    the interpreter was built without AF_VSOCK support; otherwise loops
+    forever and never returns.
+    """
+    # AF_VSOCK is only present on Linux builds of CPython with vsock
+    # support; bail out cleanly elsewhere.
+    if not hasattr(socket, "AF_VSOCK"):
+        print("AF_VSOCK is not available", file=sys.stderr)
+        return 1
+
+    server = socket.socket(socket.AF_VSOCK, socket.SOCK_STREAM)
+    server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+    # VMADDR_CID_ANY: accept from any context ID.  PORT is a module-level
+    # constant defined earlier in this file.
+    server.bind((socket.VMADDR_CID_ANY, PORT))
+    server.listen(16)
+
+    while True:
+        conn, _addr = server.accept()
+        # Daemon thread: in-flight clients do not block interpreter exit.
+        thread = threading.Thread(target=handle_client, args=(conn,), daemon=True)
+        thread.start()
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/crates/openshell-vm/scripts/openshell-vm-init.sh b/crates/openshell-vm/scripts/openshell-vm-init.sh
new file mode 100755
index 000000000..1cb686a31
--- /dev/null
+++ b/crates/openshell-vm/scripts/openshell-vm-init.sh
@@ -0,0 +1,833 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Init script for the openshell-vm microVM. Runs as PID 1 inside the libkrun VM.
+#
+# Mounts essential virtual filesystems, configures networking, then execs
+# k3s server. If the rootfs was pre-initialized by build-rootfs.sh (sentinel
+# at /opt/openshell/.initialized), the full manifest setup is skipped and
+# k3s resumes from its persisted state (~3-5s startup).
+
+set -euo pipefail
+
+# Millisecond epoch where %N is supported, seconds otherwise.
+# NOTE(review): some date implementations (e.g. busybox) may print "%3N"
+# literally instead of failing, which would break the arithmetic in ts()
+# below — confirm against the rootfs's date binary.
+BOOT_START=$(date +%s%3N 2>/dev/null || date +%s)
+
+# Print "[S.mmm s] message" relative to BOOT_START, for boot profiling.
+ts() {
+  local now
+  now=$(date +%s%3N 2>/dev/null || date +%s)
+  local elapsed=$(( (now - BOOT_START) ))
+  printf "[%d.%03ds] %s\n" $((elapsed / 1000)) $((elapsed % 1000)) "$*"
+}
+
+# Fast-path sentinel written by build-rootfs.sh at image build time.
+PRE_INITIALIZED=false
+if [ -f /opt/openshell/.initialized ]; then
+  PRE_INITIALIZED=true
+  ts "pre-initialized rootfs detected (fast path)"
+fi
+
+# ── Mount essential filesystems (parallel) ──────────────────────────────
+# These are independent; mount them concurrently.
+ +mount -t proc proc /proc 2>/dev/null & +mount -t sysfs sysfs /sys 2>/dev/null & +mount -t tmpfs tmpfs /tmp 2>/dev/null & +mount -t tmpfs tmpfs /run 2>/dev/null & +mount -t devtmpfs devtmpfs /dev 2>/dev/null & +wait + +# These depend on /dev being mounted. +mkdir -p /dev/pts /dev/shm +mount -t devpts devpts /dev/pts 2>/dev/null & +mount -t tmpfs tmpfs /dev/shm 2>/dev/null & + +# cgroup2 (unified hierarchy) — required by k3s/containerd. +mkdir -p /sys/fs/cgroup +mount -t cgroup2 cgroup2 /sys/fs/cgroup 2>/dev/null & +wait + +ts "filesystems mounted" + +# ── Networking ────────────────────────────────────────────────────────── + +# Non-critical: hostname is cosmetic. +hostname openshell-vm 2>/dev/null || true + +# Ensure loopback is up (k3s binds to 127.0.0.1). +ip link set lo up 2>/dev/null || true + +# Detect whether we have a real network interface (gvproxy) or need a +# dummy interface (TSI / no networking). +if ip link show eth0 >/dev/null 2>&1; then + # gvproxy networking — bring up eth0 and get an IP via DHCP. + # gvproxy has a built-in DHCP server that assigns 192.168.127.2/24 + # with gateway 192.168.127.1 and configures ARP properly. + ts "detected eth0 (gvproxy networking)" + ip link set eth0 up 2>/dev/null || true + + # Use DHCP to get IP and configure routes. gvproxy's DHCP server + # handles ARP resolution which static config does not. + if command -v udhcpc >/dev/null 2>&1; then + # udhcpc needs a script to apply the lease. Use the busybox + # default script if available, otherwise write a minimal one. + UDHCPC_SCRIPT="/usr/share/udhcpc/default.script" + if [ ! 
-f "$UDHCPC_SCRIPT" ]; then + mkdir -p /usr/share/udhcpc + cat > "$UDHCPC_SCRIPT" << 'DHCP_SCRIPT' +#!/bin/sh +case "$1" in + bound|renew) + ip addr flush dev "$interface" + ip addr add "$ip/$mask" dev "$interface" + if [ -n "$router" ]; then + ip route add default via $router dev "$interface" + fi + if [ -n "$dns" ]; then + echo -n > /etc/resolv.conf + for d in $dns; do + echo "nameserver $d" >> /etc/resolv.conf + done + fi + ;; +esac +DHCP_SCRIPT + chmod +x "$UDHCPC_SCRIPT" + fi + # -f: stay in foreground, -q: quit after obtaining lease, + # -n: exit if no lease, -T 1: 1s between retries, -t 3: 3 retries + # -A 1: wait 1s before first retry (aggressive for local gvproxy) + if ! udhcpc -i eth0 -f -q -n -T 1 -t 3 -A 1 -s "$UDHCPC_SCRIPT" 2>&1; then + ts "WARNING: DHCP failed, falling back to static config" + ip addr add 192.168.127.2/24 dev eth0 2>/dev/null || true + ip route add default via 192.168.127.1 2>/dev/null || true + fi + else + # Fallback to static config if no DHCP client available. + ts "no DHCP client, using static config" + ip addr add 192.168.127.2/24 dev eth0 2>/dev/null || true + ip route add default via 192.168.127.1 2>/dev/null || true + fi + + # Ensure DNS is configured. DHCP should have set /etc/resolv.conf, + # but if it didn't (or static fallback was used), provide a default. + if [ ! -s /etc/resolv.conf ]; then + echo "nameserver 8.8.8.8" > /etc/resolv.conf + echo "nameserver 8.8.4.4" >> /etc/resolv.conf + fi + + # Read back the IP we got (from DHCP or static). + NODE_IP=$(ip -4 addr show eth0 2>/dev/null | awk '/inet / {split($2,a,"/"); print a[1]; exit}') + NODE_IP="${NODE_IP:-192.168.127.2}" + ts "eth0 IP: $NODE_IP" +else + # TSI or no networking — create a dummy interface for k3s. 
+ ts "no eth0 found, using dummy interface (TSI mode)" + ip link add dummy0 type dummy 2>/dev/null || true + ip addr add 10.0.2.15/24 dev dummy0 2>/dev/null || true + ip link set dummy0 up 2>/dev/null || true + ip route add default dev dummy0 2>/dev/null || true + + NODE_IP="10.0.2.15" +fi + +# ── k3s data directories ─────────────────────────────────────────────── + +mkdir -p /var/lib/rancher/k3s +mkdir -p /etc/rancher/k3s + +ROOTFS_CONTAINERD_DIR="/var/lib/rancher/k3s/agent/containerd" +CONTAINERD_DIR="$ROOTFS_CONTAINERD_DIR" + +# ── State disk: mount ALL mutable runtime state on the block device ──── +# +# The virtio-fs share is the immutable OS image (read-only at runtime). +# All state that changes after first boot lives on an ext4 virtio-blk +# disk (/dev/vda). This gives full filesystem semantics (chown, hard +# links, fsync) and keeps every writable path off the host filesystem. +# +# Directories on the state disk: +# containerd/ → k3s/agent/containerd (overlayfs snapshotter) +# k3s-agent/ → k3s/agent (kubelet certs, kubeconfigs) +# k3s-server-db/ → k3s/server/db (kine SQLite) +# k3s-server-tls/ → k3s/server/tls (cluster TLS certs) +# k3s-server-cred/ → k3s/server/cred (bootstrap credentials) +# k3s-server-etc/ → k3s/server/etc (k3s-generated config) +# local-path-storage/ → k3s/storage (PVC data) +# pki/ → opt/openshell/pki (mTLS CA + server/client certs) +# +# Directories that stay on virtio-fs (read-only seeds from build-rootfs.sh): +# k3s/server/manifests (k3s auto-deploy manifests, written by init script) +# k3s/server/static (k3s bundled charts) +# k3s/agent/images (airgap image tarballs, seeded once then on disk) + +STATE_DISK_DEVICE="${OPENSHELL_VM_STATE_DISK_DEVICE:-/dev/vda}" +STATE_MOUNT_DIR="/mnt/openshell-state" +STATE_DISK_ACTIVE=false +mkdir -p "$STATE_MOUNT_DIR" + +if [ -b "$STATE_DISK_DEVICE" ]; then + ts "configuring block-backed runtime state on ${STATE_DISK_DEVICE}" + if ! 
blkid "$STATE_DISK_DEVICE" >/dev/null 2>&1; then + mkfs.ext4 -F -L openshell-state "$STATE_DISK_DEVICE" >/dev/null 2>&1 + ts "formatted state disk" + fi + mount -t ext4 -o noatime "$STATE_DISK_DEVICE" "$STATE_MOUNT_DIR" + + # ── k3s agent: seed images once, then bind entire agent dir ────────── + # agent/images contains airgap image tarballs baked into the rootfs. + # Seed them to the block device on first use so containerd can import + # them; after that they live on the block device alongside everything else. + STATE_K3S_AGENT_DIR="${STATE_MOUNT_DIR}/k3s-agent" + mkdir -p "$STATE_K3S_AGENT_DIR" + if [ ! -f "${STATE_MOUNT_DIR}/.seeded-agent-images" ]; then + VIRTIOFS_AGENT_IMAGES="/var/lib/rancher/k3s/agent/images" + if [ -d "$VIRTIOFS_AGENT_IMAGES" ] && [ -n "$(ls -A "$VIRTIOFS_AGENT_IMAGES" 2>/dev/null)" ]; then + ts "seeding agent images to block device" + mkdir -p "${STATE_K3S_AGENT_DIR}/images" + tar -C "$VIRTIOFS_AGENT_IMAGES" -cf - . | tar -C "${STATE_K3S_AGENT_DIR}/images" -xf - + fi + date -u +%Y-%m-%dT%H:%M:%SZ > "${STATE_MOUNT_DIR}/.seeded-agent-images" + fi + mkdir -p /var/lib/rancher/k3s/agent + mount --bind "$STATE_K3S_AGENT_DIR" /var/lib/rancher/k3s/agent + + # ── containerd: bind on top of agent ───────────────────────────────── + # Seeded from the virtiofs rootfs on first use (overlayfs snapshots, + # content store, meta.db pre-populated by build-rootfs.sh). + STATE_CONTAINERD_DIR="${STATE_MOUNT_DIR}/containerd" + mkdir -p "$STATE_CONTAINERD_DIR" + if [ ! -f "${STATE_MOUNT_DIR}/.seeded-containerd" ]; then + if [ -d "$ROOTFS_CONTAINERD_DIR" ] && [ -n "$(ls -A "$ROOTFS_CONTAINERD_DIR" 2>/dev/null)" ]; then + ts "seeding containerd state to block device" + tar -C "$ROOTFS_CONTAINERD_DIR" -cf - . 
| tar -C "$STATE_CONTAINERD_DIR" -xf - + else + ts "containerd state is empty; starting fresh" + fi + date -u +%Y-%m-%dT%H:%M:%SZ > "${STATE_MOUNT_DIR}/.seeded-containerd" + fi + mkdir -p "$ROOTFS_CONTAINERD_DIR" + mount --bind "$STATE_CONTAINERD_DIR" "$ROOTFS_CONTAINERD_DIR" + + # ── k3s server runtime state ────────────────────────────────────────── + # server/manifests and server/static stay on virtiofs (written by init + # script each boot from /opt/openshell/manifests; read-only after that). + for pair in \ + "k3s-server-db:/var/lib/rancher/k3s/server/db" \ + "k3s-server-tls:/var/lib/rancher/k3s/server/tls" \ + "k3s-server-cred:/var/lib/rancher/k3s/server/cred" \ + "k3s-server-etc:/var/lib/rancher/k3s/server/etc" + do + src="${STATE_MOUNT_DIR}/${pair%%:*}" + dst="${pair#*:}" + mkdir -p "$src" "$dst" + mount --bind "$src" "$dst" + done + + # ── local-path PVC storage ───────────────────────────────────────────── + mkdir -p "${STATE_MOUNT_DIR}/local-path-storage" /var/lib/rancher/k3s/storage + mount --bind "${STATE_MOUNT_DIR}/local-path-storage" /var/lib/rancher/k3s/storage + + # ── PKI ──────────────────────────────────────────────────────────────── + # Certs live on the block device; the host reads them via the exec + # agent (vsock port 10777) instead of polling the virtiofs rootfs path. + mkdir -p "${STATE_MOUNT_DIR}/pki" /opt/openshell/pki + mount --bind "${STATE_MOUNT_DIR}/pki" /opt/openshell/pki + + STATE_DISK_ACTIVE=true + ts "all runtime state mounted from block device" +else + ts "no block device found; using virtiofs-backed runtime state" +fi + +# Clean stale sockets from previous boots. Sockets live in /run (tmpfs) +# and /var/lib/rancher/k3s — they're stale on every boot regardless of +# whether state is on virtiofs or the block device. 
+find /var/lib/rancher/k3s -name '*.sock' -delete 2>/dev/null || true +find /run -name '*.sock' -delete 2>/dev/null || true +# On the block-device path, node-passwd is regenerated by k3s on each +# start; clear it so k3s doesn't fail node re-registration validation. +rm -f /var/lib/rancher/k3s/server/cred/node-passwd 2>/dev/null || true + +# Clean stale containerd runtime state from previous boots. +# +# The rootfs persists across VM restarts via virtio-fs. The overlayfs +# snapshotter now lives on the host-backed state disk when present, so +# snapshot data and meta.db persist across boots. We only clean runtime +# state (shim PIDs, sockets) that becomes stale when the VM restarts. +if [ -d "$CONTAINERD_DIR" ]; then + # Remove runtime task state (stale shim PIDs, sockets from dead processes). + rm -rf "${CONTAINERD_DIR}/io.containerd.runtime.v2.task" 2>/dev/null || true + # Remove sandbox controller shim state. Stale sandbox records cause + # containerd to reuse network namespaces from previous boots, which + # already have routes configured. The CNI bridge plugin then fails + # with "file exists" when adding the default route on retry. + rm -rf "${CONTAINERD_DIR}/io.containerd.sandbox.controller.v1.shim" 2>/dev/null || true + # Clean stale ingest temp files from the content store. + rm -rf "${CONTAINERD_DIR}/io.containerd.content.v1.content/ingest" 2>/dev/null || true + mkdir -p "${CONTAINERD_DIR}/io.containerd.content.v1.content/ingest" + # meta.db and overlayfs snapshots persist across boots on virtio-fs. + # No need to delete meta.db — snapshot metadata remains valid since + # the snapshotter directory is no longer backed by volatile tmpfs. + ts "cleaned containerd runtime state (meta.db + snapshots preserved)" +fi +rm -rf /run/k3s 2>/dev/null || true + +# Ensure the overlayfs snapshotter directory exists. The snapshotter +# runs directly on virtio-fs, so layer data and snapshot metadata +# persist across VM restarts. 
This eliminates the need to re-import +# image tarballs and re-extract layers on every boot, significantly +# reducing sandbox creation time. +OVERLAYFS_DIR="${CONTAINERD_DIR}/io.containerd.snapshotter.v1.overlayfs" +mkdir -p "$OVERLAYFS_DIR" +if [ "$STATE_DISK_ACTIVE" = true ]; then + ts "overlayfs snapshotter on block-backed containerd state" +else + ts "overlayfs snapshotter on virtio-fs (persistent)" +fi + +ts "stale artifacts cleaned" + +# ── Clean stale CNI / pod networking state ────────────────────────────── +# The rootfs persists across VM restarts via virtio-fs. Previous pod +# sandboxes leave behind veth pairs, bridge routes, host-local IPAM +# allocations, and network namespaces. If not cleaned, the bridge CNI +# plugin fails with: +# "failed to add route ... file exists" +# because the default route via cni0 already exists from the prior boot, +# or a stale network namespace already has the route configured. + +# Tear down the CNI bridge and its associated routes. +if ip link show cni0 >/dev/null 2>&1; then + ip link set cni0 down 2>/dev/null || true + ip link delete cni0 2>/dev/null || true + ts "deleted stale cni0 bridge" +fi + +# Remove any leftover veth pairs (CNI bridge plugin creates vethXXXX). +veths=$(ip -o link show type veth 2>/dev/null | awk -F': ' '{print $2}' | cut -d'@' -f1 || true) +for veth in $veths; do + ip link delete "$veth" 2>/dev/null || true +done + +# Flush host-local IPAM allocations so IPs can be reassigned cleanly. +rm -rf /var/lib/cni/networks 2>/dev/null || true +rm -rf /var/lib/cni/results 2>/dev/null || true + +# Flush any stale CNI-added routes for the pod CIDR. These can conflict +# with routes the bridge plugin tries to add on the next boot. +ip route flush 10.42.0.0/24 2>/dev/null || true + +# Clean up stale pod network namespaces from previous boots. Containerd +# creates named netns under /var/run/netns/ for each pod sandbox. 
If +# these persist across VM restarts, the CNI bridge plugin fails when +# adding routes because the stale netns already has the default route +# configured from the prior boot. Removing all named network namespaces +# forces containerd to create fresh ones. +if [ -d /var/run/netns ]; then + netns_list=$(ip netns list 2>/dev/null | awk '{print $1}' || true) + for ns in $netns_list; do + ip netns delete "$ns" 2>/dev/null || true + done +fi +# Also clean the netns bind-mount directory used by containerd/CRI. +# Containerd may use /run/netns/ or /var/run/netns/ (same via tmpfs). +rm -rf /run/netns/* 2>/dev/null || true +rm -rf /var/run/netns/* 2>/dev/null || true + +ts "stale CNI networking state cleaned" + +# ── Network profile detection ─────────────────────────────────────────── +# Detect early so manifest patching and k3s flags both use the same value. +# +# "bridge" is the only supported profile. It requires a custom libkrunfw +# with CONFIG_BRIDGE, CONFIG_NETFILTER, CONFIG_NF_NAT built in. If the +# kernel lacks these capabilities the VM cannot run pod networking and we +# fail fast with an actionable error. + +NET_PROFILE="bridge" + +ts "network profile: ${NET_PROFILE}" + +# Validate that the kernel actually has the required capabilities. +_caps_ok=true +if ! ip link add _cap_br0 type bridge 2>/dev/null; then + echo "ERROR: kernel lacks bridge support (CONFIG_BRIDGE). Use a custom libkrunfw." >&2 + _caps_ok=false +else + ip link del _cap_br0 2>/dev/null || true +fi +if [ ! -d /proc/sys/net/netfilter ] && [ ! -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + echo "ERROR: kernel lacks netfilter support (CONFIG_NETFILTER). Use a custom libkrunfw." >&2 + _caps_ok=false +fi +if [ "$_caps_ok" = false ]; then + echo "FATAL: required kernel capabilities missing — cannot configure pod networking." >&2 + echo "See: architecture/custom-vm-runtime.md for build instructions." 
>&2 + exit 1 +fi + +# ── Deploy bundled manifests (cold boot only) ─────────────────────────── +# On pre-initialized rootfs, manifests are already in place from the +# build-time k3s boot. Skip this entirely for fast startup. + +K3S_MANIFESTS="/var/lib/rancher/k3s/server/manifests" +BUNDLED_MANIFESTS="/opt/openshell/manifests" + +if [ "$PRE_INITIALIZED" = false ]; then + + mkdir -p "$K3S_MANIFESTS" + + if [ -d "$BUNDLED_MANIFESTS" ]; then + ts "deploying bundled manifests (cold boot)..." + for manifest in "$BUNDLED_MANIFESTS"/*.yaml; do + [ ! -f "$manifest" ] && continue + cp "$manifest" "$K3S_MANIFESTS/" + done + + # Remove stale OpenShell-managed manifests from previous boots. + for existing in "$K3S_MANIFESTS"/openshell-*.yaml \ + "$K3S_MANIFESTS"/agent-*.yaml; do + [ ! -f "$existing" ] && continue + basename=$(basename "$existing") + if [ ! -f "$BUNDLED_MANIFESTS/$basename" ]; then + rm -f "$existing" + fi + done + fi + + # Restore helm chart tarballs from staging. A --reset wipes + # server/static/charts/ but the bundled charts survive in + # /opt/openshell/charts/. + BUNDLED_CHARTS="/opt/openshell/charts" + K3S_CHARTS="/var/lib/rancher/k3s/server/static/charts" + if [ -d "$BUNDLED_CHARTS" ]; then + mkdir -p "$K3S_CHARTS" + cp "$BUNDLED_CHARTS"/*.tgz "$K3S_CHARTS/" 2>/dev/null || true + ts "helm charts restored from staging" + fi + + ts "manifests deployed" +else + ts "skipping manifest deploy (pre-initialized)" +fi + +# Patch manifests for VM deployment constraints. +HELMCHART="$K3S_MANIFESTS/openshell-helmchart.yaml" +if [ -f "$HELMCHART" ]; then + # Use pre-loaded images and a tmp-backed database in the VM. + sed -i 's|__IMAGE_PULL_POLICY__|IfNotPresent|g' "$HELMCHART" + sed -i 's|__SANDBOX_IMAGE_PULL_POLICY__|"IfNotPresent"|g' "$HELMCHART" + sed -i 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g' "$HELMCHART" + # Clear SSH gateway placeholders (default 127.0.0.1 is correct for local VM). 
+ sed -i 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" + sed -i 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" + sed -i 's|__DISABLE_GATEWAY_AUTH__|false|g' "$HELMCHART" + sed -i 's|__DISABLE_TLS__|false|g' "$HELMCHART" + sed -i 's|hostGatewayIP: __HOST_GATEWAY_IP__|hostGatewayIP: ""|g' "$HELMCHART" + sed -i '/__CHART_CHECKSUM__/d' "$HELMCHART" +fi + +AGENT_MANIFEST="$K3S_MANIFESTS/agent-sandbox.yaml" +if [ -f "$AGENT_MANIFEST" ]; then + # Bridge CNI: agent-sandbox uses normal pod networking. + # kube-proxy is enabled so kubernetes.default.svc is reachable + # via ClusterIP — no need for KUBERNETES_SERVICE_HOST override. + sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST" + sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" + ts "agent-sandbox: using pod networking (bridge profile)" +fi + +# ── CNI configuration (bridge) ────────────────────────────────────────── +# Uses the bridge CNI plugin with iptables masquerade. Requires +# CONFIG_BRIDGE, CONFIG_NETFILTER, CONFIG_NF_NAT in the VM kernel +# (validated above at boot). kube-proxy uses nftables mode for service +# VIP routing. + +CNI_CONF_DIR="/etc/cni/net.d" +CNI_BIN_DIR="/opt/cni/bin" +mkdir -p "$CNI_CONF_DIR" "$CNI_BIN_DIR" + +# Enable IP forwarding (required for masquerade). +if ! echo 1 > /proc/sys/net/ipv4/ip_forward 2>/dev/null; then + echo "FATAL: failed to enable IP forwarding — pod networking will not work" >&2 + exit 1 +fi + +# Enable bridge netfilter call (required for CNI bridge masquerade to +# see bridged traffic). +if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + if ! 
echo 1 > /proc/sys/net/bridge/bridge-nf-call-iptables 2>/dev/null; then + ts "WARNING: failed to enable bridge-nf-call-iptables — CNI masquerade may not work" + fi +fi + +cat > "$CNI_CONF_DIR/10-bridge.conflist" << 'CNICFG' +{ + "cniVersion": "1.0.0", + "name": "bridge", + "plugins": [ + { + "type": "bridge", + "bridge": "cni0", + "isGateway": true, + "isDefaultGateway": true, + "ipMasq": true, + "hairpinMode": true, + "ipam": { + "type": "host-local", + "ranges": [[{ "subnet": "10.42.0.0/24" }]] + } + }, + { + "type": "portmap", + "capabilities": { "portMappings": true }, + "snat": true + }, + { + "type": "loopback" + } + ] +} +CNICFG + +# Remove any stale legacy ptp config. +rm -f "$CNI_CONF_DIR/10-ptp.conflist" 2>/dev/null || true + +ts "bridge CNI configured (cni0 + iptables masquerade)" + +# Start the local exec agent before k3s so `openshell-vm exec` works as soon as +# the VM has booted. It only listens on vsock, not on the guest network. +if command -v python3 >/dev/null 2>&1; then + ts "starting openshell-vm exec agent" + mkdir -p /run/openshell + setsid python3 /srv/openshell-vm-exec-agent.py >/run/openshell/openshell-vm-exec-agent.log 2>&1 & +else + ts "WARNING: python3 missing, openshell-vm exec agent disabled" +fi + +# Symlink k3s-bundled CNI binaries to the default containerd bin path. +# k3s extracts its tools to /var/lib/rancher/k3s/data//bin/ at startup. +# On cold boot this directory doesn't exist yet (k3s hasn't run), so we +# first try synchronously, then fall back to a background watcher that +# polls until k3s extracts the binaries and creates the symlinks before +# any pods can schedule. +link_cni_binaries() { + local data_bin="$1" + # Ensure execute permissions on all binaries. The rootfs may have + # been built on macOS where virtio-fs or docker export can strip + # execute bits from Linux ELF binaries. 
+ chmod +x "$data_bin"/* 2>/dev/null || true + if [ -d "$data_bin/aux" ]; then + chmod +x "$data_bin/aux"/* 2>/dev/null || true + fi + for plugin in bridge host-local loopback bandwidth portmap; do + [ -e "$data_bin/$plugin" ] && ln -sf "$data_bin/$plugin" "$CNI_BIN_DIR/$plugin" + done +} + +# Find the k3s data bin dir, excluding temporary extraction directories +# (k3s extracts to -tmp/ then renames to /). +find_k3s_data_bin() { + find /var/lib/rancher/k3s/data -maxdepth 2 -name bin -type d 2>/dev/null \ + | grep -v '\-tmp/' | head -1 || true +} + +K3S_DATA_BIN=$(find_k3s_data_bin) +if [ -n "$K3S_DATA_BIN" ]; then + link_cni_binaries "$K3S_DATA_BIN" + ts "CNI binaries linked from $K3S_DATA_BIN" +else + # Cold boot: k3s hasn't extracted binaries yet. Launch a background + # watcher that polls until the data dir appears (k3s creates it in + # the first ~2s of startup) and then symlinks the CNI plugins. + # We exclude -tmp directories to avoid symlinking to the transient + # extraction path that k3s renames once extraction completes. + ts "CNI binaries not yet available, starting background watcher" + setsid sh -c ' + CNI_BIN_DIR="/opt/cni/bin" + for i in $(seq 1 60); do + K3S_DATA_BIN=$(find /var/lib/rancher/k3s/data -maxdepth 2 -name bin -type d 2>/dev/null \ + | grep -v "\-tmp/" | head -1) + if [ -n "$K3S_DATA_BIN" ]; then + chmod +x "$K3S_DATA_BIN"/* 2>/dev/null || true + if [ -d "$K3S_DATA_BIN/aux" ]; then + chmod +x "$K3S_DATA_BIN/aux"/* 2>/dev/null || true + fi + for plugin in bridge host-local loopback bandwidth portmap; do + [ -e "$K3S_DATA_BIN/$plugin" ] && ln -sf "$K3S_DATA_BIN/$plugin" "$CNI_BIN_DIR/$plugin" + done + echo "[cni-watcher] CNI binaries linked from $K3S_DATA_BIN after ${i}s" + exit 0 + fi + sleep 1 + done + echo "[cni-watcher] ERROR: k3s data bin dir not found after 60s" + ' & +fi + +# Also clean up any flannel config from the k3s-specific CNI directory +# (pre-baked state from the Docker build used host-gw flannel). 
+rm -f "/var/lib/rancher/k3s/agent/etc/cni/net.d/10-flannel.conflist" 2>/dev/null || true + +# ── PKI: generate once, read via exec agent ─────────────────────────── +# Certs are generated on first boot and stored at /opt/openshell/pki/. +# With the block-device layout this path is on the state disk, fully +# isolated from the virtiofs host filesystem. +# The host-side bootstrap reads certs via the exec agent (vsock port +# 10777) by running `cat` on each PEM file. + +PKI_DIR="/opt/openshell/pki" +if [ ! -f "$PKI_DIR/ca.crt" ]; then + ts "generating PKI (first boot)..." + mkdir -p "$PKI_DIR" + + # CA + openssl req -x509 -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \ + -keyout "$PKI_DIR/ca.key" -out "$PKI_DIR/ca.crt" \ + -days 3650 -nodes -subj "/O=openshell/CN=openshell-ca" 2>/dev/null + + # Server cert with SANs + cat > "$PKI_DIR/server.cnf" </dev/null + openssl x509 -req -in "$PKI_DIR/server.csr" \ + -CA "$PKI_DIR/ca.crt" -CAkey "$PKI_DIR/ca.key" -CAcreateserial \ + -out "$PKI_DIR/server.crt" -days 3650 \ + -extensions v3_req -extfile "$PKI_DIR/server.cnf" 2>/dev/null + + # Client cert (must be v3 — rustls rejects v1) + cat > "$PKI_DIR/client.cnf" </dev/null + openssl x509 -req -in "$PKI_DIR/client.csr" \ + -CA "$PKI_DIR/ca.crt" -CAkey "$PKI_DIR/ca.key" -CAcreateserial \ + -out "$PKI_DIR/client.crt" -days 3650 \ + -extensions v3_client -extfile "$PKI_DIR/client.cnf" 2>/dev/null + + # Clean up CSRs + rm -f "$PKI_DIR"/*.csr "$PKI_DIR"/*.cnf "$PKI_DIR"/*.srl + + ts "PKI generated" +else + ts "existing PKI found, skipping generation" +fi + +SSH_HANDSHAKE_SECRET_FILE="${PKI_DIR}/ssh-handshake-secret" +if [ ! -f "$SSH_HANDSHAKE_SECRET_FILE" ]; then + ts "generating SSH handshake secret (first boot)..." + head -c 32 /dev/urandom | od -A n -t x1 | tr -d ' \n' > "$SSH_HANDSHAKE_SECRET_FILE" + chmod 600 "$SSH_HANDSHAKE_SECRET_FILE" +else + ts "existing SSH handshake secret found, reusing" +fi + +# Write TLS secrets as a k3s auto-deploy manifest. 
k3s applies any YAML +# in server/manifests/ on startup. We write this on every boot so that: +# - A --reset (which wipes the kine DB and server/ tree) gets secrets re-applied. +# - A corrupt kine DB (removed by the host-side corruption check) gets secrets +# re-applied on the fresh database. +# This is idempotent — k3s checksums manifests and only re-applies on change. +ts "writing TLS secrets manifest..." +mkdir -p "$K3S_MANIFESTS" +CA_CRT_B64=$(base64 -w0 < "$PKI_DIR/ca.crt") +SERVER_CRT_B64=$(base64 -w0 < "$PKI_DIR/server.crt") +SERVER_KEY_B64=$(base64 -w0 < "$PKI_DIR/server.key") +CLIENT_CRT_B64=$(base64 -w0 < "$PKI_DIR/client.crt") +CLIENT_KEY_B64=$(base64 -w0 < "$PKI_DIR/client.key") +SSH_HANDSHAKE_SECRET_B64=$(base64 -w0 < "$SSH_HANDSHAKE_SECRET_FILE") + +cat > "$K3S_MANIFESTS/openshell-tls-secrets.yaml" < "$DIAG" + exit 1 + fi + { + echo "=== [DIAG $(date +%s)] nft binary: $NFT ===" + echo "=== [DIAG] nft list tables ===" + "$NFT" list tables 2>&1 + echo "=== [DIAG] nft list ruleset (kube-proxy) ===" + "$NFT" list ruleset 2>&1 + echo "=== [DIAG] ss -tlnp ===" + ss -tlnp 2>&1 || busybox netstat -tlnp 2>&1 || echo "ss/netstat not available" + echo "=== [DIAG] ip addr ===" + ip addr 2>&1 + echo "=== [DIAG] ip route ===" + ip route 2>&1 + echo "=== [DIAG] iptables -t nat -L -n -v ===" + iptables -t nat -L -n -v 2>&1 + echo "=== [DIAG] kube-proxy healthz ===" + wget -q -O - http://127.0.0.1:10256/healthz 2>&1 || echo "healthz failed" + echo "=== [DIAG] conntrack -L ===" + conntrack -L 2>&1 || echo "conntrack not available" + echo "=== [DIAG] done ===" + } > "$DIAG" 2>&1 +' & +fi + +# ── Clear stale kine bootstrap lock ───────────────────────────────────── +# k3s uses kine with a SQLite backend at state.db. 
When k3s starts, kine +# sets a bootstrap lock row; if k3s is killed before completing bootstrap +# (SIGKILL, host crash, power loss), the lock persists and the next k3s +# instance hangs forever on: +# "Bootstrap key already locked — waiting for data to be populated by +# another server" +# +# We clear the lock row before starting k3s so that a warm boot with +# persistent state.db succeeds. If state.db doesn't exist (first boot or +# --reset), this is a harmless no-op. If state.db is corrupt, sqlite3 +# fails silently (|| true) and the host-side corruption check in exec.rs +# will have already removed the file. +KINE_DB="/var/lib/rancher/k3s/server/db/state.db" +if [ -f "$KINE_DB" ]; then + ts "clearing stale kine bootstrap lock (if any)" + # If sqlite3 fails (corrupt DB, missing binary), log the failure. + # The host-side corruption check in exec.rs handles the corrupt case, + # but we should still know about it. + if ! sqlite3 "$KINE_DB" "DELETE FROM kine WHERE name LIKE '/bootstrap/%';" 2>/dev/null; then + ts "WARNING: failed to clear kine bootstrap lock — k3s may hang if DB is corrupt" + fi + if ! sqlite3 "$KINE_DB" "PRAGMA wal_checkpoint(TRUNCATE);" 2>/dev/null; then + ts "WARNING: failed to checkpoint kine WAL" + fi +fi + +exec /usr/local/bin/k3s server "${K3S_ARGS[@]}" diff --git a/crates/openshell-vm/src/embedded.rs b/crates/openshell-vm/src/embedded.rs new file mode 100644 index 000000000..15eaf4bee --- /dev/null +++ b/crates/openshell-vm/src/embedded.rs @@ -0,0 +1,442 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Embedded VM runtime resources. +//! +//! Native libraries (libkrun, libkrunfw, gvproxy) and the rootfs are embedded as +//! zstd-compressed byte arrays and extracted to XDG cache directories on first use. +//! +//! Cache locations: +//! - Runtime: `~/.local/share/openshell/vm-runtime/{version}/` +//! 
- Rootfs: `~/.local/share/openshell/openshell-vm/{version}/instances//rootfs/` + +use std::fs; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; + +use indicatif::{ProgressBar, ProgressStyle}; + +use crate::VmError; + +// ── Platform-specific embedded resources ─────────────────────────────────── + +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +mod resources { + pub const LIBKRUN: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrun.dylib.zst")); + pub const LIBKRUNFW: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrunfw.5.dylib.zst")); + pub const GVPROXY: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/gvproxy.zst")); + pub const ROOTFS: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/rootfs.tar.zst")); + pub const LIBKRUN_NAME: &str = "libkrun.dylib"; + pub const LIBKRUNFW_NAME: &str = "libkrunfw.5.dylib"; +} + +#[cfg(all(target_os = "linux", target_arch = "aarch64"))] +mod resources { + pub const LIBKRUN: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrun.so.zst")); + pub const LIBKRUNFW: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrunfw.so.5.zst")); + pub const GVPROXY: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/gvproxy.zst")); + pub const ROOTFS: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/rootfs.tar.zst")); + pub const LIBKRUN_NAME: &str = "libkrun.so"; + pub const LIBKRUNFW_NAME: &str = "libkrunfw.so.5"; +} + +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +mod resources { + pub const LIBKRUN: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrun.so.zst")); + pub const LIBKRUNFW: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libkrunfw.so.5.zst")); + pub const GVPROXY: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/gvproxy.zst")); + pub const ROOTFS: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/rootfs.tar.zst")); + pub const LIBKRUN_NAME: &str = "libkrun.so"; + pub const LIBKRUNFW_NAME: &str = "libkrunfw.so.5"; +} + +// Fallback for unsupported platforms (will 
fail at runtime) +#[cfg(not(any( + all(target_os = "macos", target_arch = "aarch64"), + all(target_os = "linux", target_arch = "aarch64"), + all(target_os = "linux", target_arch = "x86_64"), +)))] +mod resources { + pub const LIBKRUN: &[u8] = &[]; + pub const LIBKRUNFW: &[u8] = &[]; + pub const GVPROXY: &[u8] = &[]; + pub const ROOTFS: &[u8] = &[]; + pub const LIBKRUN_NAME: &str = "libkrun"; + pub const LIBKRUNFW_NAME: &str = "libkrunfw"; +} + +const VERSION: &str = env!("CARGO_PKG_VERSION"); + +// ── Public API ───────────────────────────────────────────────────────────── + +/// Ensures the embedded VM runtime is extracted to the cache directory. +/// +/// Returns the path to the runtime directory containing: +/// - libkrun.{dylib,so} +/// - libkrunfw.{5.dylib,.so.5} +/// - gvproxy +/// +/// On first call, this extracts the compressed embedded resources to the cache. +/// Subsequent calls return the cached path if valid. +pub fn ensure_runtime_extracted() -> Result { + // Check if embedded resources are available (non-empty) + if resources::LIBKRUN.is_empty() { + return Err(VmError::HostSetup( + "VM runtime not embedded for this platform. \ + Supported: macOS ARM64, Linux ARM64, Linux x86_64" + .to_string(), + )); + } + + let cache_dir = runtime_cache_dir()?; + let version_marker = cache_dir.join(".version"); + + // Cache key: version + content fingerprint (so dev builds at 0.0.0 + // still invalidate when the embedded libraries change). 
+ let cache_key = runtime_cache_key(); + + // Check if already extracted with the correct cache key + if version_marker.exists() + && let Ok(cached_key) = fs::read_to_string(&version_marker) + && cached_key.trim() == cache_key + { + // Validate files exist + if validate_runtime_dir(&cache_dir).is_ok() { + tracing::debug!( + path = %cache_dir.display(), + "Using cached VM runtime" + ); + return Ok(cache_dir); + } + } + + // Clean up old versions before extracting new one + cleanup_old_versions(&cache_dir)?; + + // Create fresh directory + if cache_dir.exists() { + fs::remove_dir_all(&cache_dir) + .map_err(|e| VmError::HostSetup(format!("remove old cache: {e}")))?; + } + fs::create_dir_all(&cache_dir) + .map_err(|e| VmError::HostSetup(format!("create cache dir: {e}")))?; + + tracing::info!( + path = %cache_dir.display(), + version = VERSION, + "Extracting embedded VM runtime" + ); + + // Extract all resources + extract_resource(resources::LIBKRUN, &cache_dir.join(resources::LIBKRUN_NAME))?; + extract_resource( + resources::LIBKRUNFW, + &cache_dir.join(resources::LIBKRUNFW_NAME), + )?; + extract_resource(resources::GVPROXY, &cache_dir.join("gvproxy"))?; + + // Make gvproxy executable + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + fs::set_permissions(cache_dir.join("gvproxy"), fs::Permissions::from_mode(0o755)) + .map_err(|e| VmError::HostSetup(format!("chmod gvproxy: {e}")))?; + } + + // Write version marker (includes content fingerprint for cache invalidation) + fs::write(&version_marker, runtime_cache_key()) + .map_err(|e| VmError::HostSetup(format!("write version marker: {e}")))?; + + tracing::info!( + path = %cache_dir.display(), + "VM runtime extracted successfully" + ); + + Ok(cache_dir) +} + +/// Returns the path where the runtime would be cached (without extracting). +pub fn runtime_cache_path() -> Result { + runtime_cache_dir() +} + +/// Extract the embedded rootfs to the given destination directory. 
+///
+/// If the destination already exists, it is returned as-is (no re-extraction).
+/// Otherwise the embedded `rootfs.tar.zst` is decompressed and unpacked into `dest`.
+///
+/// A `.version` marker is written after successful extraction so that
+/// version-mismatched rootfs directories are detected and rebuilt.
+pub fn extract_rootfs_to(dest: &Path) -> Result<(), VmError> {
+    // An empty ROOTFS constant means this binary was built without the
+    // embedded artifacts (the fallback `resources` module).
+    if resources::ROOTFS.is_empty() {
+        return Err(VmError::HostSetup(
+            "Rootfs not embedded. Build with: mise run vm:build:embedded".to_string(),
+        ));
+    }
+
+    let version_marker = dest.join(".version");
+
+    // Already extracted with the correct version — nothing to do.
+    if version_marker.exists()
+        && let Ok(cached_version) = fs::read_to_string(&version_marker)
+        && cached_version.trim() == VERSION
+    {
+        tracing::debug!(
+            path = %dest.display(),
+            "Using cached rootfs"
+        );
+        return Ok(());
+    }
+
+    // Remove existing if present (version mismatch or incomplete extraction
+    // — the marker is written only after a successful unpack, so a partial
+    // tree carries no marker and is rebuilt here).
+    if dest.exists() {
+        eprintln!("Removing outdated rootfs at {}...", dest.display());
+        fs::remove_dir_all(dest)
+            .map_err(|e| VmError::HostSetup(format!("remove old rootfs: {e}")))?;
+    }
+
+    // Extract with progress bar.
+    extract_rootfs_with_progress(resources::ROOTFS, dest)?;
+
+    // Write the version marker last so it doubles as an
+    // "extraction completed" sentinel.
+    fs::write(&version_marker, VERSION)
+        .map_err(|e| VmError::HostSetup(format!("write rootfs version marker: {e}")))?;
+
+    Ok(())
+}
+
+/// Clean up rootfs directories from older versions.
+///
+/// Call this periodically (e.g. at startup) to reclaim disk from previous
+/// releases. Removes all version directories under the openshell-vm base
+/// except the current version.
+pub fn cleanup_old_rootfs() -> Result<(), VmError> {
+    let base = rootfs_cache_base()?;
+    if !base.exists() {
+        return Ok(());
+    }
+
+    // Everything under the base except `base/{VERSION}` is fair game.
+    let current_version_dir = base.join(VERSION);
+    cleanup_old_versions_in_base(&base, &current_version_dir)
+}
+
+/// Check if the rootfs is embedded (non-empty).
+pub fn has_embedded_rootfs() -> bool { + !resources::ROOTFS.is_empty() +} + +// ── Internal helpers ─────────────────────────────────────────────────────── + +/// Build a cache key that combines the version string with a short content +/// fingerprint of the embedded runtime bytes. +/// +/// Using the version alone is insufficient for dev builds (all `0.0.0`) +/// because the embedded libraries can change between compiles without the +/// version changing. The fingerprint is a simple XOR-fold of the first few +/// bytes of each embedded resource — cheap to compute at startup without +/// pulling in a hash dependency. +fn runtime_cache_key() -> String { + // XOR-fold the first 64 bytes of each resource to get a cheap fingerprint. + let mut fp: u64 = 0; + for (i, chunk) in [resources::LIBKRUN, resources::LIBKRUNFW, resources::GVPROXY] + .iter() + .enumerate() + { + let sample = &chunk[..chunk.len().min(64)]; + let mut word: u64 = 0; + for (j, &b) in sample.iter().enumerate() { + word ^= (b as u64) << ((j % 8) * 8); + } + // Mix in resource index so identical resources don't cancel out. + fp ^= word.rotate_left((i as u32) * 13 + 7); + // Also mix in the total length so size changes are detected. 
+ fp ^= (chunk.len() as u64).rotate_left((i as u32) * 17 + 3); + } + format!("{VERSION}-{fp:016x}") +} + +fn runtime_cache_dir() -> Result { + let base = openshell_core::paths::xdg_data_dir() + .map_err(|e| VmError::HostSetup(format!("resolve XDG data dir: {e}")))?; + Ok(base.join("openshell").join("vm-runtime").join(VERSION)) +} + +fn runtime_cache_base() -> Result { + let base = openshell_core::paths::xdg_data_dir() + .map_err(|e| VmError::HostSetup(format!("resolve XDG data dir: {e}")))?; + Ok(base.join("openshell").join("vm-runtime")) +} + +fn rootfs_cache_base() -> Result { + let base = openshell_core::paths::xdg_data_dir() + .map_err(|e| VmError::HostSetup(format!("resolve XDG data dir: {e}")))?; + Ok(base.join("openshell").join("openshell-vm")) +} + +fn cleanup_old_versions(current_dir: &Path) -> Result<(), VmError> { + cleanup_old_versions_in_base(&runtime_cache_base()?, current_dir) +} + +fn cleanup_old_versions_in_base(base: &Path, current_dir: &Path) -> Result<(), VmError> { + if !base.exists() { + return Ok(()); + } + + let entries = match fs::read_dir(base) { + Ok(e) => e, + Err(_) => return Ok(()), // Can't read, skip cleanup + }; + + for entry in entries.filter_map(Result::ok) { + let path = entry.path(); + // Skip if this is the current version directory or a parent of it + if path.is_dir() && !current_dir.starts_with(&path) && path != current_dir { + tracing::debug!( + path = %path.display(), + "Cleaning up old version" + ); + if let Err(e) = fs::remove_dir_all(&path) { + tracing::warn!( + path = %path.display(), + error = %e, + "Failed to clean up old version" + ); + } + } + } + + Ok(()) +} + +fn extract_resource(compressed: &[u8], dest: &Path) -> Result<(), VmError> { + if compressed.is_empty() { + return Err(VmError::HostSetup(format!( + "embedded resource is empty: {}", + dest.display() + ))); + } + + let decompressed = zstd::decode_all(compressed) + .map_err(|e| VmError::HostSetup(format!("decompress {}: {e}", dest.display())))?; + + let mut 
file = fs::File::create(dest)
+        .map_err(|e| VmError::HostSetup(format!("create {}: {e}", dest.display())))?;
+
+    file.write_all(&decompressed)
+        .map_err(|e| VmError::HostSetup(format!("write {}: {e}", dest.display())))?;
+
+    tracing::debug!(
+        path = %dest.display(),
+        compressed_size = compressed.len(),
+        decompressed_size = decompressed.len(),
+        "Extracted resource"
+    );
+
+    Ok(())
+}
+
+/// Decompress the embedded rootfs tarball and unpack it into `dest`,
+/// showing a progress bar on stderr.
+///
+/// The bar tracks consumption of the *compressed* stream: `ProgressReader`
+/// wraps the in-memory cursor and bumps the bar on every read issued by the
+/// (lazy) zstd decoder while `tar` unpacks.
+fn extract_rootfs_with_progress(compressed: &[u8], dest: &Path) -> Result<(), VmError> {
+    eprintln!("Extracting VM environment (first run)...");
+
+    // Progress is measured against the compressed length, since that is the
+    // only quantity the ProgressReader below can observe.
+    let pb = ProgressBar::new(compressed.len() as u64);
+    pb.set_style(
+        ProgressStyle::default_bar()
+            .template("  Decompressing [{bar:40.cyan/blue}] {bytes}/{total_bytes}")
+            .unwrap()
+            .progress_chars("=>-"),
+    );
+
+    // Wrap the compressed data in a progress reader so reads advance the bar.
+    let reader = ProgressReader::new(std::io::Cursor::new(compressed), pb.clone());
+
+    // Build the zstd decoder. Decompression happens lazily: no data is read
+    // until `tar` pulls from the decoder during unpack below.
+    let decoder = zstd::Decoder::new(reader)
+        .map_err(|e| VmError::HostSetup(format!("create zstd decoder: {e}")))?;
+
+    // Create destination directory
+    fs::create_dir_all(dest).map_err(|e| VmError::HostSetup(format!("create rootfs dir: {e}")))?;
+
+    // Extract tar archive with progress
+    eprintln!("  Extracting rootfs...");
+    let mut archive = tar::Archive::new(decoder);
+    archive
+        .unpack(dest)
+        .map_err(|e| VmError::HostSetup(format!("extract rootfs tarball: {e}")))?;
+
+    // BUGFIX: finish the bar only after `unpack` has consumed the stream.
+    // Previously this ran right after constructing the decoder — before any
+    // data had been read — so the bar was finished and cleared immediately
+    // and the ProgressReader's subsequent inc() calls were never displayed.
+    pb.finish_and_clear();
+
+    eprintln!("  Rootfs extracted to {}", dest.display());
+
+    Ok(())
+}
+
+/// A reader wrapper that updates a progress bar as data is read.
+struct ProgressReader { + inner: R, + progress: ProgressBar, +} + +impl ProgressReader { + fn new(inner: R, progress: ProgressBar) -> Self { + Self { inner, progress } + } +} + +impl Read for ProgressReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let n = self.inner.read(buf)?; + self.progress.inc(n as u64); + Ok(n) + } +} + +fn validate_runtime_dir(dir: &Path) -> Result<(), VmError> { + let libkrun = dir.join(resources::LIBKRUN_NAME); + let libkrunfw = dir.join(resources::LIBKRUNFW_NAME); + let gvproxy = dir.join("gvproxy"); + + for path in [&libkrun, &libkrunfw, &gvproxy] { + if !path.exists() { + return Err(VmError::HostSetup(format!( + "missing runtime file: {}", + path.display() + ))); + } + + // Check file is not empty (would indicate a stub) + let size = fs::metadata(path).map(|m| m.len()).unwrap_or(0); + if size == 0 { + return Err(VmError::HostSetup(format!( + "runtime file is empty (stub): {}", + path.display() + ))); + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_resources_not_empty() { + // On supported platforms, resources should be non-empty + #[cfg(any( + all(target_os = "macos", target_arch = "aarch64"), + all(target_os = "linux", target_arch = "aarch64"), + all(target_os = "linux", target_arch = "x86_64"), + ))] + { + // Note: This test only passes if `mise run vm:setup` was run + // before building. In CI without compressed artifacts, resources will be + // empty stubs. + if !resources::LIBKRUN.is_empty() { + assert!(!resources::LIBKRUNFW.is_empty()); + assert!(!resources::GVPROXY.is_empty()); + } + } + } +} diff --git a/crates/openshell-vm/src/exec.rs b/crates/openshell-vm/src/exec.rs new file mode 100644 index 000000000..6195556e1 --- /dev/null +++ b/crates/openshell-vm/src/exec.rs @@ -0,0 +1,767 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::fs::{self, File}; +use std::io::{BufRead, BufReader, Read, Write}; +use std::os::unix::net::UnixStream; +use std::path::{Path, PathBuf}; +use std::thread; +use std::time::{SystemTime, UNIX_EPOCH}; + +use base64::Engine as _; +use serde::{Deserialize, Serialize}; + +use crate::VmError; + +/// Remove a directory, safely handling symlinks. +/// +/// Uses `symlink_metadata` (lstat) to detect symlinks. If the path is a +/// symlink (e.g. `var/run -> /run` in a Linux rootfs), the symlink itself +/// is removed without following it — preventing traversal attacks where a +/// symlink could redirect `remove_dir_all` to an arbitrary host path. +/// If the path is a real directory, it is removed recursively. +fn safe_remove_dir_all(path: &Path) -> Result { + match fs::symlink_metadata(path) { + Ok(meta) => { + if meta.file_type().is_symlink() { + // Remove the symlink itself, not the target it points to. + fs::remove_file(path).map_err(|e| { + VmError::RuntimeState(format!("reset: remove symlink {}: {e}", path.display())) + })?; + return Ok(true); + } + if !meta.is_dir() { + return Ok(false); // Not a directory — nothing to remove. 
+            }
+            // A real directory: remove it and everything beneath it.
+            fs::remove_dir_all(path).map_err(|e| {
+                VmError::RuntimeState(format!("reset: remove {}: {e}", path.display()))
+            })?;
+            Ok(true)
+        }
+        // Path does not exist — nothing to remove.
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(false),
+        Err(e) => Err(VmError::RuntimeState(format!(
+            "stat {}: {e}",
+            path.display()
+        ))),
+    }
+}
+
+/// vsock port the in-guest exec agent listens on.
+pub const VM_EXEC_VSOCK_PORT: u32 = 10_777;
+
+// File names for per-rootfs runtime metadata, stored next to the rootfs
+// (see `vm_state_path` / `vm_lock_path`).
+const VM_STATE_NAME: &str = "vm-state.json";
+const VM_LOCK_NAME: &str = "vm.lock";
+// Default KUBECONFIG entry injected into guest exec environments unless the
+// caller already supplies one (see `exec_running_vm`).
+const KUBECONFIG_ENV: &str = "KUBECONFIG=/etc/rancher/k3s/k3s.yaml";
+
+/// Options for running a command inside the guest via the exec agent.
+#[derive(Debug, Clone)]
+pub struct VmExecOptions {
+    /// Rootfs whose VM should run the command; `None` selects the default
+    /// rootfs (see `default_rootfs`).
+    pub rootfs: Option<PathBuf>,
+    /// Program and arguments to execute in the guest.
+    pub command: Vec<String>,
+    /// Working directory inside the guest, if any.
+    pub workdir: Option<String>,
+    /// Extra `KEY=VALUE` environment entries for the guest process.
+    pub env: Vec<String>,
+    /// Whether to request a TTY for the guest process.
+    pub tty: bool,
+}
+
+/// On-disk record describing a running VM, serialized as JSON to the
+/// per-rootfs state file (see `write_vm_runtime_state`).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VmRuntimeState {
+    /// Host PID of the VM process.
+    pub pid: i32,
+    /// vsock port of the in-guest exec agent.
+    pub exec_vsock_port: u32,
+    /// Host-side Unix socket bridged to the exec agent.
+    pub socket_path: PathBuf,
+    /// Rootfs directory this VM is using.
+    pub rootfs: PathBuf,
+    /// Path to the VM console log on the host.
+    pub console_log: PathBuf,
+    /// Wall-clock start time, milliseconds since the Unix epoch.
+    pub started_at_ms: u128,
+    /// PID of the gvproxy process (if networking uses gvproxy).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub gvproxy_pid: Option<i32>,
+}
+
+/// First message sent to the exec agent on a new connection.
+#[derive(Debug, Serialize)]
+struct ExecRequest {
+    /// Program and arguments.
+    argv: Vec<String>,
+    /// `KEY=VALUE` environment entries.
+    env: Vec<String>,
+    /// Optional working directory inside the guest.
+    cwd: Option<String>,
+    /// Whether a TTY was requested.
+    tty: bool,
+}
+
+/// Frames sent host → guest after the initial `ExecRequest`
+/// (newline-delimited JSON; payloads are base64-encoded).
+#[derive(Debug, Serialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+enum ClientFrame {
+    /// A chunk of host stdin (base64).
+    Stdin { data: String },
+    /// Signals that host stdin reached EOF.
+    StdinClose,
+}
+
+/// Frames sent guest → host (newline-delimited JSON; payloads base64).
+#[derive(Debug, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+enum ServerFrame {
+    /// A chunk of guest stdout (base64).
+    Stdout { data: String },
+    /// A chunk of guest stderr (base64).
+    Stderr { data: String },
+    /// Final exit code of the guest command.
+    Exit { code: i32 },
+    /// Fatal agent-side error; terminates the session.
+    Error { message: String },
+}
+
+/// Compute the host-side Unix socket path used to reach the exec agent
+/// for a given rootfs.
+pub fn vm_exec_socket_path(rootfs: &Path) -> PathBuf {
+    // Prefer XDG_RUNTIME_DIR (per-user, restricted permissions on Linux),
+    // fall back to /tmp. Ownership/symlink validation happens in
+    // secure_socket_base() when the gvproxy socket dir is created; here
+    // we just compute the path. The parent directory is created (with
+    // permission checks) at launch time via create_dir_all.
+    let base = if let Some(xdg) = std::env::var_os("XDG_RUNTIME_DIR") {
+        PathBuf::from(xdg)
+    } else {
+        // /tmp is the conventional fallback; if it is somehow not a
+        // directory, defer to the platform default temp dir.
+        let mut base = PathBuf::from("/tmp");
+        if !base.is_dir() {
+            base = std::env::temp_dir();
+        }
+        base
+    };
+    let dir = base.join("ovm-exec");
+    let id = hash_path_id(rootfs);
+    dir.join(format!("{id}.sock"))
+}
+
+/// Derive a short, stable hex identifier from a path using FNV-1a,
+/// truncated to 48 bits (12 hex chars) to keep socket names short.
+fn hash_path_id(path: &Path) -> String {
+    // FNV-1a 64-bit offset basis and prime.
+    let mut hash: u64 = 0xcbf29ce484222325;
+    for byte in path.to_string_lossy().as_bytes() {
+        hash ^= u64::from(*byte);
+        hash = hash.wrapping_mul(0x100000001b3);
+    }
+    format!("{:012x}", hash & 0x0000_ffff_ffff_ffff)
+}
+
+/// Persist the runtime state for a freshly started VM.
+///
+/// The state file is what `load_vm_runtime_state` (and thus
+/// `exec_running_vm`) uses to find the VM's PID and exec socket.
+pub fn write_vm_runtime_state(
+    rootfs: &Path,
+    pid: i32,
+    console_log: &Path,
+    gvproxy_pid: Option<i32>,
+) -> Result<(), VmError> {
+    let state = VmRuntimeState {
+        pid,
+        exec_vsock_port: VM_EXEC_VSOCK_PORT,
+        socket_path: vm_exec_socket_path(rootfs),
+        rootfs: rootfs.to_path_buf(),
+        console_log: console_log.to_path_buf(),
+        started_at_ms: now_ms()?,
+        gvproxy_pid,
+    };
+    let path = vm_state_path(rootfs);
+    let bytes = serde_json::to_vec_pretty(&state)
+        .map_err(|e| VmError::RuntimeState(format!("serialize VM runtime state: {e}")))?;
+    fs::create_dir_all(vm_run_dir(rootfs))
+        .map_err(|e| VmError::RuntimeState(format!("create VM runtime dir: {e}")))?;
+    // Write atomically (temp file + rename). A crash mid-write must not
+    // leave a truncated JSON file: `load_vm_runtime_state` would fail with a
+    // "decode VM runtime state" error, which `ensure_vm_not_running` does
+    // NOT treat as stale state — blocking all subsequent launches until the
+    // file is deleted by hand.
+    let tmp = path.with_extension("json.tmp");
+    fs::write(&tmp, bytes)
+        .map_err(|e| VmError::RuntimeState(format!("write {}: {e}", tmp.display())))?;
+    fs::rename(&tmp, &path).map_err(|e| {
+        VmError::RuntimeState(format!(
+            "rename {} -> {}: {e}",
+            tmp.display(),
+            path.display()
+        ))
+    })?;
+    Ok(())
+}
+
+/// Best-effort removal of the state file and exec socket for a rootfs.
+/// Errors are ignored: either file may legitimately not exist.
+pub fn clear_vm_runtime_state(rootfs: &Path) {
+    let state_path = vm_state_path(rootfs);
+    let socket_path = vm_exec_socket_path(rootfs);
+    let _ = fs::remove_file(state_path);
+    let _ = fs::remove_file(socket_path);
+}
+
+/// Wipe stale container runtime state from the rootfs.
+///
+/// After a crash or unclean shutdown, containerd and kubelet can retain
+/// references to pod sandboxes and containers that no longer exist. This
+/// causes `ContainerCreating` → `context deadline exceeded` loops because
+/// containerd blocks trying to clean up orphaned resources.
+/// +/// This function removes: +/// - containerd runtime task state (running container metadata) +/// - containerd sandbox controller shim state +/// - containerd CRI plugin state (pod/container tracking) +/// - containerd tmp mounts +/// - kubelet pod state (volume mounts, pod status) +/// +/// It preserves: +/// - containerd images and content (no re-pull needed) +/// - containerd snapshots (no re-extract needed) +/// - containerd metadata database (meta.db — image/snapshot tracking) +/// +/// **Note:** This is the only path that wipes the kine/SQLite database. +/// Normal boots preserve `state.db` (and all cluster objects) across +/// restarts. The init script clears stale bootstrap locks via `sqlite3`, +/// and `recover_corrupt_kine_db` handles actual file corruption. +pub fn reset_runtime_state(rootfs: &Path, gateway_name: &str) -> Result<(), VmError> { + // Full reset: wipe all runtime state so the VM cold-starts from scratch. + // + // With the block-device layout, k3s server/agent state, containerd, PVCs, + // and PKI all live on the state disk — the caller in lib.rs deletes the + // entire state disk image file, which achieves a complete wipe in one + // operation without touching the virtiofs rootfs. + // + // We still clean the virtiofs rootfs for paths that are NOT on the state + // disk: kubelet pod volumes, CNI state, and the pre-init sentinel. These + // paths are present in the rootfs regardless of the storage layout. + let dirs_to_remove = [ + // Stale pod volume mounts and projected secrets + rootfs.join("var/lib/kubelet/pods"), + // CNI state: stale network namespace references from dead pods + rootfs.join("var/lib/cni"), + // Runtime state (PIDs, sockets) — on virtiofs, not block device + rootfs.join("var/run"), + ]; + + let mut cleaned = 0usize; + for dir in &dirs_to_remove { + if safe_remove_dir_all(dir)? 
{ + cleaned += 1; + } + } + + // Remove the pre-initialized sentinel so the init script knows + // this is a cold start and deploys manifests from staging. + // We write a marker file so ensure-vm-rootfs.sh still sees the + // rootfs as built (avoiding a full rebuild) while the init script + // detects the cold start via the missing .initialized sentinel. + let sentinel = rootfs.join("opt/openshell/.initialized"); + let reset_marker = rootfs.join("opt/openshell/.reset"); + if sentinel.exists() { + fs::remove_file(&sentinel).map_err(|e| { + VmError::RuntimeState(format!( + "reset: remove sentinel {}: {e}", + sentinel.display() + )) + })?; + fs::write(&reset_marker, "").map_err(|e| { + VmError::RuntimeState(format!( + "reset: write marker {}: {e}", + reset_marker.display() + )) + })?; + cleaned += 1; + } + + // PKI lives on the state disk; deleting the state disk image (done by + // the caller) rotates it automatically. Just note it for the log. + eprintln!("Reset: PKI will be regenerated on next boot (state disk wiped)"); + + // Wipe host-side mTLS credentials so bootstrap_gateway() takes the + // first-boot path and fetches new certs from the VM via the exec agent. + if let Ok(home) = std::env::var("HOME") { + let config_base = + std::env::var("XDG_CONFIG_HOME").unwrap_or_else(|_| format!("{home}/.config")); + let mtls_dir = PathBuf::from(&config_base) + .join("openshell/gateways") + .join(gateway_name) + .join("mtls"); + if mtls_dir.is_dir() { + fs::remove_dir_all(&mtls_dir).map_err(|e| { + VmError::RuntimeState(format!( + "reset: remove mTLS dir {}: {e}", + mtls_dir.display() + )) + })?; + } + // Also remove metadata so is_warm_boot() returns false. 
+ let metadata = PathBuf::from(&config_base) + .join("openshell/gateways") + .join(gateway_name) + .join("metadata.json"); + if metadata.is_file() { + fs::remove_file(&metadata).map_err(|e| { + VmError::RuntimeState(format!( + "reset: remove metadata {}: {e}", + metadata.display() + )) + })?; + } + } + + eprintln!("Reset: cleaned {cleaned} state directories (full reset)"); + Ok(()) +} + +/// Remove a corrupt kine (`SQLite`) database so k3s can recreate it on boot. +/// +/// k3s uses kine with a `SQLite` backend at `var/lib/rancher/k3s/server/db/state.db`. +/// If the VM is killed mid-write (SIGKILL, host crash, power loss), the database +/// file may be left in a corrupt state — the `SQLite` header magic is missing or the +/// file is truncated. k3s would open the DB, get `SQLITE_NOTADB` / +/// `SQLITE_CORRUPT`, and crash at startup. +/// +/// This function checks the `SQLite` file header (first 100 bytes only) and removes +/// the database plus its WAL/SHM sidecar files if the header is invalid. k3s will +/// create a fresh database on startup and cluster state will be re-applied from +/// the auto-deploy manifests in `server/manifests/`. +/// +/// **Stale bootstrap locks** (a kine application-level issue where a killed k3s +/// server leaves a lock row that causes the next instance to hang) are handled +/// separately by the init script (`openshell-vm-init.sh`), which runs +/// `sqlite3 state.db "DELETE FROM kine WHERE name LIKE '/bootstrap/%'"` before +/// starting k3s. This allows the database — and all persistent cluster state — to +/// survive normal restarts. +/// +/// **What is lost on corruption:** all cluster object records (Pods, Deployments, +/// Secrets, `ConfigMaps`, CRDs, etc.) and the bootstrap token. These are re-created +/// from manifests on the next boot. +/// +/// **What is always preserved:** container images and snapshots (under +/// `k3s/agent/`), PKI, and the `.initialized` sentinel. 
+/// +/// This function is a no-op if `state.db` does not exist (e.g. first boot or +/// after a full `--reset`). +pub fn recover_corrupt_kine_db(rootfs: &Path) -> Result<(), VmError> { + let db_path = rootfs.join("var/lib/rancher/k3s/server/db/state.db"); + if !db_path.exists() { + return Ok(()); // Nothing to check — first boot or post-reset. + } + + // The SQLite file format begins with a 16-byte magic string. + // Reference: https://www.sqlite.org/fileformat.html#the_database_header + const SQLITE_MAGIC: &[u8] = b"SQLite format 3\x00"; + + // Read only the first 100 bytes (the minimum valid SQLite header size) + // instead of loading the entire database into memory. + let has_invalid_header = match File::open(&db_path).and_then(|mut f| { + let mut buf = [0u8; 100]; + let n = f.read(&mut buf)?; + Ok((n, buf)) + }) { + Err(_) => true, // Can't read → treat as corrupt. + Ok((n, _)) if n < 100 => true, // Too short to be a valid DB. + Ok((_, buf)) => !buf.starts_with(SQLITE_MAGIC), + }; + + if !has_invalid_header { + return Ok(()); // Valid database — preserve it for warm boot. + } + + eprintln!( + "Warning: kine database is corrupt ({}), removing for clean boot", + db_path.display() + ); + + remove_kine_db_files(&db_path)?; + + Ok(()) +} + +/// Remove the kine `SQLite` database and its WAL/SHM sidecar files. +fn remove_kine_db_files(db_path: &Path) -> Result<(), VmError> { + if let Err(e) = fs::remove_file(db_path) { + return Err(VmError::RuntimeState(format!( + "failed to remove kine database {}: {e}", + db_path.display() + ))); + } + // Also remove any WAL/SHM sidecar files left by an interrupted write. + let _ = fs::remove_file(db_path.with_extension("db-wal")); + let _ = fs::remove_file(db_path.with_extension("db-shm")); + Ok(()) +} + +/// Acquire an exclusive lock on the rootfs lock file. +/// +/// The lock is held for the lifetime of the returned `File` handle. When +/// the process exits (even via SIGKILL), the OS releases the lock +/// automatically. 
This provides a reliable guard against two VM processes +/// sharing the same rootfs — even if the state file is deleted. +/// +/// Returns `Ok(File)` on success. The caller must keep the `File` alive +/// for as long as the VM is running. +pub fn acquire_rootfs_lock(rootfs: &Path) -> Result { + let lock_path = vm_lock_path(rootfs); + fs::create_dir_all(vm_run_dir(rootfs)) + .map_err(|e| VmError::RuntimeState(format!("create VM runtime dir: {e}")))?; + + // Open (or create) the lock file without truncating so we can read + // the holder's PID for the error message if the lock is held. + let file = fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(&lock_path) + .map_err(|e| { + VmError::RuntimeState(format!("open lock file {}: {e}", lock_path.display())) + })?; + + // Try non-blocking exclusive lock. + let fd = std::os::unix::io::AsRawFd::as_raw_fd(&file); + let rc = unsafe { libc::flock(fd, libc::LOCK_EX | libc::LOCK_NB) }; + if rc != 0 { + let err = std::io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::EWOULDBLOCK) { + // Another process holds the lock — read its PID for diagnostics. + let holder_pid = fs::read_to_string(&lock_path).unwrap_or_default(); + let holder_pid = holder_pid.trim(); + return Err(VmError::RuntimeState(format!( + "another process (pid {holder_pid}) is using rootfs {}. \ + Stop the running VM first", + rootfs.display() + ))); + } + return Err(VmError::RuntimeState(format!( + "lock rootfs {}: {err}", + lock_path.display() + ))); + } + + // Lock acquired — write our PID (truncate first, then write). + // This is informational only; the flock is the real guard. + let _ = file.set_len(0); + { + let mut f = &file; + let _ = write!(f, "{}", std::process::id()); + } + + Ok(file) +} + +/// Check whether the rootfs lock file is currently held by another process. +/// +/// Returns `Ok(())` if the lock is free (or can be acquired), and an +/// `Err` if another process holds it. 
Does NOT acquire the lock — use +/// [`acquire_rootfs_lock`] for that. +fn check_rootfs_lock_free(rootfs: &Path) -> Result<(), VmError> { + let lock_path = vm_lock_path(rootfs); + if !lock_path.exists() { + return Ok(()); + } + + let Ok(file) = File::open(&lock_path) else { + return Ok(()); // Can't open → treat as free + }; + + let fd = std::os::unix::io::AsRawFd::as_raw_fd(&file); + let rc = unsafe { libc::flock(fd, libc::LOCK_EX | libc::LOCK_NB) }; + if rc != 0 { + let err = std::io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::EWOULDBLOCK) { + let holder_pid = fs::read_to_string(&lock_path).unwrap_or_default(); + let holder_pid = holder_pid.trim(); + return Err(VmError::RuntimeState(format!( + "another process (pid {holder_pid}) is using rootfs {}. \ + Stop the running VM first", + rootfs.display() + ))); + } + } else { + // We acquired the lock — release it immediately since we're only probing. + unsafe { libc::flock(fd, libc::LOCK_UN) }; + } + + Ok(()) +} + +pub fn ensure_vm_not_running(rootfs: &Path) -> Result<(), VmError> { + // Primary guard: check the flock. This works even if the state file + // has been deleted, because the kernel holds the lock until the + // owning process exits. + check_rootfs_lock_free(rootfs)?; + + // Secondary guard: check the state file for any stale state. 
+ match load_vm_runtime_state(Some(rootfs)) { + Ok(state) => Err(VmError::RuntimeState(format!( + "VM is already running (pid {}) with exec socket {}", + state.pid, + state.socket_path.display() + ))), + Err(VmError::RuntimeState(message)) + if message.starts_with("read VM runtime state") + || message.starts_with("VM is not running") => + { + clear_vm_runtime_state(rootfs); + Ok(()) + } + Err(err) => Err(err), + } +} + +pub fn exec_running_vm(options: VmExecOptions) -> Result { + let state = load_vm_runtime_state(options.rootfs.as_deref())?; + let mut stream = UnixStream::connect(&state.socket_path).map_err(|e| { + VmError::Exec(format!( + "connect to VM exec socket {}: {e}", + state.socket_path.display() + )) + })?; + let mut writer = stream + .try_clone() + .map_err(|e| VmError::Exec(format!("clone VM exec socket: {e}")))?; + + let mut env = options.env; + validate_env_vars(&env)?; + if !env.iter().any(|item| item.starts_with("KUBECONFIG=")) { + env.push(KUBECONFIG_ENV.to_string()); + } + + let request = ExecRequest { + argv: options.command, + env, + cwd: options.workdir, + tty: options.tty, + }; + send_json_line(&mut writer, &request)?; + + let stdin_writer = writer; + thread::spawn(move || { + let _ = pump_stdin(stdin_writer); + }); + + let mut reader = BufReader::new(&mut stream); + let mut line = String::new(); + let stdout = std::io::stdout(); + let stderr = std::io::stderr(); + let mut stdout = stdout.lock(); + let mut stderr = stderr.lock(); + let mut exit_code = None; + + loop { + line.clear(); + let bytes = reader + .read_line(&mut line) + .map_err(|e| VmError::Exec(format!("read VM exec response from guest agent: {e}")))?; + if bytes == 0 { + break; + } + + let frame: ServerFrame = serde_json::from_str(line.trim_end()) + .map_err(|e| VmError::Exec(format!("decode VM exec response frame: {e}")))?; + + match frame { + ServerFrame::Stdout { data } => { + let bytes = decode_payload(&data)?; + stdout + .write_all(&bytes) + .map_err(|e| 
VmError::Exec(format!("write guest stdout: {e}")))?; + stdout + .flush() + .map_err(|e| VmError::Exec(format!("flush guest stdout: {e}")))?; + } + ServerFrame::Stderr { data } => { + let bytes = decode_payload(&data)?; + stderr + .write_all(&bytes) + .map_err(|e| VmError::Exec(format!("write guest stderr: {e}")))?; + stderr + .flush() + .map_err(|e| VmError::Exec(format!("flush guest stderr: {e}")))?; + } + ServerFrame::Exit { code } => { + exit_code = Some(code); + break; + } + ServerFrame::Error { message } => { + return Err(VmError::Exec(message)); + } + } + } + + exit_code.ok_or_else(|| { + VmError::Exec("VM exec agent disconnected before returning an exit code".to_string()) + }) +} + +/// Run a command inside the guest via the exec agent and capture its stdout. +/// +/// Unlike [`exec_running_vm`], this function does not pump host stdin or write +/// to the terminal. It collects all stdout frames into a `Vec` and returns +/// them on success (exit code 0). Stderr output is discarded. +/// +/// This is the building block for internal host→guest queries (e.g. reading +/// files from the guest filesystem) without requiring a dedicated vsock server. +pub fn exec_capture(socket_path: &Path, argv: Vec) -> Result, VmError> { + let mut stream = UnixStream::connect(socket_path).map_err(|e| { + VmError::Exec(format!( + "connect to VM exec socket {}: {e}", + socket_path.display() + )) + })?; + let mut writer = stream + .try_clone() + .map_err(|e| VmError::Exec(format!("clone VM exec socket: {e}")))?; + + let request = ExecRequest { + argv, + env: vec![], + cwd: None, + tty: false, + }; + send_json_line(&mut writer, &request)?; + + // Close stdin immediately — we have no input to send. 
+ send_json_line(&mut writer, &ClientFrame::StdinClose)?; + + let mut reader = BufReader::new(&mut stream); + let mut line = String::new(); + let mut stdout_buf = Vec::new(); + + loop { + line.clear(); + let bytes = reader + .read_line(&mut line) + .map_err(|e| VmError::Exec(format!("read VM exec response: {e}")))?; + if bytes == 0 { + break; + } + + let frame: ServerFrame = serde_json::from_str(line.trim_end()) + .map_err(|e| VmError::Exec(format!("decode VM exec response frame: {e}")))?; + + match frame { + ServerFrame::Stdout { data } => { + stdout_buf.extend_from_slice(&decode_payload(&data)?); + } + ServerFrame::Stderr { .. } => { + // Discard stderr for capture mode. + } + ServerFrame::Exit { code } => { + if code != 0 { + return Err(VmError::Exec(format!( + "guest command exited with code {code}" + ))); + } + return Ok(stdout_buf); + } + ServerFrame::Error { message } => { + return Err(VmError::Exec(message)); + } + } + } + + Err(VmError::Exec( + "VM exec agent disconnected before returning an exit code".to_string(), + )) +} + +fn vm_run_dir(rootfs: &Path) -> PathBuf { + rootfs.parent().unwrap_or(rootfs).to_path_buf() +} + +pub fn vm_state_path(rootfs: &Path) -> PathBuf { + vm_run_dir(rootfs).join(format!("{}-{}", rootfs_key(rootfs), VM_STATE_NAME)) +} + +fn vm_lock_path(rootfs: &Path) -> PathBuf { + vm_run_dir(rootfs).join(format!("{}-{}", rootfs_key(rootfs), VM_LOCK_NAME)) +} + +fn rootfs_key(rootfs: &Path) -> String { + let name = rootfs + .file_name() + .and_then(|part| part.to_str()) + .unwrap_or("openshell-vm"); + let mut out = String::with_capacity(name.len()); + for ch in name.chars() { + if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' { + out.push(ch); + } else { + out.push('_'); + } + } + if out.is_empty() { + "openshell-vm".to_string() + } else { + out + } +} + +fn default_rootfs() -> Result { + crate::named_rootfs_dir("default") +} + +fn load_vm_runtime_state(rootfs: Option<&Path>) -> Result { + let rootfs = match rootfs { + Some(rootfs) 
=> rootfs.to_path_buf(), + None => default_rootfs()?, + }; + let path = vm_state_path(&rootfs); + let bytes = fs::read(&path).map_err(|e| { + VmError::RuntimeState(format!( + "read VM runtime state {}: {e}. Start the VM with `openshell-vm` first", + path.display() + )) + })?; + let state: VmRuntimeState = serde_json::from_slice(&bytes) + .map_err(|e| VmError::RuntimeState(format!("decode VM runtime state: {e}")))?; + + if !process_alive(state.pid) { + clear_vm_runtime_state(&state.rootfs); + return Err(VmError::RuntimeState(format!( + "VM is not running (stale pid {})", + state.pid + ))); + } + + if !state.socket_path.exists() { + return Err(VmError::RuntimeState(format!( + "VM exec socket is not ready: {}", + state.socket_path.display() + ))); + } + + Ok(state) +} + +fn validate_env_vars(items: &[String]) -> Result<(), VmError> { + for item in items { + let (key, _value) = item.split_once('=').ok_or_else(|| { + VmError::Exec(format!( + "invalid environment variable `{item}`; expected KEY=VALUE" + )) + })?; + if key.is_empty() + || !key.chars().enumerate().all(|(idx, ch)| { + ch == '_' || (ch.is_ascii_alphanumeric() && (idx > 0 || !ch.is_ascii_digit())) + }) + { + return Err(VmError::Exec(format!( + "invalid environment variable name `{key}`" + ))); + } + } + Ok(()) +} + +fn send_json_line(writer: &mut UnixStream, value: &T) -> Result<(), VmError> { + let mut bytes = serde_json::to_vec(value) + .map_err(|e| VmError::Exec(format!("encode VM exec request: {e}")))?; + bytes.push(b'\n'); + writer + .write_all(&bytes) + .map_err(|e| VmError::Exec(format!("write VM exec request: {e}"))) +} + +fn pump_stdin(mut writer: UnixStream) -> Result<(), VmError> { + let stdin = std::io::stdin(); + let mut stdin = stdin.lock(); + let mut buf = [0u8; 8192]; + + loop { + let read = stdin + .read(&mut buf) + .map_err(|e| VmError::Exec(format!("read local stdin: {e}")))?; + if read == 0 { + break; + } + let frame = ClientFrame::Stdin { + data: 
base64::engine::general_purpose::STANDARD.encode(&buf[..read]), + }; + send_json_line(&mut writer, &frame)?; + } + + send_json_line(&mut writer, &ClientFrame::StdinClose) +} + +fn decode_payload(data: &str) -> Result, VmError> { + base64::engine::general_purpose::STANDARD + .decode(data) + .map_err(|e| VmError::Exec(format!("decode VM exec payload: {e}"))) +} + +fn process_alive(pid: i32) -> bool { + let rc = unsafe { libc::kill(pid, 0) }; + if rc == 0 { + return true; + } + std::io::Error::last_os_error().raw_os_error() == Some(libc::EPERM) +} + +fn now_ms() -> Result { + let duration = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map_err(|e| VmError::RuntimeState(format!("read system clock: {e}")))?; + Ok(duration.as_millis()) +} diff --git a/crates/openshell-vm/src/ffi.rs b/crates/openshell-vm/src/ffi.rs new file mode 100644 index 000000000..7500b1c97 --- /dev/null +++ b/crates/openshell-vm/src/ffi.rs @@ -0,0 +1,336 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Minimal runtime-loaded bindings for the libkrun C API. +//! +//! We intentionally do not link libkrun at build time. Instead, the +//! `openshell-vm` binary loads `libkrun` from the staged `openshell-vm.runtime/` +//! sidecar bundle on first use. + +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::OnceLock; + +use libc::c_char; +use libloading::Library; + +use crate::VmError; + +/// Runtime provenance information extracted from the bundle. +#[derive(Debug, Clone)] +pub struct RuntimeProvenance { + /// Path to the libkrun library that was loaded. + pub libkrun_path: PathBuf, + /// Paths to all libkrunfw libraries that were preloaded. + pub libkrunfw_paths: Vec, + /// SHA-256 hash of the primary libkrunfw artifact (if computable). + pub libkrunfw_sha256: Option, + /// Contents of provenance.json if present in the runtime bundle. 
+ pub provenance_json: Option, + /// Whether this is a custom (OpenShell-built) runtime. + pub is_custom: bool, +} + +pub const KRUN_LOG_TARGET_DEFAULT: i32 = -1; +pub const KRUN_LOG_LEVEL_OFF: u32 = 0; +pub const KRUN_LOG_LEVEL_ERROR: u32 = 1; +pub const KRUN_LOG_LEVEL_WARN: u32 = 2; +pub const KRUN_LOG_LEVEL_INFO: u32 = 3; +pub const KRUN_LOG_LEVEL_DEBUG: u32 = 4; +pub const KRUN_LOG_LEVEL_TRACE: u32 = 5; +pub const KRUN_LOG_STYLE_AUTO: u32 = 0; +pub const KRUN_LOG_OPTION_NO_ENV: u32 = 1; +pub const KRUN_DISK_FORMAT_RAW: u32 = 0; +#[allow(dead_code)] // Used only on macOS (cfg-gated in state_disk_sync_mode) +pub const KRUN_SYNC_RELAXED: u32 = 1; +#[allow(dead_code)] // Used only on Linux (cfg-gated in state_disk_sync_mode) +pub const KRUN_SYNC_FULL: u32 = 2; + +type KrunInitLog = + unsafe extern "C" fn(target_fd: i32, level: u32, style: u32, options: u32) -> i32; +type KrunCreateCtx = unsafe extern "C" fn() -> i32; +type KrunFreeCtx = unsafe extern "C" fn(ctx_id: u32) -> i32; +type KrunSetVmConfig = unsafe extern "C" fn(ctx_id: u32, num_vcpus: u8, ram_mib: u32) -> i32; +type KrunSetRoot = unsafe extern "C" fn(ctx_id: u32, root_path: *const c_char) -> i32; +type KrunSetWorkdir = unsafe extern "C" fn(ctx_id: u32, workdir_path: *const c_char) -> i32; +type KrunSetExec = unsafe extern "C" fn( + ctx_id: u32, + exec_path: *const c_char, + argv: *const *const c_char, + envp: *const *const c_char, +) -> i32; +type KrunSetPortMap = unsafe extern "C" fn(ctx_id: u32, port_map: *const *const c_char) -> i32; +type KrunSetConsoleOutput = unsafe extern "C" fn(ctx_id: u32, filepath: *const c_char) -> i32; +type KrunAddDisk3 = unsafe extern "C" fn( + ctx_id: u32, + block_id: *const c_char, + disk_path: *const c_char, + disk_format: u32, + read_only: bool, + direct_io: bool, + sync_mode: u32, +) -> i32; +type KrunAddVsockPort2 = + unsafe extern "C" fn(ctx_id: u32, port: u32, c_filepath: *const c_char, listen: bool) -> i32; +type KrunStartEnter = unsafe extern "C" fn(ctx_id: u32) 
-> i32; +type KrunDisableImplicitVsock = unsafe extern "C" fn(ctx_id: u32) -> i32; +type KrunAddVsock = unsafe extern "C" fn(ctx_id: u32, tsi_features: u32) -> i32; +#[cfg(target_os = "macos")] +type KrunAddNetUnixgram = unsafe extern "C" fn( + ctx_id: u32, + c_path: *const c_char, + fd: i32, + c_mac: *const u8, + features: u32, + flags: u32, +) -> i32; +type KrunAddNetUnixstream = unsafe extern "C" fn( + ctx_id: u32, + c_path: *const c_char, + fd: i32, + c_mac: *const u8, + features: u32, + flags: u32, +) -> i32; + +pub struct LibKrun { + pub krun_init_log: KrunInitLog, + pub krun_create_ctx: KrunCreateCtx, + pub krun_free_ctx: KrunFreeCtx, + pub krun_set_vm_config: KrunSetVmConfig, + pub krun_set_root: KrunSetRoot, + pub krun_set_workdir: KrunSetWorkdir, + pub krun_set_exec: KrunSetExec, + pub krun_set_port_map: KrunSetPortMap, + pub krun_set_console_output: KrunSetConsoleOutput, + pub krun_add_disk3: Option, + pub krun_add_vsock_port2: KrunAddVsockPort2, + pub krun_start_enter: KrunStartEnter, + pub krun_disable_implicit_vsock: KrunDisableImplicitVsock, + pub krun_add_vsock: KrunAddVsock, + #[cfg(target_os = "macos")] + pub krun_add_net_unixgram: KrunAddNetUnixgram, + #[allow(dead_code)] // FFI symbol loaded for future use + pub krun_add_net_unixstream: KrunAddNetUnixstream, +} + +static LIBKRUN: OnceLock = OnceLock::new(); +static RUNTIME_PROVENANCE: OnceLock = OnceLock::new(); + +pub fn libkrun() -> Result<&'static LibKrun, VmError> { + if let Some(lib) = LIBKRUN.get() { + return Ok(lib); + } + + let loaded = LibKrun::load()?; + let _ = LIBKRUN.set(loaded); + Ok(LIBKRUN.get().expect("libkrun should be initialized")) +} + +/// Return the provenance information for the loaded runtime. +/// +/// Only available after [`libkrun()`] has been called successfully. 
+pub fn runtime_provenance() -> Option<&'static RuntimeProvenance> { + RUNTIME_PROVENANCE.get() +} + +impl LibKrun { + fn load() -> Result { + let path = runtime_libkrun_path()?; + let runtime_dir = path.parent().ok_or_else(|| { + VmError::HostSetup(format!("libkrun has no parent dir: {}", path.display())) + })?; + let krunfw_paths = preload_runtime_support_libraries(runtime_dir)?; + + // Build and store provenance information. + let provenance_json_path = runtime_dir.join("provenance.json"); + let provenance_json = fs::read_to_string(&provenance_json_path).ok(); + let is_custom = provenance_json.is_some(); + + let libkrunfw_sha256 = krunfw_paths.first().and_then(|p| compute_sha256(p).ok()); + + let provenance = RuntimeProvenance { + libkrun_path: path.clone(), + libkrunfw_paths: krunfw_paths, + libkrunfw_sha256, + provenance_json, + is_custom, + }; + let _ = RUNTIME_PROVENANCE.set(provenance); + + let library = Box::leak(Box::new(unsafe { + Library::new(&path).map_err(|e| { + VmError::HostSetup(format!("load libkrun from {}: {e}", path.display())) + })? 
+ })); + + Ok(Self { + krun_init_log: load_symbol(library, b"krun_init_log\0", &path)?, + krun_create_ctx: load_symbol(library, b"krun_create_ctx\0", &path)?, + krun_free_ctx: load_symbol(library, b"krun_free_ctx\0", &path)?, + krun_set_vm_config: load_symbol(library, b"krun_set_vm_config\0", &path)?, + krun_set_root: load_symbol(library, b"krun_set_root\0", &path)?, + krun_set_workdir: load_symbol(library, b"krun_set_workdir\0", &path)?, + krun_set_exec: load_symbol(library, b"krun_set_exec\0", &path)?, + krun_set_port_map: load_symbol(library, b"krun_set_port_map\0", &path)?, + krun_set_console_output: load_symbol(library, b"krun_set_console_output\0", &path)?, + krun_add_disk3: load_optional_symbol(library, b"krun_add_disk3\0"), + krun_add_vsock_port2: load_symbol(library, b"krun_add_vsock_port2\0", &path)?, + krun_start_enter: load_symbol(library, b"krun_start_enter\0", &path)?, + krun_disable_implicit_vsock: load_symbol( + library, + b"krun_disable_implicit_vsock\0", + &path, + )?, + krun_add_vsock: load_symbol(library, b"krun_add_vsock\0", &path)?, + #[cfg(target_os = "macos")] + krun_add_net_unixgram: load_symbol(library, b"krun_add_net_unixgram\0", &path)?, + krun_add_net_unixstream: load_symbol(library, b"krun_add_net_unixstream\0", &path)?, + }) + } +} + +fn runtime_libkrun_path() -> Result { + Ok(crate::configured_runtime_dir()?.join(required_runtime_lib_name())) +} + +fn preload_runtime_support_libraries(runtime_dir: &Path) -> Result, VmError> { + let entries = fs::read_dir(runtime_dir) + .map_err(|e| VmError::HostSetup(format!("read {}: {e}", runtime_dir.display())))?; + + let mut support_libs: Vec = entries + .filter_map(Result::ok) + .map(|entry| entry.path()) + .filter(|path| { + path.file_name() + .and_then(|name| name.to_str()) + .is_some_and(|name| { + #[cfg(target_os = "macos")] + { + name.starts_with("libkrunfw") && name.ends_with(".dylib") + } + #[cfg(not(target_os = "macos"))] + { + name.starts_with("libkrunfw") && name.contains(".so") + } + 
}) + }) + .collect(); + + support_libs.sort(); + + for path in &support_libs { + let path_cstr = std::ffi::CString::new(path.to_string_lossy().as_bytes()).map_err(|e| { + VmError::HostSetup(format!( + "invalid support library path {}: {e}", + path.display() + )) + })?; + let handle = + unsafe { libc::dlopen(path_cstr.as_ptr(), libc::RTLD_NOW | libc::RTLD_GLOBAL) }; + if handle.is_null() { + let error = unsafe { + let err = libc::dlerror(); + if err.is_null() { + "unknown dlopen error".to_string() + } else { + std::ffi::CStr::from_ptr(err).to_string_lossy().into_owned() + } + }; + return Err(VmError::HostSetup(format!( + "preload runtime support library {}: {error}", + path.display() + ))); + } + } + + Ok(support_libs) +} + +pub fn required_runtime_lib_name() -> &'static str { + #[cfg(target_os = "macos")] + { + "libkrun.dylib" + } + #[cfg(not(target_os = "macos"))] + { + "libkrun.so" + } +} + +/// Compute SHA-256 hash of a file, returning hex string. +/// +/// Streams the file contents directly to `shasum -a 256` via a pipe, +/// avoiding buffering the entire file in memory. +fn compute_sha256(path: &Path) -> Result { + use std::io::{Read, Write}; + use std::process::{Command, Stdio}; + + let mut file = fs::File::open(path)?; + + // sha256sum is standard on Linux; shasum ships with macOS/Perl. + let mut child = Command::new("sha256sum") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn() + .or_else(|_| { + Command::new("shasum") + .args(["-a", "256"]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn() + })?; + + // Stream file contents directly to shasum's stdin in 8KB chunks. + { + let mut stdin = child + .stdin + .take() + .ok_or_else(|| std::io::Error::other("failed to open shasum stdin"))?; + let mut buf = [0u8; 8192]; + loop { + let n = file.read(&mut buf)?; + if n == 0 { + break; + } + stdin.write_all(&buf[..n])?; + } + // stdin is dropped here, closing the pipe so shasum can finish. 
+ } + + let output = child.wait_with_output()?; + if output.status.success() { + let stdout = String::from_utf8_lossy(&output.stdout); + Ok(stdout + .split_whitespace() + .next() + .unwrap_or("unknown") + .to_string()) + } else { + Ok("unknown".to_string()) + } +} + +fn load_symbol( + library: &'static Library, + symbol: &[u8], + path: &Path, +) -> Result { + let loaded = unsafe { + library.get::(symbol).map_err(|e| { + VmError::HostSetup(format!( + "resolve {} from {}: {e}", + String::from_utf8_lossy(symbol).trim_end_matches('\0'), + path.display() + )) + })? + }; + Ok(*loaded) +} + +fn load_optional_symbol(library: &'static Library, symbol: &[u8]) -> Option { + let loaded = unsafe { library.get::(symbol).ok()? }; + Some(*loaded) +} diff --git a/crates/openshell-vm/src/health.rs b/crates/openshell-vm/src/health.rs new file mode 100644 index 000000000..096a35d1f --- /dev/null +++ b/crates/openshell-vm/src/health.rs @@ -0,0 +1,201 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! gRPC health check for verifying the gateway is fully ready. +//! +//! This module provides a proper gRPC health check that verifies the gateway +//! service is not just accepting TCP connections, but is actually responding +//! to gRPC requests. This ensures we don't mark the server as ready before +//! it has fully booted. + +use crate::VmError; +use openshell_core::proto::{HealthRequest, ServiceStatus, open_shell_client::OpenShellClient}; +use std::path::PathBuf; +use std::time::Duration; +use tonic::transport::{Certificate, ClientTlsConfig, Endpoint, Identity}; + +/// Load mTLS materials from the gateway's cert directory. 
+fn load_mtls_materials(gateway_name: &str) -> Result<(Vec, Vec, Vec), String> { + let home = std::env::var("HOME").map_err(|_| "HOME not set")?; + let mtls_dir = PathBuf::from(home) + .join(".config/openshell/gateways") + .join(gateway_name) + .join("mtls"); + + let ca = std::fs::read(mtls_dir.join("ca.crt")) + .map_err(|e| format!("failed to read ca.crt: {e}"))?; + let cert = std::fs::read(mtls_dir.join("tls.crt")) + .map_err(|e| format!("failed to read tls.crt: {e}"))?; + let key = std::fs::read(mtls_dir.join("tls.key")) + .map_err(|e| format!("failed to read tls.key: {e}"))?; + + Ok((ca, cert, key)) +} + +/// Build a tonic TLS config from mTLS materials. +fn build_tls_config(ca: Vec, cert: Vec, key: Vec) -> ClientTlsConfig { + let ca_cert = Certificate::from_pem(ca); + let identity = Identity::from_pem(cert, key); + ClientTlsConfig::new() + .ca_certificate(ca_cert) + .identity(identity) +} + +/// Perform a gRPC health check against the gateway. +/// +/// Returns `Ok(())` if the health check succeeds (service reports healthy), +/// or an error describing why the check failed. +async fn grpc_health_check(gateway_port: u16, gateway_name: &str) -> Result<(), String> { + // Load mTLS materials + let (ca, cert, key) = load_mtls_materials(gateway_name)?; + let tls_config = build_tls_config(ca, cert, key); + + // Build the channel with TLS + let endpoint = format!("https://127.0.0.1:{gateway_port}"); + let channel = Endpoint::from_shared(endpoint.clone()) + .map_err(|e| format!("invalid endpoint: {e}"))? + .connect_timeout(Duration::from_secs(5)) + .tls_config(tls_config) + .map_err(|e| format!("TLS config error: {e}"))? 
+ .connect() + .await + .map_err(|e| format!("connection failed: {e}"))?; + + // Create client and call health + let mut client = OpenShellClient::new(channel); + let response = client + .health(HealthRequest {}) + .await + .map_err(|e| format!("health RPC failed: {e}"))?; + + let health = response.into_inner(); + if health.status == ServiceStatus::Healthy as i32 { + Ok(()) + } else { + Err(format!("service not healthy: status={}", health.status)) + } +} + +/// Wait for the gateway service to be fully ready by polling the gRPC health endpoint. +/// +/// This replaces the TCP-only probe with a proper gRPC health check that verifies +/// the service is actually responding to requests, not just accepting connections. +/// +/// Returns `Ok(())` when the gateway is confirmed healthy, or `Err` if the health +/// check fails or times out. Falls back to TCP probe if mTLS materials aren't +/// available yet. +pub fn wait_for_gateway_ready(gateway_port: u16, gateway_name: &str) -> Result<(), VmError> { + let start = std::time::Instant::now(); + let timeout = Duration::from_secs(90); + let poll_interval = Duration::from_secs(1); + + eprintln!("Waiting for gateway gRPC health check..."); + + // Create a runtime for async health checks + let rt = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(rt) => rt, + Err(e) => { + eprintln!(" failed to create tokio runtime: {e}, falling back to TCP probe"); + return wait_for_tcp_only(gateway_port, timeout, poll_interval); + } + }; + + loop { + // Try gRPC health check + let result = rt.block_on(async { + tokio::time::timeout( + Duration::from_secs(5), + grpc_health_check(gateway_port, gateway_name), + ) + .await + }); + + match result { + Ok(Ok(())) => { + eprintln!("Gateway healthy [{:.1}s]", start.elapsed().as_secs_f64()); + return Ok(()); + } + Ok(Err(e)) => { + // gRPC call completed but failed + if start.elapsed() >= timeout { + return Err(VmError::Bootstrap(format!( + "gateway health check 
failed after {:.0}s: {e}", + timeout.as_secs_f64() + ))); + } + } + Err(_) => { + // Timeout on the health check itself + if start.elapsed() >= timeout { + return Err(VmError::Bootstrap(format!( + "gateway health check timed out after {:.0}s", + timeout.as_secs_f64() + ))); + } + } + } + + std::thread::sleep(poll_interval); + } +} + +/// Fallback TCP-only probe when gRPC health check can't be performed. +fn wait_for_tcp_only( + gateway_port: u16, + timeout: Duration, + poll_interval: Duration, +) -> Result<(), VmError> { + let start = std::time::Instant::now(); + + loop { + if host_tcp_probe(gateway_port) { + eprintln!( + "Service reachable (TCP) [{:.1}s]", + start.elapsed().as_secs_f64() + ); + return Ok(()); + } + + if start.elapsed() >= timeout { + return Err(VmError::Bootstrap(format!( + "gateway TCP probe failed after {:.0}s", + timeout.as_secs_f64() + ))); + } + + std::thread::sleep(poll_interval); + } +} + +/// Probe `127.0.0.1:port` from the host to verify the TCP path is working. +/// +/// This is a fallback when gRPC health check isn't available. +fn host_tcp_probe(gateway_port: u16) -> bool { + use std::io::Read; + use std::net::{SocketAddr, TcpStream}; + + let addr: SocketAddr = ([127, 0, 0, 1], gateway_port).into(); + let Ok(mut stream) = TcpStream::connect_timeout(&addr, Duration::from_secs(2)) else { + return false; + }; + + // A short read timeout: if the server is alive it will wait for us + // to send a TLS ClientHello, so the read will time out (= good). + // If the connection resets or closes, the server is dead. + stream + .set_read_timeout(Some(Duration::from_millis(200))) + .ok(); + let mut buf = [0u8; 1]; + match stream.read(&mut buf) { + Err(e) + if e.kind() == std::io::ErrorKind::WouldBlock + || e.kind() == std::io::ErrorKind::TimedOut => + { + true // Timeout = server alive, waiting for ClientHello. + } + _ => false, // Reset, EOF, or unexpected data = not healthy. 
+ } +} diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs new file mode 100644 index 000000000..4593dd605 --- /dev/null +++ b/crates/openshell-vm/src/lib.rs @@ -0,0 +1,1994 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! `MicroVM` runtime using libkrun for hardware-isolated execution. +//! +//! This crate provides a thin wrapper around the libkrun C API to boot +//! lightweight VMs backed by virtio-fs root filesystems. On macOS ARM64, +//! it uses Apple's Hypervisor.framework; on Linux it uses KVM. +//! +//! # Codesigning (macOS) +//! +//! The calling binary must be codesigned with the +//! `com.apple.security.hypervisor` entitlement. See `entitlements.plist`. + +#![allow(unsafe_code)] + +mod embedded; +mod exec; +mod ffi; +mod health; + +use std::ffi::CString; +use std::path::{Path, PathBuf}; +use std::ptr; +use std::time::Instant; + +pub use exec::{ + VM_EXEC_VSOCK_PORT, VmExecOptions, VmRuntimeState, acquire_rootfs_lock, clear_vm_runtime_state, + ensure_vm_not_running, exec_capture, exec_running_vm, recover_corrupt_kine_db, + reset_runtime_state, vm_exec_socket_path, vm_state_path, write_vm_runtime_state, +}; + +// ── Error type ───────────────────────────────────────────────────────── + +/// Errors that can occur when configuring or launching a microVM. +#[derive(Debug, thiserror::Error, miette::Diagnostic)] +pub enum VmError { + /// A libkrun FFI call returned a negative error code. + #[error("{func} failed with error code {code}")] + Krun { func: &'static str, code: i32 }, + + /// The rootfs directory does not exist. + #[error( + "rootfs directory not found: {path}\nRun `openshell-vm prepare-rootfs` or build one with ./crates/openshell-vm/scripts/build-rootfs.sh " + )] + RootfsNotFound { path: String }, + + /// A path contained invalid UTF-8. 
+ #[error("path is not valid UTF-8: {0}")] + InvalidPath(String), + + /// `CString::new` failed (embedded NUL byte). + #[error("invalid C string: {0}")] + CString(#[from] std::ffi::NulError), + + /// A required host binary was not found. + #[error("required binary not found: {path}\n{hint}")] + BinaryNotFound { path: String, hint: String }, + + /// Host-side VM setup failed before boot. + #[error("host setup failed: {0}")] + HostSetup(String), + + /// `fork()` failed. + #[error("fork() failed: {0}")] + Fork(String), + + /// Post-boot bootstrap failed. + #[error("bootstrap failed: {0}")] + Bootstrap(String), + + /// Local VM runtime state could not be read or written. + #[error("VM runtime state error: {0}")] + RuntimeState(String), + + /// Exec operation against a running VM failed. + #[error("VM exec failed: {0}")] + Exec(String), +} + +/// Check a libkrun return code; negative values are errors. +fn check(ret: i32, func: &'static str) -> Result<(), VmError> { + if ret < 0 { + Err(VmError::Krun { func, code: ret }) + } else { + Ok(()) + } +} + +// ── Configuration ────────────────────────────────────────────────────── + +/// Networking backend for the microVM. +#[derive(Debug, Clone)] +pub enum NetBackend { + /// TSI (Transparent Socket Impersonation) — default libkrun networking. + /// Simple but intercepts guest loopback connections, breaking k3s. + Tsi, + + /// No networking — disable vsock/TSI entirely. For debugging only. + None, + + /// gvproxy (vfkit mode) — real `eth0` interface via virtio-net. + /// Requires gvproxy binary on the host. Port forwarding is done + /// through gvproxy's HTTP API. + Gvproxy { + /// Path to the gvproxy binary. + binary: PathBuf, + }, +} + +/// Host Unix socket bridged into the guest as a vsock port. +#[derive(Debug, Clone)] +pub struct VsockPort { + pub port: u32, + pub socket_path: PathBuf, + pub listen: bool, +} + +/// Host-backed raw block image attached to the VM for mutable guest state. 
+#[derive(Debug, Clone)] +pub struct StateDiskConfig { + /// Path to the sparse raw image on the host. + pub path: PathBuf, + + /// Size of the raw image in bytes. + pub size_bytes: u64, + + /// Guest-visible libkrun block ID. + pub block_id: String, + + /// Guest device path used by the init script. + pub guest_device: String, +} + +impl StateDiskConfig { + fn for_rootfs(rootfs: &Path) -> Self { + Self { + path: default_state_disk_path(rootfs), + size_bytes: DEFAULT_STATE_DISK_SIZE_BYTES, + block_id: DEFAULT_STATE_DISK_BLOCK_ID.to_string(), + guest_device: DEFAULT_STATE_DISK_GUEST_DEVICE.to_string(), + } + } +} + +/// Configuration for a libkrun microVM. +pub struct VmConfig { + /// Path to the extracted rootfs directory (aarch64 Linux). + pub rootfs: PathBuf, + + /// Number of virtual CPUs. + pub vcpus: u8, + + /// RAM in MiB. + pub mem_mib: u32, + + /// Executable path inside the VM. + pub exec_path: String, + + /// Arguments to the executable (argv, excluding argv\[0\]). + pub args: Vec, + + /// Environment variables in `KEY=VALUE` form. + /// If empty, a minimal default set is used. + pub env: Vec, + + /// Working directory inside the VM. + pub workdir: String, + + /// TCP port mappings in `"host_port:guest_port"` form. + /// Only used with TSI networking. + pub port_map: Vec, + + /// Optional host Unix sockets exposed to the guest over vsock. + pub vsock_ports: Vec, + + /// libkrun log level (0=Off .. 5=Trace). + pub log_level: u32, + + /// Optional file path for VM console output. If `None`, console output + /// goes to the parent directory of the rootfs as `console.log`. + pub console_output: Option, + + /// Networking backend. + pub net: NetBackend, + + /// Wipe all runtime state (containerd tasks/sandboxes, kubelet pods) + /// before booting. Recovers from corrupted state after a crash. + pub reset: bool, + + /// Gateway metadata name used for host-side config and mTLS material. 
+ pub gateway_name: String, + + /// Optional host-backed raw block image for mutable guest state. + pub state_disk: Option, +} + +impl VmConfig { + /// Default gateway configuration: boots k3s server inside the VM. + /// + /// Runs `/srv/openshell-vm-init.sh` which mounts essential filesystems, + /// deploys the `OpenShell` helm chart, and execs `k3s server`. + /// Exposes the `OpenShell` gateway on port 30051. + pub fn gateway(rootfs: PathBuf) -> Self { + let state_disk = StateDiskConfig::for_rootfs(&rootfs); + Self { + vsock_ports: vec![VsockPort { + port: VM_EXEC_VSOCK_PORT, + socket_path: vm_exec_socket_path(&rootfs), + listen: true, + }], + rootfs, + vcpus: 4, + mem_mib: 8192, + exec_path: "/srv/openshell-vm-init.sh".to_string(), + args: vec![], + env: vec![ + "HOME=/root".to_string(), + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin".to_string(), + "TERM=xterm".to_string(), + ], + workdir: "/".to_string(), + port_map: vec![ + // OpenShell server — with bridge CNI the pod listens on + // 8080 inside its own network namespace (10.42.0.x), not + // on the VM's root namespace. The NodePort service + // (kube-proxy nftables) forwards VM:30051 → pod:8080. + // gvproxy maps host:30051 → VM:30051 to complete the path. + "30051:30051".to_string(), + ], + log_level: 3, // Info — for debugging + console_output: None, + net: NetBackend::Gvproxy { + binary: default_runtime_gvproxy_path(), + }, + reset: false, + gateway_name: format!("{GATEWAY_NAME_PREFIX}-default"), + state_disk: Some(state_disk), + } + } +} + +/// Base prefix for gateway metadata names. +const GATEWAY_NAME_PREFIX: &str = "openshell-vm"; +const DEFAULT_STATE_DISK_SIZE_BYTES: u64 = 32 * 1024 * 1024 * 1024; +const DEFAULT_STATE_DISK_BLOCK_ID: &str = "openshell-state"; +const DEFAULT_STATE_DISK_GUEST_DEVICE: &str = "/dev/vda"; + +/// Resolve the gateway metadata name for an instance name. 
+pub fn gateway_name(instance_name: &str) -> Result { + Ok(format!( + "{GATEWAY_NAME_PREFIX}-{}", + sanitize_instance_name(instance_name)? + )) +} + +/// Resolve the rootfs path for a named instance (including the default gateway). +/// +/// Layout: `$XDG_DATA_HOME/openshell/openshell-vm/{version}/instances/{name}/rootfs` +pub fn named_rootfs_dir(instance_name: &str) -> Result { + let name = sanitize_instance_name(instance_name)?; + let base = openshell_bootstrap::paths::openshell_vm_base_dir() + .map_err(|e| VmError::RuntimeState(format!("resolve openshell-vm base dir: {e}")))?; + Ok(base + .join(env!("CARGO_PKG_VERSION")) + .join("instances") + .join(name) + .join("rootfs")) +} + +/// Ensure a named instance rootfs exists, extracting from the embedded +/// rootfs tarball on first use. +/// +/// The default (unnamed) gateway should be routed here as `"default"`. +pub fn ensure_named_rootfs(instance_name: &str) -> Result { + let instance_rootfs = named_rootfs_dir(instance_name)?; + if instance_rootfs.is_dir() { + return Ok(instance_rootfs); + } + + if embedded::has_embedded_rootfs() { + // Clean up rootfs directories left by older binary versions. + embedded::cleanup_old_rootfs()?; + + embedded::extract_rootfs_to(&instance_rootfs)?; + return Ok(instance_rootfs); + } + + Err(VmError::RootfsNotFound { + path: instance_rootfs.display().to_string(), + }) +} + +/// Ensure the requested rootfs exists, extracting the embedded rootfs when needed. +/// +/// When `rootfs` is `None`, this uses the named-instance layout under +/// `$XDG_DATA_HOME/openshell/openshell-vm/{version}/instances//rootfs`. +/// When `force_recreate` is true and the target exists, it is removed first. 
+pub fn prepare_rootfs( + rootfs: Option, + instance_name: &str, + force_recreate: bool, +) -> Result { + let target = match rootfs { + Some(path) => path, + None => named_rootfs_dir(instance_name)?, + }; + + if force_recreate && target.exists() { + std::fs::remove_dir_all(&target).map_err(|e| { + VmError::HostSetup(format!("remove existing rootfs {}: {e}", target.display())) + })?; + } + + if target.is_dir() { + return Ok(target); + } + + if embedded::has_embedded_rootfs() { + if target == named_rootfs_dir(instance_name)? { + embedded::cleanup_old_rootfs()?; + } + embedded::extract_rootfs_to(&target)?; + return Ok(target); + } + + Err(VmError::RootfsNotFound { + path: target.display().to_string(), + }) +} + +fn sanitize_instance_name(name: &str) -> Result { + let trimmed = name.trim(); + if trimmed.is_empty() { + return Err(VmError::RuntimeState( + "instance name cannot be empty".to_string(), + )); + } + + let mut out = String::with_capacity(trimmed.len()); + for ch in trimmed.chars() { + if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' { + out.push(ch); + } else { + return Err(VmError::RuntimeState(format!( + "invalid instance name '{trimmed}': only [A-Za-z0-9_-] are allowed" + ))); + } + } + + Ok(out) +} + +// ── Helpers ───────────────────────────────────────────────────────────── + +/// Build a null-terminated C string array from a slice of strings. +/// +/// Returns both the `CString` owners (to keep them alive) and the pointer array. 
+fn c_string_array(strings: &[&str]) -> Result<(Vec, Vec<*const libc::c_char>), VmError> { + let owned: Vec = strings + .iter() + .map(|s| CString::new(*s)) + .collect::, _>>()?; + let mut ptrs: Vec<*const libc::c_char> = owned.iter().map(|c| c.as_ptr()).collect(); + ptrs.push(ptr::null()); // null terminator + Ok((owned, ptrs)) +} + +const VM_RUNTIME_DIR_ENV: &str = "OPENSHELL_VM_RUNTIME_DIR"; + +pub(crate) fn configured_runtime_dir() -> Result { + // Allow override for development + if let Some(path) = std::env::var_os(VM_RUNTIME_DIR_ENV) { + let path = PathBuf::from(path); + tracing::debug!( + path = %path.display(), + "Using runtime from OPENSHELL_VM_RUNTIME_DIR" + ); + return Ok(path); + } + + // Use embedded runtime (extracts on first use) + embedded::ensure_runtime_extracted() +} + +fn validate_runtime_dir(dir: &Path) -> Result<(), VmError> { + if !dir.is_dir() { + return Err(VmError::BinaryNotFound { + path: dir.display().to_string(), + hint: format!( + "VM runtime not found. Run `mise run vm:build:embedded` or set {VM_RUNTIME_DIR_ENV}" + ), + }); + } + + let libkrun = dir.join(ffi::required_runtime_lib_name()); + if !libkrun.is_file() { + return Err(VmError::BinaryNotFound { + path: libkrun.display().to_string(), + hint: "runtime is incomplete: missing libkrun".to_string(), + }); + } + + let has_krunfw = std::fs::read_dir(dir) + .map_err(|e| VmError::HostSetup(format!("read {}: {e}", dir.display())))? 
+ .filter_map(Result::ok) + .any(|entry| { + entry + .file_name() + .to_string_lossy() + .starts_with("libkrunfw.") + }); + if !has_krunfw { + return Err(VmError::BinaryNotFound { + path: dir.display().to_string(), + hint: "runtime is incomplete: missing libkrunfw".to_string(), + }); + } + + let gvproxy = dir.join("gvproxy"); + if !gvproxy.is_file() { + return Err(VmError::BinaryNotFound { + path: gvproxy.display().to_string(), + hint: "runtime is incomplete: missing gvproxy".to_string(), + }); + } + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt as _; + + let mode = std::fs::metadata(&gvproxy) + .map_err(|e| VmError::HostSetup(format!("stat {}: {e}", gvproxy.display())))? + .permissions() + .mode(); + if mode & 0o111 == 0 { + return Err(VmError::HostSetup(format!( + "gvproxy is not executable: {}", + gvproxy.display() + ))); + } + } + + Ok(()) +} + +fn resolve_runtime_bundle() -> Result { + let runtime_dir = configured_runtime_dir()?; + // Validate the directory has required files + validate_runtime_dir(&runtime_dir)?; + Ok(runtime_dir.join("gvproxy")) +} + +pub fn default_runtime_gvproxy_path() -> PathBuf { + configured_runtime_dir() + .or_else(|_| embedded::runtime_cache_path()) + .unwrap_or_else(|_| PathBuf::from("gvproxy")) + .join("gvproxy") +} + +/// Check if the given path looks like an openshell-vm instance rootfs. 
fn is_instance_rootfs_path(path: &Path) -> bool {
    // Matches: .../openshell/openshell-vm/.../instances/.../rootfs
    // Substring match on the lossy string plus a final-component check; a
    // path merely *containing* these fragments elsewhere would also match.
    let s = path.to_string_lossy();
    s.contains("openshell/openshell-vm") && s.contains("instances") && path.ends_with("rootfs")
}

/// Prepend `runtime_dir` to `DYLD_FALLBACK_LIBRARY_PATH` so the macOS
/// dynamic loader can find libkrun/libkrunfw in the runtime bundle.
/// Existing entries are preserved after the runtime dir.
#[cfg(target_os = "macos")]
fn configure_runtime_loader_env(runtime_dir: &Path) -> Result<(), VmError> {
    let existing = std::env::var_os("DYLD_FALLBACK_LIBRARY_PATH");
    let mut paths = vec![runtime_dir.to_path_buf()];
    if let Some(existing) = existing {
        paths.extend(std::env::split_paths(&existing));
    }
    let joined = std::env::join_paths(paths)
        .map_err(|e| VmError::HostSetup(format!("join DYLD_FALLBACK_LIBRARY_PATH: {e}")))?;
    // SAFETY: called before the VM threads are spawned; mutating the
    // process environment is racy only with concurrent readers.
    unsafe {
        std::env::set_var("DYLD_FALLBACK_LIBRARY_PATH", joined);
    }
    Ok(())
}

#[cfg(target_os = "linux")]
fn configure_runtime_loader_env(runtime_dir: &Path) -> Result<(), VmError> {
    // On Linux, libkrun.so has a DT_NEEDED for libkrunfw.so. Even though we
    // preload libkrunfw with RTLD_GLOBAL, the ELF dynamic linker still resolves
    // DT_NEEDED entries through LD_LIBRARY_PATH / system paths. Without this,
    // dlopen("libkrun.so") fails if libkrunfw.so is only in the runtime bundle.
    let existing = std::env::var_os("LD_LIBRARY_PATH");
    let mut paths = vec![runtime_dir.to_path_buf()];
    if let Some(existing) = existing {
        paths.extend(std::env::split_paths(&existing));
    }
    let joined = std::env::join_paths(paths)
        .map_err(|e| VmError::HostSetup(format!("join LD_LIBRARY_PATH: {e}")))?;
    unsafe {
        std::env::set_var("LD_LIBRARY_PATH", joined);
    }
    Ok(())
}

// No loader configuration is needed (or possible) on other platforms.
#[cfg(not(any(target_os = "macos", target_os = "linux")))]
fn configure_runtime_loader_env(_runtime_dir: &Path) -> Result<(), VmError> {
    Ok(())
}

/// Raise RLIMIT_NOFILE to its hard limit (best effort; failures ignored).
/// The VM plus gvproxy can hold many file descriptors open at once.
fn raise_nofile_limit() {
    #[cfg(unix)]
    unsafe {
        let mut rlim = libc::rlimit {
            rlim_cur: 0,
            rlim_max: 0,
        };
        if libc::getrlimit(libc::RLIMIT_NOFILE, &raw mut rlim) == 0 {
            rlim.rlim_cur = rlim.rlim_max;
            let _ = libc::setrlimit(libc::RLIMIT_NOFILE, &raw const rlim);
        }
    }
}

/// Log runtime provenance information for diagnostics.
///
/// Prints the libkrun/libkrunfw versions, artifact hashes, and whether
/// a custom runtime is in use. This makes it easy to correlate VM issues
/// with the specific runtime bundle.
fn log_runtime_provenance(runtime_dir: &Path) {
    if let Some(prov) = ffi::runtime_provenance() {
        eprintln!("runtime: {}", runtime_dir.display());
        eprintln!(" libkrun: {}", prov.libkrun_path.display());
        for krunfw in &prov.libkrunfw_paths {
            let name = krunfw.file_name().map_or_else(
                || "unknown".to_string(),
                |n| n.to_string_lossy().to_string(),
            );
            eprintln!(" libkrunfw: {name}");
        }
        if let Some(ref sha) = prov.libkrunfw_sha256 {
            // Show a truncated digest; full hashes are noisy in a console log.
            let short = if sha.len() > 12 { &sha[..12] } else { sha };
            eprintln!(" sha256: {short}...");
        }
        if prov.is_custom {
            eprintln!(" type: custom (OpenShell-built)");
            // Parse provenance.json for additional details.
            if let Some(ref json) = prov.provenance_json {
                // Extract key fields from provenance metadata.
                for key in &["libkrunfw_commit", "kernel_version", "build_timestamp"] {
                    if let Some(val) = extract_json_string(json, key) {
                        eprintln!(" {}: {}", key.replace('_', "-"), val);
                    }
                }
            }
        } else {
            eprintln!(" type: stock (system/homebrew)");
        }
    }
}

/// Extract a string value from a JSON object by key.
/// Returns `None` if the input is not a JSON object, the key is absent,
/// or the value is not a string.
fn extract_json_string(json: &str, key: &str) -> Option {
    let map: serde_json::Map = serde_json::from_str(json).ok()?;
    map.get(key)?.as_str().map(ToOwned::to_owned)
}

/// Map a numeric verbosity (0..) onto libkrun's log-level constants.
/// Values above 4 saturate at TRACE.
fn clamp_log_level(level: u32) -> u32 {
    match level {
        0 => ffi::KRUN_LOG_LEVEL_OFF,
        1 => ffi::KRUN_LOG_LEVEL_ERROR,
        2 => ffi::KRUN_LOG_LEVEL_WARN,
        3 => ffi::KRUN_LOG_LEVEL_INFO,
        4 => ffi::KRUN_LOG_LEVEL_DEBUG,
        _ => ffi::KRUN_LOG_LEVEL_TRACE,
    }
}

/// Thin RAII wrapper around a libkrun context. Each builder method maps
/// 1:1 onto a `krun_*` FFI call; the context is freed on drop.
struct VmContext {
    // Loaded libkrun function table (lives for the process lifetime).
    krun: &'static ffi::LibKrun,
    // Context handle returned by krun_create_ctx.
    ctx_id: u32,
}

impl VmContext {
    /// Initialise libkrun logging and create a fresh context.
    fn create(log_level: u32) -> Result {
        let krun = ffi::libkrun()?;
        unsafe {
            check(
                (krun.krun_init_log)(
                    ffi::KRUN_LOG_TARGET_DEFAULT,
                    clamp_log_level(log_level),
                    ffi::KRUN_LOG_STYLE_AUTO,
                    ffi::KRUN_LOG_OPTION_NO_ENV,
                ),
                "krun_init_log",
            )?;
        }

        // krun_create_ctx returns a negative errno-style code on failure.
        let ctx_id = unsafe { (krun.krun_create_ctx)() };
        if ctx_id < 0 {
            return Err(VmError::Krun {
                func: "krun_create_ctx",
                code: ctx_id,
            });
        }

        Ok(Self {
            krun,
            ctx_id: ctx_id as u32,
        })
    }

    /// Set vCPU count and guest memory size (MiB).
    fn set_vm_config(&self, vcpus: u8, mem_mib: u32) -> Result<(), VmError> {
        unsafe {
            check(
                (self.krun.krun_set_vm_config)(self.ctx_id, vcpus, mem_mib),
                "krun_set_vm_config",
            )
        }
    }

    /// Set the virtio-fs root filesystem directory.
    fn set_root(&self, rootfs: &Path) -> Result<(), VmError> {
        let rootfs_c = path_to_cstring(rootfs)?;
        unsafe {
            check(
                (self.krun.krun_set_root)(self.ctx_id, rootfs_c.as_ptr()),
                "krun_set_root",
            )
        }
    }

    /// Attach the raw-format state disk via krun_add_disk3.
    /// Errors if the loaded libkrun does not export that symbol.
    fn add_state_disk(&self, state_disk: &StateDiskConfig) -> Result<(), VmError> {
        let Some(add_disk3) = self.krun.krun_add_disk3 else {
            return Err(VmError::HostSetup(
                "libkrun runtime does not expose krun_add_disk3; rebuild the VM runtime with block support"
                    .to_string(),
            ));
        };

        let block_id_c = CString::new(state_disk.block_id.as_str())?;
        let disk_path_c = path_to_cstring(&state_disk.path)?;
        unsafe {
            check(
                add_disk3(
                    self.ctx_id,
                    block_id_c.as_ptr(),
                    disk_path_c.as_ptr(),
                    ffi::KRUN_DISK_FORMAT_RAW,
                    false,
                    false,
                    state_disk_sync_mode(),
                ),
                "krun_add_disk3",
            )
        }
    }

    /// Set the guest process working directory.
    fn set_workdir(&self, workdir: &str) -> Result<(), VmError> {
        let workdir_c = CString::new(workdir)?;
        unsafe {
            check(
                (self.krun.krun_set_workdir)(self.ctx_id, workdir_c.as_ptr()),
                "krun_set_workdir",
            )
        }
    }

    /// Turn off the implicit TSI vsock device.
    fn disable_implicit_vsock(&self) -> Result<(), VmError> {
        unsafe {
            check(
                (self.krun.krun_disable_implicit_vsock)(self.ctx_id),
                "krun_disable_implicit_vsock",
            )
        }
    }

    /// Add an explicit vsock device with the given TSI feature bits.
    fn add_vsock(&self, tsi_features: u32) -> Result<(), VmError> {
        unsafe {
            check(
                (self.krun.krun_add_vsock)(self.ctx_id, tsi_features),
                "krun_add_vsock",
            )
        }
    }

    /// Attach a virtio-net device backed by a unix datagram socket (macOS,
    /// vfkit-style). The `-1` fd argument tells libkrun to open the socket
    /// path itself.
    #[cfg(target_os = "macos")]
    fn add_net_unixgram(
        &self,
        socket_path: &Path,
        mac: &[u8; 6],
        features: u32,
        flags: u32,
    ) -> Result<(), VmError> {
        let sock_c = path_to_cstring(socket_path)?;
        unsafe {
            check(
                (self.krun.krun_add_net_unixgram)(
                    self.ctx_id,
                    sock_c.as_ptr(),
                    -1,
                    mac.as_ptr(),
                    features,
                    flags,
                ),
                "krun_add_net_unixgram",
            )
        }
    }

    #[allow(dead_code)] // FFI binding for future use (e.g. Linux networking)
    fn add_net_unixstream(
        &self,
        socket_path: &Path,
        mac: &[u8; 6],
        features: u32,
    ) -> Result<(), VmError> {
        let sock_c = path_to_cstring(socket_path)?;
        unsafe {
            check(
                (self.krun.krun_add_net_unixstream)(
                    self.ctx_id,
                    sock_c.as_ptr(),
                    -1,
                    mac.as_ptr(),
                    features,
                    0,
                ),
                "krun_add_net_unixstream",
            )
        }
    }

    /// Register TSI port mappings ("host:guest" strings).
    fn set_port_map(&self, port_map: &[String]) -> Result<(), VmError> {
        let port_strs: Vec<&str> = port_map.iter().map(String::as_str).collect();
        // _port_owners keeps the CStrings alive while the pointers are used.
        let (_port_owners, port_ptrs) = c_string_array(&port_strs)?;
        unsafe {
            check(
                (self.krun.krun_set_port_map)(self.ctx_id, port_ptrs.as_ptr()),
                "krun_set_port_map",
            )
        }
    }

    /// Map a guest vsock port onto a host unix socket.
    fn add_vsock_port(&self, port: &VsockPort) -> Result<(), VmError> {
        let socket_c = path_to_cstring(&port.socket_path)?;
        unsafe {
            check(
                (self.krun.krun_add_vsock_port2)(
                    self.ctx_id,
                    port.port,
                    socket_c.as_ptr(),
                    port.listen,
                ),
                "krun_add_vsock_port2",
            )
        }
    }

    /// Redirect the guest console to a file on the host.
    fn set_console_output(&self, path: &Path) -> Result<(), VmError> {
        let console_c = path_to_cstring(path)?;
        unsafe {
            check(
                (self.krun.krun_set_console_output)(self.ctx_id, console_c.as_ptr()),
                "krun_set_console_output",
            )
        }
    }

    /// Set the guest entry point: executable path, argv and envp.
    fn set_exec(&self, exec_path: &str, args: &[String], env: &[String]) -> Result<(), VmError> {
        let exec_c = CString::new(exec_path)?;
        let argv_strs: Vec<&str> = args.iter().map(String::as_str).collect();
        let (_argv_owners, argv_ptrs) = c_string_array(&argv_strs)?;
        let env_strs: Vec<&str> = env.iter().map(String::as_str).collect();
        let (_env_owners, env_ptrs) = c_string_array(&env_strs)?;

        unsafe {
            check(
                (self.krun.krun_set_exec)(
                    self.ctx_id,
                    exec_c.as_ptr(),
                    argv_ptrs.as_ptr(),
                    env_ptrs.as_ptr(),
                ),
                "krun_set_exec",
            )
        }
    }

    /// Enter the VM. Per libkrun's contract this call does not return on
    /// success; callers fork first (see `launch`).
    fn start_enter(&self) -> i32 {
        unsafe { (self.krun.krun_start_enter)(self.ctx_id) }
    }
}

impl Drop for VmContext {
    fn drop(&mut self) {
        unsafe {
            let ret = (self.krun.krun_free_ctx)(self.ctx_id);
            if ret < 0 {
                // Nothing actionable at drop time; just surface it.
                eprintln!(
                    "warning: krun_free_ctx({}) failed with code {ret}",
                    self.ctx_id
                );
            }
        }
    }
}

/// RAII guard that kills and waits on a gvproxy child process when dropped.
///
/// This prevents orphaned gvproxy processes when early `?` returns in the
/// launch function cause the child to be dropped before cleanup code runs.
/// Call [`GvproxyGuard::disarm`] to take ownership of the child when it
/// should outlive the guard (i.e., after a successful fork).
struct GvproxyGuard {
    // `None` after disarm() or drop; `Some` while the guard owns the child.
    child: Option,
}

impl GvproxyGuard {
    fn new(child: std::process::Child) -> Self {
        Self { child: Some(child) }
    }

    /// Take the child out of the guard, preventing it from being killed on drop.
    /// Use this after the launch is successful and the parent will manage cleanup.
    fn disarm(&mut self) -> Option {
        self.child.take()
    }

    /// Get the child's PID without disarming.
    fn id(&self) -> Option {
        self.child.as_ref().map(std::process::Child::id)
    }
}

impl Drop for GvproxyGuard {
    fn drop(&mut self) {
        if let Some(mut child) = self.child.take() {
            let pid = child.id();
            // kill then wait: avoid leaving a zombie behind.
            let _ = child.kill();
            let _ = child.wait();
            eprintln!("gvproxy cleaned up (pid {pid})");
        }
    }
}

/// Issue a gvproxy expose call via its HTTP API (unix socket).
///
/// Sends a raw HTTP/1.1 POST request over the unix socket to avoid
/// depending on `curl` being installed on the host.
+fn gvproxy_expose(api_sock: &Path, body: &str) -> Result<(), String> { + use std::io::{Read, Write}; + use std::os::unix::net::UnixStream; + + let mut stream = + UnixStream::connect(api_sock).map_err(|e| format!("connect to gvproxy API socket: {e}"))?; + + let request = format!( + "POST /services/forwarder/expose HTTP/1.1\r\n\ + Host: localhost\r\n\ + Content-Type: application/json\r\n\ + Content-Length: {}\r\n\ + Connection: close\r\n\ + \r\n\ + {}", + body.len(), + body, + ); + + stream + .write_all(request.as_bytes()) + .map_err(|e| format!("write to gvproxy API: {e}"))?; + + // Read just enough of the response to get the status line. + let mut buf = [0u8; 1024]; + let n = stream + .read(&mut buf) + .map_err(|e| format!("read from gvproxy API: {e}"))?; + let response = String::from_utf8_lossy(&buf[..n]); + + // Parse the HTTP status code from the first line (e.g. "HTTP/1.1 200 OK"). + let status = response + .lines() + .next() + .and_then(|line| line.split_whitespace().nth(1)) + .unwrap_or("0"); + + match status { + "200" | "204" => Ok(()), + _ => { + let first_line = response.lines().next().unwrap_or(""); + Err(format!("gvproxy API: {first_line}")) + } + } +} + +/// Kill a stale gvproxy process from a previous openshell-vm run. +/// +/// If the CLI crashes or is killed before cleanup, gvproxy keeps running +/// and holds port 2222. A new gvproxy instance then fails with +/// "bind: address already in use". +/// +/// We only kill the specific gvproxy PID recorded in the VM runtime state +/// to avoid disrupting unrelated gvproxy instances (e.g. Podman Desktop). +/// Before sending SIGTERM, we verify the process name contains "gvproxy" +/// to guard against PID reuse. 
fn kill_stale_gvproxy(rootfs: &Path) {
    // Recover the previously-recorded gvproxy PID from the runtime state
    // file; any read/parse failure is treated as "no stale process".
    let state_path = vm_state_path(rootfs);
    let pid = std::fs::read(&state_path)
        .ok()
        .and_then(|bytes| serde_json::from_slice::(&bytes).ok())
        .and_then(|state| state.gvproxy_pid);

    if let Some(gvproxy_pid) = pid {
        // Verify the process is still alive before killing it.
        // kill(pid, 0) probes for existence without sending a signal.
        let pid_i32 = gvproxy_pid as libc::pid_t;
        let is_alive = unsafe { libc::kill(pid_i32, 0) } == 0;
        if is_alive {
            // Verify the process is actually gvproxy before killing.
            // Without this check, PID reuse could cause us to kill an
            // unrelated process.
            if !is_process_named(pid_i32, "gvproxy") {
                eprintln!(
                    "Stale gvproxy pid {gvproxy_pid} is no longer gvproxy (PID reused), skipping kill"
                );
                return;
            }
            unsafe {
                libc::kill(pid_i32, libc::SIGTERM);
            }
            eprintln!("Killed stale gvproxy process (pid {gvproxy_pid})");
            // Brief pause for the port to be released.
            std::thread::sleep(std::time::Duration::from_millis(200));
        }
    }
}

/// Check whether a process with the given PID has the expected name.
///
/// On macOS, shells out to `ps` to query the process name. On Linux, reads
/// `/proc/<pid>/comm`. Returns `false` if the process name cannot be
/// determined (fail-safe: don't kill if we can't verify).
#[cfg(target_os = "macos")]
fn is_process_named(pid: libc::pid_t, expected: &str) -> bool {
    // Use `ps -p <pid> -o comm=` to get just the process name.
    // This avoids depending on libc kinfo_proc struct layout.
    std::process::Command::new("ps")
        .args(["-p", &pid.to_string(), "-o", "comm="])
        .output()
        .ok()
        .and_then(|output| {
            if output.status.success() {
                String::from_utf8(output.stdout).ok()
            } else {
                None
            }
        })
        .is_some_and(|name| name.trim().contains(expected))
}

#[cfg(target_os = "linux")]
fn is_process_named(pid: libc::pid_t, expected: &str) -> bool {
    let comm_path = format!("/proc/{pid}/comm");
    std::fs::read_to_string(comm_path)
        .map(|name| name.trim().contains(expected))
        .unwrap_or(false)
}

#[cfg(not(any(target_os = "macos", target_os = "linux")))]
fn is_process_named(_pid: libc::pid_t, _expected: &str) -> bool {
    // Cannot verify on this platform — fail-safe: don't kill.
    false
}

/// Derive a filesystem-safe key from the rootfs directory name: characters
/// outside [A-Za-z0-9_-] become '_'; empty input falls back to
/// "openshell-vm". Used to name sibling files (logs, state disk, console).
fn vm_rootfs_key(rootfs: &Path) -> String {
    let name = rootfs
        .file_name()
        .and_then(|part| part.to_str())
        .unwrap_or("openshell-vm");
    let mut out = String::with_capacity(name.len());
    for ch in name.chars() {
        if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' {
            out.push(ch);
        } else {
            out.push('_');
        }
    }
    if out.is_empty() {
        "openshell-vm".to_string()
    } else {
        out
    }
}

/// Default state-disk image path: "<key>-state.raw" next to the rootfs dir.
fn default_state_disk_path(rootfs: &Path) -> PathBuf {
    rootfs
        .parent()
        .unwrap_or(rootfs)
        .join(format!("{}-state.raw", vm_rootfs_key(rootfs)))
}

/// Create (or grow) the sparse raw state-disk image. Existing images are
/// never truncated — only extended when smaller than the configured size.
fn ensure_state_disk_image(state_disk: &StateDiskConfig) -> Result<(), VmError> {
    if let Some(parent) = state_disk.path.parent() {
        std::fs::create_dir_all(parent).map_err(|e| {
            VmError::HostSetup(format!("create state disk dir {}: {e}", parent.display()))
        })?;
    }

    // truncate(false): keep any existing guest data in the image.
    let file = std::fs::OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .truncate(false)
        .open(&state_disk.path)
        .map_err(|e| {
            VmError::HostSetup(format!(
                "open state disk {}: {e}",
                state_disk.path.display()
            ))
        })?;

    let current_len = file
        .metadata()
        .map_err(|e| {
            VmError::HostSetup(format!(
                "stat state disk {}: {e}",
                state_disk.path.display()
            ))
        })?
        .len();
    if current_len < state_disk.size_bytes {
        // set_len on a larger size produces a sparse extension.
        file.set_len(state_disk.size_bytes).map_err(|e| {
            VmError::HostSetup(format!(
                "resize state disk {} to {} bytes: {e}",
                state_disk.path.display(),
                state_disk.size_bytes
            ))
        })?;
    }

    Ok(())
}

/// Disk sync mode passed to krun_add_disk3: relaxed on macOS, full elsewhere.
fn state_disk_sync_mode() -> u32 {
    #[cfg(target_os = "macos")]
    {
        ffi::KRUN_SYNC_RELAXED
    }
    #[cfg(not(target_os = "macos"))]
    {
        ffi::KRUN_SYNC_FULL
    }
}

/// FNV-1a hash of the path string, truncated to 48 bits and formatted as
/// 12 hex chars. Used to build short, stable per-rootfs socket names.
fn hash_path_id(path: &Path) -> String {
    let mut hash: u64 = 0xcbf29ce484222325;
    for byte in path.to_string_lossy().as_bytes() {
        hash ^= u64::from(*byte);
        hash = hash.wrapping_mul(0x100000001b3);
    }
    format!("{:012x}", hash & 0x0000_ffff_ffff_ffff)
}

/// Return a secure base directory for temporary socket files.
///
/// Prefers `XDG_RUNTIME_DIR` (per-user, restricted permissions on Linux),
/// falls back to `/tmp`. After `create_dir_all`, validates the directory
/// is not a symlink and is owned by the current user.
fn secure_socket_base(subdir: &str) -> Result {
    let base = if let Some(xdg) = std::env::var_os("XDG_RUNTIME_DIR") {
        PathBuf::from(xdg)
    } else {
        let mut base = PathBuf::from("/tmp");
        if !base.is_dir() {
            base = std::env::temp_dir();
        }
        base
    };
    let dir = base.join(subdir);

    // If the path exists, verify it is not a symlink before using it.
    if dir.exists() {
        let meta = dir
            .symlink_metadata()
            .map_err(|e| VmError::HostSetup(format!("lstat {}: {e}", dir.display())))?;
        if meta.file_type().is_symlink() {
            return Err(VmError::HostSetup(format!(
                "socket directory {} is a symlink — refusing to use it",
                dir.display()
            )));
        }
        // Verify ownership matches current user.
        #[cfg(unix)]
        {
            use std::os::unix::fs::MetadataExt as _;
            let uid = unsafe { libc::getuid() };
            if meta.uid() != uid {
                return Err(VmError::HostSetup(format!(
                    "socket directory {} is owned by uid {} but we are uid {} — refusing to use it",
                    dir.display(),
                    meta.uid(),
                    uid
                )));
            }
        }
    } else {
        std::fs::create_dir_all(&dir)
            .map_err(|e| VmError::HostSetup(format!("create socket dir {}: {e}", dir.display())))?;
        // Set restrictive permissions on the newly created directory.
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt as _;
            let _ = std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o700));
        }
    }

    Ok(dir)
}

/// Per-rootfs socket directory under the secure base ("ovm-gv/<hash>").
fn gvproxy_socket_dir(rootfs: &Path) -> Result {
    let dir = secure_socket_base("ovm-gv")?;

    // macOS unix socket path limit is tight (~104 bytes). Keep paths very short.
    let id = hash_path_id(rootfs);
    Ok(dir.join(id))
}

/// Host port of the first "host:guest" port mapping, or the default
/// gateway port when none is configured/parseable.
fn gateway_host_port(config: &VmConfig) -> u16 {
    config
        .port_map
        .first()
        .and_then(|pm| pm.split(':').next())
        .and_then(|port| port.parse::().ok())
        .unwrap_or(DEFAULT_GATEWAY_PORT)
}

/// Ask the kernel for a free localhost TCP port for gvproxy's SSH forward.
/// NOTE(review): the port is released before gvproxy binds it, so another
/// process could grab it in between (inherent TOCTOU of this technique).
fn pick_gvproxy_ssh_port() -> Result {
    let listener = std::net::TcpListener::bind(("127.0.0.1", 0))
        .map_err(|e| VmError::HostSetup(format!("allocate gvproxy ssh port on localhost: {e}")))?;
    let port = listener
        .local_addr()
        .map_err(|e| VmError::HostSetup(format!("read gvproxy ssh port: {e}")))?
        .port();
    drop(listener);
    Ok(port)
}

/// Convert a host path to a CString for FFI; fails on non-UTF-8 paths or
/// interior NUL bytes.
fn path_to_cstring(path: &Path) -> Result {
    let s = path
        .to_str()
        .ok_or_else(|| VmError::InvalidPath(path.display().to_string()))?;
    Ok(CString::new(s)?)
}

// ── Launch ──────────────────────────────────────────────────────────────

/// Configure and launch a libkrun microVM.
///
/// This forks the process. The child enters the VM (never returns); the
/// parent blocks until the VM exits or a signal is received.
///
/// Returns the VM exit code (from `waitpid`).
#[allow(clippy::similar_names)]
pub fn launch(config: &VmConfig) -> Result {
    // Auto-extract embedded rootfs if using an instance path and it doesn't exist
    if !config.rootfs.is_dir()
        && is_instance_rootfs_path(&config.rootfs)
        && embedded::has_embedded_rootfs()
    {
        embedded::extract_rootfs_to(&config.rootfs)?;
    }

    // Validate rootfs
    if !config.rootfs.is_dir() {
        return Err(VmError::RootfsNotFound {
            path: config.rootfs.display().to_string(),
        });
    }
    // The init-script exec path marks a "real" gateway boot; one-off exec
    // invocations skip the single-instance and state checks below.
    if config.exec_path == "/srv/openshell-vm-init.sh" {
        ensure_vm_not_running(&config.rootfs)?;
    }

    // Acquire an exclusive flock on the rootfs lock file. This is held
    // by the parent process for the VM's entire lifetime. If this process
    // is killed (even SIGKILL), the OS releases the lock automatically.
    // This prevents a second launch or rootfs rebuild from corrupting a
    // running VM's filesystem via virtio-fs.
    let _rootfs_lock = if config.exec_path == "/srv/openshell-vm-init.sh" {
        Some(acquire_rootfs_lock(&config.rootfs)?)
    } else {
        None
    };

    // Check for a corrupt kine (SQLite) database and remove it if the
    // header is invalid. Stale bootstrap locks are handled inside the VM
    // by the init script (sqlite3 DELETE before k3s starts). This runs on
    // every normal boot (not --reset, which wipes k3s/server/ entirely).
    // Must happen after the lock so we know no other VM process is using
    // the rootfs.
    if !config.reset && config.exec_path == "/srv/openshell-vm-init.sh" {
        recover_corrupt_kine_db(&config.rootfs)?;
    }

    // Wipe stale containerd/kubelet runtime state if requested.
    // This must happen after the lock (to confirm no other VM is using
    // the rootfs) but before booting (so the new VM starts clean).
    if config.reset {
        reset_runtime_state(&config.rootfs, &config.gateway_name)?;
    }
    // --reset also discards the state disk so the guest re-initialises it.
    if config.reset
        && let Some(state_disk) = &config.state_disk
        && let Err(err) = std::fs::remove_file(&state_disk.path)
        && err.kind() != std::io::ErrorKind::NotFound
    {
        return Err(VmError::HostSetup(format!(
            "remove state disk {}: {err}",
            state_disk.path.display()
        )));
    }
    if let Some(state_disk) = &config.state_disk {
        ensure_state_disk_image(state_disk)?;
    }

    let launch_start = Instant::now();
    eprintln!("rootfs: {}", config.rootfs.display());
    if let Some(state_disk) = &config.state_disk {
        eprintln!(
            "state disk: {} ({} GiB)",
            state_disk.path.display(),
            state_disk.size_bytes / 1024 / 1024 / 1024
        );
    }
    eprintln!("vm: {} vCPU(s), {} MiB RAM", config.vcpus, config.mem_mib);

    // The runtime is embedded in the binary and extracted on first use.
    // Can be overridden via OPENSHELL_VM_RUNTIME_DIR for development.
    let runtime_gvproxy = resolve_runtime_bundle()?;
    let runtime_dir = runtime_gvproxy.parent().ok_or_else(|| {
        VmError::HostSetup(format!(
            "runtime bundle file has no parent directory: {}",
            runtime_gvproxy.display()
        ))
    })?;
    configure_runtime_loader_env(runtime_dir)?;
    raise_nofile_limit();

    // ── Log runtime provenance ─────────────────────────────────────
    // After configuring the loader, trigger library loading so that
    // provenance is captured before we proceed with VM configuration.
    let _ = ffi::libkrun()?;
    log_runtime_provenance(runtime_dir);

    // ── Configure the microVM ──────────────────────────────────────

    let vm = VmContext::create(config.log_level)?;
    vm.set_vm_config(config.vcpus, config.mem_mib)?;
    vm.set_root(&config.rootfs)?;
    if let Some(state_disk) = &config.state_disk {
        vm.add_state_disk(state_disk)?;
    }
    vm.set_workdir(&config.workdir)?;

    // Networking setup — use a drop guard so gvproxy is killed if we
    // return early via `?` before reaching the parent's cleanup code.
    let mut gvproxy_guard: Option = None;
    let mut gvproxy_api_sock: Option = None;

    match &config.net {
        NetBackend::Tsi => {
            // Default TSI — no special setup needed.
        }
        NetBackend::None => {
            vm.disable_implicit_vsock()?;
            vm.add_vsock(0)?;
            eprintln!("Networking: disabled (no TSI, no virtio-net)");
        }
        NetBackend::Gvproxy { binary } => {
            if !binary.exists() {
                return Err(VmError::BinaryNotFound {
                    path: binary.display().to_string(),
                    hint: "Install Podman Desktop or place gvproxy in PATH".to_string(),
                });
            }

            // Create temp socket paths
            let run_dir = config
                .rootfs
                .parent()
                .unwrap_or(&config.rootfs)
                .to_path_buf();
            let rootfs_key = vm_rootfs_key(&config.rootfs);
            let sock_base = gvproxy_socket_dir(&config.rootfs)?;
            let net_sock = sock_base.with_extension("v");
            let api_sock = sock_base.with_extension("a");

            // Kill any stale gvproxy process from a previous run.
            // If gvproxy is still holding port 2222, the new instance
            // will fail with "bind: address already in use".
            kill_stale_gvproxy(&config.rootfs);

            // Clean stale sockets (including the -krun.sock file that
            // libkrun creates as its datagram endpoint on macOS).
            let _ = std::fs::remove_file(&net_sock);
            let _ = std::fs::remove_file(&api_sock);
            let krun_sock = sock_base.with_extension("v-krun.sock");
            let _ = std::fs::remove_file(&krun_sock);

            // Start gvproxy
            eprintln!("Starting gvproxy: {}", binary.display());
            let ssh_port = pick_gvproxy_ssh_port()?;
            let gvproxy_log = run_dir.join(format!("{rootfs_key}-gvproxy.log"));
            let gvproxy_log_file = std::fs::File::create(&gvproxy_log)
                .map_err(|e| VmError::Fork(format!("failed to create gvproxy log: {e}")))?;

            // On Linux, gvproxy uses QEMU mode (SOCK_STREAM) since the vfkit
            // unixgram scheme is macOS/vfkit-specific. On macOS, use vfkit mode.
            #[cfg(target_os = "linux")]
            let (gvproxy_net_flag, gvproxy_net_url) =
                ("-listen-qemu", format!("unix://{}", net_sock.display()));
            #[cfg(target_os = "macos")]
            let (gvproxy_net_flag, gvproxy_net_url) = (
                "-listen-vfkit",
                format!("unixgram://{}", net_sock.display()),
            );

            let child = std::process::Command::new(binary)
                .arg(gvproxy_net_flag)
                .arg(&gvproxy_net_url)
                .arg("-listen")
                .arg(format!("unix://{}", api_sock.display()))
                .arg("-ssh-port")
                .arg(ssh_port.to_string())
                .stdout(std::process::Stdio::null())
                .stderr(gvproxy_log_file)
                .spawn()
                .map_err(|e| VmError::Fork(format!("failed to start gvproxy: {e}")))?;

            eprintln!(
                "gvproxy started (pid {}, ssh port {}) [{:.1}s]",
                child.id(),
                ssh_port,
                launch_start.elapsed().as_secs_f64()
            );

            // Wait for the socket to appear (exponential backoff: 5ms → 100ms).
            {
                let deadline = Instant::now() + std::time::Duration::from_secs(5);
                let mut interval = std::time::Duration::from_millis(5);
                while !net_sock.exists() {
                    if Instant::now() >= deadline {
                        return Err(VmError::Fork(
                            "gvproxy socket did not appear within 5s".to_string(),
                        ));
                    }
                    std::thread::sleep(interval);
                    interval = (interval * 2).min(std::time::Duration::from_millis(100));
                }
            }

            // Disable implicit TSI and add virtio-net via gvproxy
            vm.disable_implicit_vsock()?;
            vm.add_vsock(0)?;
            // This MAC matches gvproxy's default static DHCP lease for
            // 192.168.127.2. Using a different MAC can cause the gVisor
            // network stack to misroute or drop packets.
            let mac: [u8; 6] = [0x5a, 0x94, 0xef, 0xe4, 0x0c, 0xee];

            // COMPAT_NET_FEATURES from libkrun.h
            const NET_FEATURE_CSUM: u32 = 1 << 0;
            const NET_FEATURE_GUEST_CSUM: u32 = 1 << 1;
            const NET_FEATURE_GUEST_TSO4: u32 = 1 << 7;
            const NET_FEATURE_GUEST_UFO: u32 = 1 << 10;
            const NET_FEATURE_HOST_TSO4: u32 = 1 << 11;
            const NET_FEATURE_HOST_UFO: u32 = 1 << 14;
            const COMPAT_NET_FEATURES: u32 = NET_FEATURE_CSUM
                | NET_FEATURE_GUEST_CSUM
                | NET_FEATURE_GUEST_TSO4
                | NET_FEATURE_GUEST_UFO
                | NET_FEATURE_HOST_TSO4
                | NET_FEATURE_HOST_UFO;

            // On Linux use unixstream (SOCK_STREAM) to connect to gvproxy's
            // QEMU listener. On macOS use unixgram (SOCK_DGRAM) with the vfkit
            // magic byte for the vfkit listener.
            #[cfg(target_os = "linux")]
            vm.add_net_unixstream(&net_sock, &mac, COMPAT_NET_FEATURES)?;
            #[cfg(target_os = "macos")]
            {
                const NET_FLAG_VFKIT: u32 = 1 << 0;
                vm.add_net_unixgram(&net_sock, &mac, COMPAT_NET_FEATURES, NET_FLAG_VFKIT)?;
            }

            eprintln!(
                "Networking: gvproxy (virtio-net) [{:.1}s]",
                launch_start.elapsed().as_secs_f64()
            );
            gvproxy_guard = Some(GvproxyGuard::new(child));
            gvproxy_api_sock = Some(api_sock);
        }
    }

    // Port mapping (TSI only)
    if !config.port_map.is_empty() && matches!(config.net, NetBackend::Tsi) {
        vm.set_port_map(&config.port_map)?;
    }

    for vsock_port in &config.vsock_ports {
        if let Some(parent) = vsock_port.socket_path.parent() {
            std::fs::create_dir_all(parent).map_err(|e| {
                VmError::RuntimeState(format!("create vsock socket dir {}: {e}", parent.display()))
            })?;
        }
        // libkrun returns EEXIST if the socket file is already present from a
        // previous run. Remove any stale socket before registering the port.
        let _ = std::fs::remove_file(&vsock_port.socket_path);
        vm.add_vsock_port(vsock_port)?;
    }

    // Console output
    let console_log = config.console_output.clone().unwrap_or_else(|| {
        config
            .rootfs
            .parent()
            .unwrap_or(&config.rootfs)
            .join(format!("{}-console.log", vm_rootfs_key(&config.rootfs)))
    });
    vm.set_console_output(&console_log)?;

    // envp: use provided env or minimal defaults
    let mut env: Vec = if config.env.is_empty() {
        vec![
            "HOME=/root",
            "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
            "TERM=xterm",
        ]
        .into_iter()
        .map(ToOwned::to_owned)
        .collect()
    } else {
        config.env.clone()
    };
    // Tell the guest which block device holds state, unless the caller
    // already set it explicitly.
    if let Some(state_disk) = &config.state_disk
        && !env
            .iter()
            .any(|entry| entry.starts_with("OPENSHELL_VM_STATE_DISK_DEVICE="))
    {
        env.push(format!(
            "OPENSHELL_VM_STATE_DISK_DEVICE={}",
            state_disk.guest_device
        ));
    }
    vm.set_exec(&config.exec_path, &config.args, &env)?;

    // ── Fork and enter the VM ──────────────────────────────────────
    //
    // krun_start_enter() never returns — it calls exit() when the guest
    // process exits. We fork so the parent can monitor and report.

    let boot_start = Instant::now();
    eprintln!("Booting microVM...");

    let pid = unsafe { libc::fork() };
    match pid {
        -1 => Err(VmError::Fork(std::io::Error::last_os_error().to_string())),
        0 => {
            // Child process: enter the VM (never returns on success)
            let ret = vm.start_enter();
            eprintln!("krun_start_enter failed: {ret}");
            std::process::exit(1);
        }
        _ => {
            // Parent: wait for child
            if config.exec_path == "/srv/openshell-vm-init.sh" {
                let gvproxy_pid = gvproxy_guard.as_ref().and_then(GvproxyGuard::id);
                if let Err(err) =
                    write_vm_runtime_state(&config.rootfs, pid, &console_log, gvproxy_pid)
                {
                    // Could not record runtime state: tear everything down
                    // so we don't leave an untracked VM running.
                    unsafe {
                        libc::kill(pid, libc::SIGTERM);
                    }
                    // Guard drop will kill gvproxy automatically
                    drop(gvproxy_guard);
                    clear_vm_runtime_state(&config.rootfs);
                    return Err(err);
                }
            }
            eprintln!(
                "VM started (child pid {pid}) [{:.1}s]",
                boot_start.elapsed().as_secs_f64()
            );
            for pm in &config.port_map {
                let host_port = pm.split(':').next().unwrap_or(pm);
                eprintln!(" port {pm} -> http://localhost:{host_port}");
            }
            eprintln!("Console output: {}", console_log.display());

            // Set up gvproxy port forwarding via its HTTP API.
            // The port_map entries use the same "host:guest" format
            // as TSI, but here we translate them into gvproxy expose
            // calls targeting the guest IP (192.168.127.2).
            //
            // Instead of a fixed 500ms sleep, poll the API socket with
            // exponential backoff (5ms → 200ms, ~1s total budget).
            if let Some(ref api_sock) = gvproxy_api_sock {
                let fwd_start = Instant::now();
                // Wait for the API socket to appear (it lags slightly
                // behind the vfkit data socket).
                {
                    let deadline = Instant::now() + std::time::Duration::from_secs(2);
                    let mut interval = std::time::Duration::from_millis(5);
                    while !api_sock.exists() {
                        if Instant::now() >= deadline {
                            eprintln!(
                                "warning: gvproxy API socket not ready after 2s, attempting anyway"
                            );
                            break;
                        }
                        std::thread::sleep(interval);
                        interval = (interval * 2).min(std::time::Duration::from_millis(200));
                    }
                }

                let guest_ip = "192.168.127.2";

                for pm in &config.port_map {
                    // "host:guest", or a bare port meaning host == guest.
                    let parts: Vec<&str> = pm.split(':').collect();
                    let (host_port, guest_port) = match parts.len() {
                        2 => (parts[0], parts[1]),
                        1 => (parts[0], parts[0]),
                        _ => {
                            eprintln!(" skipping invalid port mapping: {pm}");
                            continue;
                        }
                    };

                    let expose_body = format!(
                        r#"{{"local":":{host_port}","remote":"{guest_ip}:{guest_port}","protocol":"tcp"}}"#
                    );

                    match gvproxy_expose(api_sock, &expose_body) {
                        Ok(()) => {
                            eprintln!(" port {host_port} -> {guest_ip}:{guest_port}");
                        }
                        Err(e) => {
                            // Non-fatal: report and keep mapping other ports.
                            eprintln!(" port {host_port}: {e}");
                        }
                    }
                }
                eprintln!(
                    "Port forwarding ready [{:.1}s]",
                    fwd_start.elapsed().as_secs_f64()
                );
            }

            // Bootstrap the OpenShell control plane and wait for the
            // service to be reachable. Only for the gateway preset, and
            // only when port forwarding is configured (i.e. the gateway
            // is reachable from the host). During rootfs pre-init builds,
            // no --port is specified so there is nothing to health-check
            // — the build script has its own kubectl-based readiness
            // checks inside the VM.
            if config.exec_path == "/srv/openshell-vm-init.sh" && !config.port_map.is_empty() {
                // Bootstrap stores host-side metadata and mTLS creds.
                // With pre-baked rootfs (Path 1) this reads PKI directly
                // from virtio-fs — no kubectl or port forwarding needed.
                // Cold boot (Path 2) writes secret manifests into the
                // k3s auto-deploy directory via virtio-fs.
+ let gateway_port = gateway_host_port(config); + bootstrap_gateway(&config.rootfs, &config.gateway_name, gateway_port)?; + + // Wait for the gRPC health check to pass. This ensures + // the service is fully operational, not just accepting + // TCP connections. The health check confirms the full + // path (gvproxy → kube-proxy nftables → pod:8080) and + // that the gRPC service is responding to requests. + health::wait_for_gateway_ready(gateway_port, &config.gateway_name)?; + } + + eprintln!("Ready [{:.1}s total]", boot_start.elapsed().as_secs_f64()); + eprintln!("Press Ctrl+C to stop."); + + // Forward signals to child + unsafe { + libc::signal( + libc::SIGINT, + forward_signal as *const () as libc::sighandler_t, + ); + libc::signal( + libc::SIGTERM, + forward_signal as *const () as libc::sighandler_t, + ); + CHILD_PID.store(pid, std::sync::atomic::Ordering::Relaxed); + } + + let mut status: libc::c_int = 0; + unsafe { + libc::waitpid(pid, &raw mut status, 0); + } + + // Clean up gvproxy — disarm the guard and do explicit cleanup + // so we can print the "stopped" message. + if config.exec_path == "/srv/openshell-vm-init.sh" { + clear_vm_runtime_state(&config.rootfs); + } + if let Some(mut guard) = gvproxy_guard + && let Some(mut child) = guard.disarm() + { + let _ = child.kill(); + let _ = child.wait(); + eprintln!("gvproxy stopped"); + } + + if libc::WIFEXITED(status) { + let code = libc::WEXITSTATUS(status); + eprintln!("VM exited with code {code}"); + return Ok(code); + } else if libc::WIFSIGNALED(status) { + let sig = libc::WTERMSIG(status); + eprintln!("VM killed by signal {sig}"); + return Ok(128 + sig); + } + + Ok(status) + } + } +} + +// ── Post-boot bootstrap ──────────────────────────────────────────────── + +/// Default gateway port: host port mapped to the `OpenShell` `NodePort` (30051). +const DEFAULT_GATEWAY_PORT: u16 = 30051; + +/// Bootstrap the `OpenShell` control plane after k3s is ready. +/// +/// Two paths: +/// +/// 1. 
**Warm boot**: host-side metadata and mTLS certs already exist from a
+///    previous run. Fetch PKI via the exec agent to detect cert drift (e.g.
+///    after a `--reset`), re-sync if needed, then proceed to the health check.
+///
+/// 2. **First boot / post-reset**: poll the exec agent to `cat` each PEM file
+///    from `/opt/openshell/pki/` until the files exist (PKI generation has
+///    finished), then store them in `~/.config/openshell/gateways/<name>/mtls/`.
+fn bootstrap_gateway(rootfs: &Path, gateway_name: &str, gateway_port: u16) -> Result<(), VmError> {
+    let bootstrap_start = Instant::now();
+
+    let metadata = openshell_bootstrap::GatewayMetadata {
+        name: gateway_name.to_string(),
+        gateway_endpoint: format!("https://127.0.0.1:{gateway_port}"),
+        is_remote: false,
+        gateway_port,
+        remote_host: None,
+        resolved_host: None,
+        auth_mode: None,
+        edge_team_domain: None,
+        edge_auth_url: None,
+    };
+
+    let exec_socket = vm_exec_socket_path(rootfs);
+
+    // ── Warm boot: host already has certs ──────────────────────────
+    if is_warm_boot(gateway_name) {
+        // Always (re-)store metadata so port/endpoint changes are picked up.
+        openshell_bootstrap::store_gateway_metadata(gateway_name, &metadata)
+            .map_err(|e| VmError::Bootstrap(format!("failed to store metadata: {e}")))?;
+        openshell_bootstrap::save_active_gateway(gateway_name)
+            .map_err(|e| VmError::Bootstrap(format!("failed to set active cluster: {e}")))?;
+
+        // Verify host certs match the VM's PKI. If they diverge (e.g.
+        // PKI was regenerated after a --reset, or the state disk was
+        // replaced), re-sync the host certs from the VM via the exec agent.
+        //
+        // On warm boot the exec agent may not be ready yet (the VM is
+        // still booting). Use a short timeout — this is a non-critical
+        // drift check and the host already has valid certs. If the agent
+        // isn't reachable we skip silently rather than blocking boot for
+        // 30s.
+ match fetch_pki_over_exec(&exec_socket, std::time::Duration::from_secs(5)) { + Ok(bundle) => { + if let Err(e) = sync_host_certs_if_stale(gateway_name, &bundle) { + eprintln!("Warning: cert sync check failed: {e}"); + } + } + Err(_) => { + // Expected on warm boot — exec agent not ready yet. + } + } + + eprintln!( + "Warm boot [{:.1}s]", + bootstrap_start.elapsed().as_secs_f64() + ); + eprintln!(" Cluster: {gateway_name}"); + eprintln!(" Gateway: https://127.0.0.1:{gateway_port}"); + eprintln!(" mTLS: ~/.config/openshell/gateways/{gateway_name}/mtls/"); + return Ok(()); + } + + // ── First boot / post-reset: fetch PKI from VM via exec agent ── + // + // The VM init script generates certs on first boot at /opt/openshell/pki/. + // We poll the exec agent with `cat ` for each PEM file until they + // exist, retrying to handle the window between VM boot and PKI generation. + eprintln!("Waiting for VM to generate PKI..."); + let pki_bundle = fetch_pki_over_exec(&exec_socket, std::time::Duration::from_secs(120)) + .map_err(|e| VmError::Bootstrap(format!("VM did not produce PKI within 120s: {e}")))?; + + eprintln!("PKI ready — storing client certs on host..."); + + openshell_bootstrap::store_gateway_metadata(gateway_name, &metadata) + .map_err(|e| VmError::Bootstrap(format!("failed to store metadata: {e}")))?; + + openshell_bootstrap::mtls::store_pki_bundle(gateway_name, &pki_bundle) + .map_err(|e| VmError::Bootstrap(format!("failed to store mTLS creds: {e}")))?; + + openshell_bootstrap::save_active_gateway(gateway_name) + .map_err(|e| VmError::Bootstrap(format!("failed to set active cluster: {e}")))?; + + eprintln!( + "Bootstrap complete [{:.1}s]", + bootstrap_start.elapsed().as_secs_f64() + ); + eprintln!(" Cluster: {gateway_name}"); + eprintln!(" Gateway: https://127.0.0.1:{gateway_port}"); + eprintln!(" mTLS: ~/.config/openshell/gateways/{gateway_name}/mtls/"); + + Ok(()) +} + +/// PKI file names and the corresponding [`PkiBundle`] fields. 
+const PKI_FILES: &[(&str, &str)] = &[
+    ("ca.crt", "ca_cert_pem"),
+    ("ca.key", "ca_key_pem"),
+    ("server.crt", "server_cert_pem"),
+    ("server.key", "server_key_pem"),
+    ("client.crt", "client_cert_pem"),
+    ("client.key", "client_key_pem"),
+];
+
+/// Fetch all six PEM files from `/opt/openshell/pki/` inside the guest by
+/// running `cat` via the exec agent. Retries until `timeout` elapses,
+/// sleeping 500ms between attempts, to handle the window between VM boot
+/// and PKI generation completing.
+fn fetch_pki_over_exec(
+    exec_socket: &Path,
+    timeout: std::time::Duration,
+) -> Result<openshell_bootstrap::pki::PkiBundle, VmError> {
+    let deadline = Instant::now() + timeout;
+
+    loop {
+        match try_read_pki_files(exec_socket) {
+            Ok(bundle) => return Ok(bundle),
+            Err(_) if Instant::now() < deadline => {
+                std::thread::sleep(std::time::Duration::from_millis(500));
+            }
+            Err(e) => {
+                return Err(VmError::Bootstrap(format!(
+                    "failed to read PKI files via exec agent: {e}"
+                )));
+            }
+        }
+    }
+}
+
+/// Attempt to read all six PEM files from the guest in one pass.
+fn try_read_pki_files(exec_socket: &Path) -> Result<openshell_bootstrap::pki::PkiBundle, VmError> {
+    let mut pems = std::collections::HashMap::new();
+
+    for &(filename, _field) in PKI_FILES {
+        let path = format!("/opt/openshell/pki/{filename}");
+        let output = exec_capture(exec_socket, vec!["cat".to_string(), path])?;
+        let content = String::from_utf8(output).map_err(|e| {
+            VmError::Bootstrap(format!("PKI file {filename} is not valid UTF-8: {e}"))
+        })?;
+        if content.is_empty() {
+            return Err(VmError::Bootstrap(format!("PKI file {filename} is empty")));
+        }
+        pems.insert(filename, content);
+    }
+
+    let mut get = |key: &str| -> Result<String, VmError> {
+        pems.remove(key)
+            .ok_or_else(|| VmError::Bootstrap(format!("PKI file {key} missing from exec output")))
+    };
+
+    Ok(openshell_bootstrap::pki::PkiBundle {
+        ca_cert_pem: get("ca.crt")?,
+        ca_key_pem: get("ca.key")?,
+        server_cert_pem: get("server.crt")?,
+        server_key_pem: get("server.key")?,
+        client_cert_pem: get("client.crt")?,
+        client_key_pem: get("client.key")?,
+    })
+}
+
+/// Check whether a previous bootstrap left valid state on disk.
+///
+/// A warm boot is detected when both:
+/// - Cluster metadata exists: `$XDG_CONFIG_HOME/openshell/gateways/openshell-vm/metadata.json`
+/// - mTLS certs exist: `$XDG_CONFIG_HOME/openshell/gateways/openshell-vm/mtls/{ca.crt,tls.crt,tls.key}`
+///
+/// When true, the host-side bootstrap (PKI generation, secret manifest writing,
+/// metadata storage) can be skipped because the virtio-fs rootfs persists k3s
+/// state (TLS certs, kine/SQLite cluster objects, containerd images, helm
+/// releases) across VM restarts. The kine database is preserved on normal
+/// boots so that pods and other cluster objects survive restarts.
+fn is_warm_boot(gateway_name: &str) -> bool { + let Ok(home) = std::env::var("HOME") else { + return false; + }; + + let config_base = + std::env::var("XDG_CONFIG_HOME").unwrap_or_else(|_| format!("{home}/.config")); + + let config_dir = PathBuf::from(&config_base) + .join("openshell") + .join("gateways"); + + // Check metadata file. + let metadata_path = config_dir.join(gateway_name).join("metadata.json"); + if !metadata_path.is_file() { + return false; + } + + // Check mTLS cert files. + let mtls_dir = config_dir.join(gateway_name).join("mtls"); + for name in &["ca.crt", "tls.crt", "tls.key"] { + let path = mtls_dir.join(name); + match std::fs::metadata(&path) { + Ok(m) if m.is_file() && m.len() > 0 => {} + _ => return false, + } + } + + true +} + +/// Compare the CA cert on the rootfs (authoritative source) against the +/// host-side copy. If they differ, re-copy all client certs from the rootfs. +/// +/// This catches cases where PKI was regenerated (e.g. rootfs rebuilt, +/// manual reset) but host-side certs survived from a previous boot cycle. 
+fn sync_host_certs_if_stale( + gateway_name: &str, + bundle: &openshell_bootstrap::pki::PkiBundle, +) -> Result<(), VmError> { + let Ok(home) = std::env::var("HOME") else { + return Ok(()); + }; + let config_base = + std::env::var("XDG_CONFIG_HOME").unwrap_or_else(|_| format!("{home}/.config")); + let host_ca = PathBuf::from(&config_base) + .join("openshell/gateways") + .join(gateway_name) + .join("mtls/ca.crt"); + + let host_ca_contents = std::fs::read_to_string(&host_ca) + .map_err(|e| VmError::Bootstrap(format!("failed to read host ca.crt: {e}")))?; + + if bundle.ca_cert_pem.trim() == host_ca_contents.trim() { + return Ok(()); + } + + eprintln!("Cert drift detected — re-syncing mTLS certs from VM..."); + + openshell_bootstrap::mtls::store_pki_bundle(gateway_name, bundle) + .map_err(|e| VmError::Bootstrap(format!("failed to store mTLS creds: {e}")))?; + + eprintln!(" mTLS certs re-synced from VM"); + Ok(()) +} + +static CHILD_PID: std::sync::atomic::AtomicI32 = std::sync::atomic::AtomicI32::new(0); + +extern "C" fn forward_signal(_sig: libc::c_int) { + let pid = CHILD_PID.load(std::sync::atomic::Ordering::Relaxed); + if pid > 0 { + unsafe { + libc::kill(pid, libc::SIGTERM); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_runtime_dir() -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("time went backwards") + .as_nanos(); + std::env::temp_dir().join(format!( + "openshell-vm-runtime-{}-{nanos}", + std::process::id() + )) + } + + fn write_runtime_file(path: &Path) { + fs::write(path, b"test").expect("failed to write runtime file"); + } + + #[test] + fn validate_runtime_dir_accepts_minimal_bundle() { + let dir = temp_runtime_dir(); + fs::create_dir_all(&dir).expect("failed to create runtime dir"); + + write_runtime_file(&dir.join(ffi::required_runtime_lib_name())); + write_runtime_file(&dir.join("libkrunfw.test")); + let gvproxy = dir.join("gvproxy"); + 
write_runtime_file(&gvproxy); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt as _; + + let mut perms = fs::metadata(&gvproxy).expect("stat gvproxy").permissions(); + perms.set_mode(0o755); + fs::set_permissions(&gvproxy, perms).expect("chmod gvproxy"); + } + + validate_runtime_dir(&dir).expect("runtime bundle should validate"); + assert!(gvproxy.exists()); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn validate_runtime_dir_requires_gvproxy() { + let dir = temp_runtime_dir(); + fs::create_dir_all(&dir).expect("failed to create runtime dir"); + + write_runtime_file(&dir.join(ffi::required_runtime_lib_name())); + write_runtime_file(&dir.join("libkrunfw.test")); + + let err = validate_runtime_dir(&dir).expect_err("missing gvproxy should fail"); + match err { + VmError::BinaryNotFound { hint, .. } => { + assert!(hint.contains("missing gvproxy")); + } + other => panic!("unexpected error: {other:?}"), + } + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn gateway_config_uses_default_state_disk_next_to_rootfs() { + let rootfs = PathBuf::from("/tmp/openshell-vm-test/rootfs"); + + let config = VmConfig::gateway(rootfs.clone()); + let state_disk = config + .state_disk + .expect("gateway should enable a state disk"); + + assert_eq!( + state_disk.path, + rootfs.parent().unwrap().join("rootfs-state.raw") + ); + assert_eq!(state_disk.block_id, DEFAULT_STATE_DISK_BLOCK_ID); + assert_eq!(state_disk.guest_device, DEFAULT_STATE_DISK_GUEST_DEVICE); + assert_eq!(state_disk.size_bytes, DEFAULT_STATE_DISK_SIZE_BYTES); + } + + #[test] + fn ensure_state_disk_image_creates_sparse_file() { + let dir = temp_runtime_dir(); + fs::create_dir_all(&dir).expect("failed to create runtime dir"); + + let state_disk = StateDiskConfig { + path: dir.join("state.raw"), + size_bytes: 8 * 1024 * 1024, + block_id: DEFAULT_STATE_DISK_BLOCK_ID.to_string(), + guest_device: DEFAULT_STATE_DISK_GUEST_DEVICE.to_string(), + }; + + ensure_state_disk_image(&state_disk).expect("state 
disk should be created"); + + let metadata = fs::metadata(&state_disk.path).expect("stat state disk"); + assert_eq!(metadata.len(), state_disk.size_bytes); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn prepare_rootfs_returns_existing_explicit_rootfs() { + let dir = temp_runtime_dir(); + let rootfs = dir.join("rootfs"); + fs::create_dir_all(&rootfs).expect("failed to create rootfs dir"); + + let prepared = + prepare_rootfs(Some(rootfs.clone()), "default", false).expect("prepare rootfs"); + + assert_eq!(prepared, rootfs); + + let _ = fs::remove_dir_all(&dir); + } +} diff --git a/crates/openshell-vm/src/main.rs b/crates/openshell-vm/src/main.rs new file mode 100644 index 000000000..ba7c7d6bc --- /dev/null +++ b/crates/openshell-vm/src/main.rs @@ -0,0 +1,241 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Standalone openshell-vm binary. +//! +//! Boots a libkrun microVM running the `OpenShell` control plane (k3s + +//! openshell-server). Each named instance gets its own rootfs extracted from +//! the embedded tarball at +//! `~/.local/share/openshell/openshell-vm/{version}/instances//rootfs`. +//! +//! # Codesigning (macOS) +//! +//! This binary must be codesigned with the `com.apple.security.hypervisor` +//! entitlement. See `entitlements.plist` in this crate. +//! +//! ```sh +//! codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - target/debug/openshell-vm +//! ``` + +use std::io::IsTerminal; +use std::path::PathBuf; + +use clap::{Parser, Subcommand, ValueHint}; + +const DISABLE_STATE_DISK_ENV: &str = "OPENSHELL_VM_DISABLE_STATE_DISK"; + +/// Boot the `OpenShell` gateway microVM. +/// +/// Starts a libkrun microVM running a k3s Kubernetes cluster with the +/// `OpenShell` control plane. Use `--exec` to run a custom process instead. 
+#[derive(Parser)]
+#[command(name = "openshell-vm", version)]
+struct Cli {
+    #[command(subcommand)]
+    command: Option<GatewayCommand>,
+
+    /// Path to the rootfs directory (aarch64 Linux).
+    /// Overrides the default instance-based rootfs resolution.
+    #[arg(long, value_hint = ValueHint::DirPath)]
+    rootfs: Option<PathBuf>,
+
+    /// Named VM instance.
+    ///
+    /// When used alone, the rootfs resolves to
+    /// `~/.local/share/openshell/openshell-vm/{version}/instances/<name>/rootfs`
+    /// and is extracted from the embedded tarball on first use.
+    /// When combined with `--rootfs`, only provides the instance identity
+    /// (for exec, gateway name, etc.) while the rootfs comes from the
+    /// explicit path.
+    #[arg(long, default_value = "default")]
+    name: String,
+
+    /// Executable path inside the VM. When set, runs this instead of
+    /// the default k3s server.
+    #[arg(long)]
+    exec: Option<String>,
+
+    /// Arguments to the executable (requires `--exec`).
+    #[arg(long, num_args = 1..)]
+    args: Vec<String>,
+
+    /// Environment variables in `KEY=VALUE` form (requires `--exec`).
+    #[arg(long, num_args = 1..)]
+    env: Vec<String>,
+
+    /// Working directory inside the VM.
+    #[arg(long, default_value = "/")]
+    workdir: String,
+
+    /// Port mappings (`host_port:guest_port`).
+    #[arg(long, short, num_args = 1..)]
+    port: Vec<String>,
+
+    /// Number of virtual CPUs (default: 4 for openshell-vm, 2 for --exec).
+    // NOTE(review): generic was garbled in extraction; u8 matches libkrun's
+    // krun_set_vm_config(num_vcpus: u8) — confirm against VmConfig::vcpus.
+    #[arg(long)]
+    vcpus: Option<u8>,
+
+    /// RAM in MiB (default: 8192 for openshell-vm, 2048 for --exec).
+    // NOTE(review): generic was garbled in extraction; u32 matches libkrun's
+    // krun_set_vm_config(ram_mib: u32) — confirm against VmConfig::mem_mib.
+    #[arg(long)]
+    mem: Option<u32>,
+
+    /// libkrun log level (0=Off .. 5=Trace).
+    #[arg(long, default_value_t = 1)]
+    krun_log_level: u32,
+
+    /// Networking backend: "gvproxy" (default), "tsi", or "none".
+    #[arg(long, default_value = "gvproxy")]
+    net: String,
+
+    /// Wipe all runtime state (containerd, kubelet, k3s) before booting.
+    /// Use this to recover from a corrupted state after a crash or
+    /// unclean shutdown.
+    #[arg(long)]
+    reset: bool,
+}
+
+#[derive(Subcommand)]
+enum GatewayCommand {
+    /// Ensure the target rootfs exists, extracting the embedded rootfs if needed.
+    PrepareRootfs {
+        /// Recreate the target rootfs even if it already exists.
+        #[arg(long)]
+        force: bool,
+    },
+
+    /// Execute a command inside a running openshell-vm VM.
+    Exec {
+        /// Working directory inside the VM.
+        #[arg(long)]
+        workdir: Option<String>,
+
+        /// Environment variables in `KEY=VALUE` form.
+        #[arg(long, num_args = 1..)]
+        env: Vec<String>,
+
+        /// Command and arguments to run inside the VM.
+        #[arg(trailing_var_arg = true)]
+        command: Vec<String>,
+    },
+}
+
+fn main() {
+    tracing_subscriber::fmt::init();
+
+    let cli = Cli::parse();
+
+    let code = match run(cli) {
+        Ok(code) => code,
+        Err(e) => {
+            eprintln!("Error: {e}");
+            1
+        }
+    };
+
+    if code != 0 {
+        std::process::exit(code);
+    }
+}
+
+fn run(cli: Cli) -> Result<i32, Box<dyn std::error::Error>> {
+    if let Some(GatewayCommand::PrepareRootfs { force }) = &cli.command {
+        let rootfs = openshell_vm::prepare_rootfs(cli.rootfs.clone(), &cli.name, *force)?;
+        println!("{}", rootfs.display());
+        return Ok(0);
+    }
+
+    if let Some(GatewayCommand::Exec {
+        workdir,
+        env,
+        mut command,
+    }) = cli.command
+    {
+        let effective_tty = std::io::stdin().is_terminal();
+        if command.is_empty() {
+            if effective_tty {
+                command.push("sh".to_string());
+            } else {
+                return Err("openshell-vm exec requires a command when stdin is not a TTY".into());
+            }
+        }
+        return Ok(openshell_vm::exec_running_vm(
+            openshell_vm::VmExecOptions {
+                rootfs: Some(
+                    cli.rootfs
+                        .unwrap_or(openshell_vm::named_rootfs_dir(&cli.name)?),
+                ),
+                command,
+                workdir,
+                env,
+                tty: effective_tty,
+            },
+        )?);
+    }
+
+    let net_backend = match cli.net.as_str() {
+        "tsi" => openshell_vm::NetBackend::Tsi,
+        "none" => openshell_vm::NetBackend::None,
+        "gvproxy" => openshell_vm::NetBackend::Gvproxy {
+            binary: openshell_vm::default_runtime_gvproxy_path(),
+        },
+        other => {
+            return Err(
+                format!("unknown --net backend: {other} (expected: 
gvproxy, tsi, none)").into(), + ); + } + }; + + let rootfs = cli + .rootfs + .map_or_else(|| openshell_vm::ensure_named_rootfs(&cli.name), Ok)?; + + let gateway_name = openshell_vm::gateway_name(&cli.name)?; + + let mut config = if let Some(exec_path) = cli.exec { + openshell_vm::VmConfig { + rootfs, + vcpus: cli.vcpus.unwrap_or(2), + mem_mib: cli.mem.unwrap_or(2048), + exec_path, + args: cli.args, + env: cli.env, + workdir: cli.workdir, + port_map: cli.port, + vsock_ports: vec![], + log_level: cli.krun_log_level, + console_output: None, + net: net_backend, + reset: cli.reset, + gateway_name, + state_disk: None, + } + } else { + let mut c = openshell_vm::VmConfig::gateway(rootfs); + if !cli.port.is_empty() { + c.port_map = cli.port; + } + if let Some(v) = cli.vcpus { + c.vcpus = v; + } + if let Some(m) = cli.mem { + c.mem_mib = m; + } + c.net = net_backend; + c.reset = cli.reset; + c.gateway_name = gateway_name; + if state_disk_disabled() { + c.state_disk = None; + } + c + }; + config.log_level = cli.krun_log_level; + + Ok(openshell_vm::launch(&config)?) +} + +fn state_disk_disabled() -> bool { + matches!( + std::env::var(DISABLE_STATE_DISK_ENV).ok().as_deref(), + Some("1" | "true" | "TRUE" | "yes" | "YES") + ) +} diff --git a/crates/openshell-vm/tests/gateway_integration.rs b/crates/openshell-vm/tests/gateway_integration.rs new file mode 100644 index 000000000..7ababb42f --- /dev/null +++ b/crates/openshell-vm/tests/gateway_integration.rs @@ -0,0 +1,155 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Integration tests for the standalone `openshell-vm` binary. +//! +//! These tests require: +//! - libkrun installed (e.g. `brew tap slp/krun && brew install libkrun`) +//! - macOS ARM64 with Apple Hypervisor.framework +//! - An `openshell-vm` binary built with an embedded rootfs tarball +//! (for example via `mise run vm:build:embedded`) +//! +//! 
All tests are `#[ignore]` — run them explicitly: +//! +//! ```sh +//! cargo test -p openshell-vm --test gateway_integration -- --ignored +//! ``` + +#![allow(unsafe_code)] + +use std::net::{SocketAddr, TcpStream}; +use std::process::{Command, Stdio}; +use std::time::{Duration, Instant}; + +/// Path to the built `openshell-vm` binary (resolved by Cargo at compile time). +const GATEWAY: &str = env!("CARGO_BIN_EXE_openshell-vm"); + +// ── Helpers ──────────────────────────────────────────────────────────── + +/// Codesign the binary on macOS so it can access Hypervisor.framework. +fn codesign_if_needed() { + if cfg!(target_os = "macos") { + let entitlements = format!("{}/entitlements.plist", env!("CARGO_MANIFEST_DIR")); + let status = Command::new("codesign") + .args([ + "--entitlements", + &entitlements, + "--force", + "-s", + "-", + GATEWAY, + ]) + .status() + .expect("codesign command failed to execute"); + assert!(status.success(), "failed to codesign openshell-vm binary"); + } +} + +fn assert_runtime_bundle_staged() { + let bundle_dir = std::path::Path::new(GATEWAY) + .parent() + .expect("openshell-vm binary has no parent") + .join("openshell-vm.runtime"); + assert!( + bundle_dir.is_dir(), + "openshell-vm.runtime is missing next to the test binary: {}. Run `mise run vm:bundle-runtime` first.", + bundle_dir.display() + ); +} + +// ── Tests ────────────────────────────────────────────────────────────── + +/// Boot the full `OpenShell` gateway and verify the gRPC service becomes +/// reachable on port 30051. +#[test] +#[ignore] // requires libkrun + rootfs +fn gateway_boots_and_service_becomes_reachable() { + codesign_if_needed(); + assert_runtime_bundle_staged(); + + let mut cmd = Command::new(GATEWAY); + cmd.stdout(Stdio::null()).stderr(Stdio::piped()); + + let mut child = cmd.spawn().expect("failed to start openshell-vm"); + + // Poll for the OpenShell gRPC service. 
+ let addr: SocketAddr = ([127, 0, 0, 1], 30051).into(); + let timeout = Duration::from_secs(180); + let start = Instant::now(); + let mut reachable = false; + + while start.elapsed() < timeout { + if TcpStream::connect_timeout(&addr, Duration::from_secs(1)).is_ok() { + reachable = true; + break; + } + std::thread::sleep(Duration::from_secs(2)); + } + + // Tear down regardless of result. + let _ = unsafe { libc::kill(child.id() as i32, libc::SIGTERM) }; + let _ = child.wait(); + + assert!( + reachable, + "openshell-vm service on port 30051 not reachable within {timeout:?}" + ); +} + +/// Run a trivial command inside the VM via `--exec` and verify it exits +/// successfully, proving the VM boots and can execute guest processes. +#[test] +#[ignore] // requires libkrun + rootfs +fn gateway_exec_runs_guest_command() { + codesign_if_needed(); + assert_runtime_bundle_staged(); + + let mut cmd = Command::new(GATEWAY); + cmd.args(["--exec", "/bin/true"]); + + let output = cmd.output().expect("failed to run openshell-vm --exec"); + + assert!( + output.status.success(), + "openshell-vm --exec /bin/true failed with status {:?}\nstderr: {}", + output.status, + String::from_utf8_lossy(&output.stderr), + ); +} + +/// Boot the VM, then use `openshell-vm exec` against the running instance. 
+#[test] +#[ignore] // requires libkrun + rootfs +fn gateway_exec_attaches_to_running_vm() { + codesign_if_needed(); + assert_runtime_bundle_staged(); + + let mut boot = Command::new(GATEWAY); + boot.stdout(Stdio::null()).stderr(Stdio::piped()); + let mut child = boot.spawn().expect("failed to start openshell-vm VM"); + + let addr: SocketAddr = ([127, 0, 0, 1], 30051).into(); + let timeout = Duration::from_secs(180); + let start = Instant::now(); + while start.elapsed() < timeout { + if TcpStream::connect_timeout(&addr, Duration::from_secs(1)).is_ok() { + break; + } + std::thread::sleep(Duration::from_secs(2)); + } + + let output = Command::new(GATEWAY) + .args(["exec", "--", "/bin/true"]) + .output() + .expect("failed to run openshell-vm exec"); + + let _ = unsafe { libc::kill(child.id() as i32, libc::SIGTERM) }; + let _ = child.wait(); + + assert!( + output.status.success(), + "openshell-vm exec -- /bin/true failed with status {:?}\nstderr: {}", + output.status, + String::from_utf8_lossy(&output.stderr), + ); +} diff --git a/deploy/docker/Dockerfile.vm-macos b/deploy/docker/Dockerfile.vm-macos new file mode 100644 index 000000000..c033e43e8 --- /dev/null +++ b/deploy/docker/Dockerfile.vm-macos @@ -0,0 +1,125 @@ +# syntax=docker/dockerfile:1.6 + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Cross-compile the openshell-vm binary for macOS aarch64 (Apple Silicon) +# using the osxcross toolchain. +# +# The openshell-vm binary loads libkrun/libkrunfw at runtime via dlopen, so it +# does NOT need Hypervisor.framework headers at build time. Pre-compressed +# runtime artifacts (libkrun, libkrunfw, gvproxy, rootfs) are injected via +# the vm-runtime-compressed build context and embedded into the binary via +# include_bytes!(). 
+# +# Usage: +# docker buildx build -f deploy/docker/Dockerfile.vm-macos \ +# --build-arg OPENSHELL_CARGO_VERSION=0.6.0 \ +# --build-context vm-runtime-compressed=/path/to/compressed-dir \ +# --output type=local,dest=out/ . + +ARG OSXCROSS_IMAGE=crazymax/osxcross:latest + +FROM ${OSXCROSS_IMAGE} AS osxcross + +FROM python:3.12-slim AS builder + +ARG CARGO_TARGET_CACHE_SCOPE=default + +ENV PATH="/root/.cargo/bin:/usr/local/bin:/osxcross/bin:${PATH}" +ENV LD_LIBRARY_PATH="/osxcross/lib" + +COPY --from=osxcross /osxcross /osxcross + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + clang \ + cmake \ + curl \ + pkg-config \ + && rm -rf /var/lib/apt/lists/* + +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.88.0 + +RUN rustup target add aarch64-apple-darwin + +WORKDIR /build + +ENV CC_aarch64_apple_darwin=oa64-clang +ENV CXX_aarch64_apple_darwin=oa64-clang++ +ENV AR_aarch64_apple_darwin=aarch64-apple-darwin25.1-ar +ENV CARGO_TARGET_AARCH64_APPLE_DARWIN_LINKER=oa64-clang +ENV CARGO_TARGET_AARCH64_APPLE_DARWIN_AR=aarch64-apple-darwin25.1-ar + +# aws-lc-sys workaround (in case it ends up in the dep tree via feature unification) +RUN ln -sf /osxcross/bin/arm64-apple-darwin25.1-ld /usr/local/bin/arm64-apple-macosx-ld + +# --------------------------------------------------------------------------- +# Stage 1: dependency caching — copy only manifests, create dummy sources, +# build dependencies. This layer is cached unless Cargo.toml/lock changes. 
+# --------------------------------------------------------------------------- +COPY Cargo.toml Cargo.lock ./ +COPY crates/openshell-vm/Cargo.toml crates/openshell-vm/Cargo.toml +COPY crates/openshell-vm/build.rs crates/openshell-vm/build.rs +COPY crates/openshell-core/Cargo.toml crates/openshell-core/Cargo.toml +COPY crates/openshell-core/build.rs crates/openshell-core/build.rs +COPY crates/openshell-bootstrap/Cargo.toml crates/openshell-bootstrap/Cargo.toml +COPY crates/openshell-policy/Cargo.toml crates/openshell-policy/Cargo.toml +COPY proto/ proto/ + +# Scope workspace to VM crates only. +RUN sed -i 's|members = \["crates/\*"\]|members = ["crates/openshell-vm", "crates/openshell-core", "crates/openshell-bootstrap", "crates/openshell-policy"]|' Cargo.toml + +RUN mkdir -p crates/openshell-vm/src \ + crates/openshell-core/src \ + crates/openshell-bootstrap/src \ + crates/openshell-policy/src && \ + echo "fn main() {}" > crates/openshell-vm/src/main.rs && \ + touch crates/openshell-vm/src/lib.rs && \ + touch crates/openshell-core/src/lib.rs && \ + touch crates/openshell-bootstrap/src/lib.rs && \ + touch crates/openshell-policy/src/lib.rs + +# Build deps only (cached layer). +RUN --mount=type=cache,id=cargo-registry-vm-macos,sharing=locked,target=/root/.cargo/registry \ + --mount=type=cache,id=cargo-git-vm-macos,sharing=locked,target=/root/.cargo/git \ + --mount=type=cache,id=cargo-target-vm-macos-${CARGO_TARGET_CACHE_SCOPE},sharing=locked,target=/build/target \ + cargo build --release --target aarch64-apple-darwin -p openshell-vm 2>/dev/null || true + +# --------------------------------------------------------------------------- +# Stage 2: real build with compressed runtime artifacts +# --------------------------------------------------------------------------- +COPY crates/ crates/ + +# Copy compressed VM runtime artifacts for embedding. +# These are passed in via --build-context vm-runtime-compressed=... 
+COPY --from=vm-runtime-compressed / /build/vm-runtime-compressed/ + +# Touch source files to ensure they're rebuilt (not the cached dummy). +RUN touch crates/openshell-vm/src/main.rs \ + crates/openshell-vm/src/lib.rs \ + crates/openshell-vm/build.rs \ + crates/openshell-bootstrap/src/lib.rs \ + crates/openshell-core/src/lib.rs \ + crates/openshell-core/build.rs \ + crates/openshell-policy/src/lib.rs \ + proto/*.proto + +# Declare version ARGs here (not earlier) so the git-hash-bearing values do not +# invalidate the expensive dependency-build layers above on every commit. +ARG OPENSHELL_CARGO_VERSION +ARG OPENSHELL_IMAGE_TAG +RUN --mount=type=cache,id=cargo-registry-vm-macos,sharing=locked,target=/root/.cargo/registry \ + --mount=type=cache,id=cargo-git-vm-macos,sharing=locked,target=/root/.cargo/git \ + --mount=type=cache,id=cargo-target-vm-macos-${CARGO_TARGET_CACHE_SCOPE},sharing=locked,target=/build/target \ + if [ -n "${OPENSHELL_CARGO_VERSION:-}" ]; then \ + sed -i -E '/^\[workspace\.package\]/,/^\[/{s/^version[[:space:]]*=[[:space:]]*".*"/version = "'"${OPENSHELL_CARGO_VERSION}"'"/}' Cargo.toml; \ + fi && \ + OPENSHELL_VM_RUNTIME_COMPRESSED_DIR=/build/vm-runtime-compressed \ + OPENSHELL_IMAGE_TAG="${OPENSHELL_IMAGE_TAG:-dev}" \ + cargo build --release --target aarch64-apple-darwin -p openshell-vm && \ + cp target/aarch64-apple-darwin/release/openshell-vm /openshell-vm + +FROM scratch AS binary +COPY --from=builder /openshell-vm /openshell-vm diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh index 14f13ecb0..b045bf222 100644 --- a/deploy/docker/cluster-entrypoint.sh +++ b/deploy/docker/cluster-entrypoint.sh @@ -461,9 +461,18 @@ if [ -n "${IMAGE_TAG:-}" ] && [ -f "$HELMCHART" ]; then sed -i -E "s|tag:[[:space:]]*\"?latest\"?|tag: \"${IMAGE_TAG}\"|" "$HELMCHART" fi -if [ -n "${IMAGE_PULL_POLICY:-}" ] && [ -f "$HELMCHART" ]; then - echo "Overriding image pull policy to: ${IMAGE_PULL_POLICY}" - sed -i "s|pullPolicy: 
Always|pullPolicy: ${IMAGE_PULL_POLICY}|" "$HELMCHART" +if [ -f "$HELMCHART" ]; then + IMAGE_PULL_POLICY_VALUE="${IMAGE_PULL_POLICY:-Always}" + if [ -n "${IMAGE_PULL_POLICY:-}" ]; then + echo "Overriding image pull policy to: ${IMAGE_PULL_POLICY}" + fi + sed -i "s|__IMAGE_PULL_POLICY__|${IMAGE_PULL_POLICY_VALUE}|g" "$HELMCHART" + + SANDBOX_IMAGE_PULL_POLICY_VALUE="${SANDBOX_IMAGE_PULL_POLICY:-\"\"}" + sed -i "s|__SANDBOX_IMAGE_PULL_POLICY__|${SANDBOX_IMAGE_PULL_POLICY_VALUE}|g" "$HELMCHART" + + DB_URL_VALUE="${DB_URL:-\"sqlite:/var/openshell/openshell.db\"}" + sed -i "s|__DB_URL__|${DB_URL_VALUE}|g" "$HELMCHART" fi # SSH handshake secret: previously generated here and injected via sed into the diff --git a/deploy/kube/manifests/openshell-helmchart.yaml b/deploy/kube/manifests/openshell-helmchart.yaml index ae22ddc6a..a09e0f300 100644 --- a/deploy/kube/manifests/openshell-helmchart.yaml +++ b/deploy/kube/manifests/openshell-helmchart.yaml @@ -27,9 +27,11 @@ spec: image: repository: ghcr.io/nvidia/openshell/gateway tag: latest - pullPolicy: Always + pullPolicy: __IMAGE_PULL_POLICY__ server: sandboxImage: ghcr.io/nvidia/openshell-community/sandboxes/base:latest + sandboxImagePullPolicy: __SANDBOX_IMAGE_PULL_POLICY__ + dbUrl: __DB_URL__ sshGatewayHost: __SSH_GATEWAY_HOST__ sshGatewayPort: __SSH_GATEWAY_PORT__ grpcEndpoint: "https://openshell.openshell.svc.cluster.local:8080" diff --git a/e2e/rust/e2e-vm.sh b/e2e/rust/e2e-vm.sh new file mode 100755 index 000000000..5fd055036 --- /dev/null +++ b/e2e/rust/e2e-vm.sh @@ -0,0 +1,246 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Run the Rust e2e smoke test against an openshell-vm gateway. 
+# +# Usage: +# mise run e2e:vm # start new named VM on random port +# mise run e2e:vm -- --vm-port=30051 # reuse existing VM on port 30051 +# mise run e2e:vm -- --vm-port=30051 --vm-name=my-vm # reuse existing named VM and run exec check +# +# Options: +# --vm-port=PORT Skip VM startup and test against this port. +# --vm-name=NAME VM instance name. Auto-generated for fresh VMs. +# +# When --vm-port is omitted: +# 1. Picks a random free host port +# 2. Starts the VM with --name --port :30051 +# 3. Waits for the VM to fully bootstrap (mTLS certs + gRPC health) +# 4. Verifies `openshell-vm exec` works +# 5. Runs the Rust smoke test +# 6. Tears down the VM +# +# When --vm-port is given the script assumes the VM is already running +# on that port and runs the smoke test. The VM exec check runs only when +# --vm-name is provided (so the script can target the correct instance). +# +# Prerequisites (when starting a new VM): `mise run vm:build` must already +# be done (the e2e:vm mise task handles this via depends). + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +RUNTIME_DIR="${ROOT}/target/debug/openshell-vm.runtime" +GATEWAY_BIN="${ROOT}/target/debug/openshell-vm" +VM_GATEWAY_IMAGE="${IMAGE_REPO_BASE:-openshell}/gateway:${IMAGE_TAG:-dev}" +VM_GATEWAY_TAR_REL="var/lib/rancher/k3s/agent/images/openshell-server.tar.zst" +GUEST_PORT=30051 +TIMEOUT=180 + +named_vm_rootfs() { + local vm_version + + vm_version=$("${GATEWAY_BIN}" --version | awk '{print $2}') + printf '%s\n' "${XDG_DATA_HOME:-${HOME}/.local/share}/openshell/openshell-vm/${vm_version}/instances/${VM_NAME}/rootfs" +} + +vm_exec() { + local rootfs_args=() + if [ -n "${VM_ROOTFS_DIR:-}" ]; then + rootfs_args=(--rootfs "${VM_ROOTFS_DIR}") + fi + "${GATEWAY_BIN}" "${rootfs_args[@]}" --name "${VM_NAME}" exec -- "$@" +} + +prepare_named_vm_rootfs() { + if [ -z "${VM_NAME}" ]; then + return 0 + fi + + echo "Preparing named VM rootfs '${VM_NAME}'..." 
+ VM_ROOTFS_DIR="$("${ROOT}/tasks/scripts/vm/ensure-vm-rootfs.sh" --name "${VM_NAME}" \ + | tail -n 1 | sed 's/^using openshell-vm rootfs at //')" + "${ROOT}/tasks/scripts/vm/sync-vm-rootfs.sh" --name "${VM_NAME}" +} + +refresh_vm_gateway() { + if [ -z "${VM_NAME}" ]; then + return 0 + fi + + echo "Refreshing VM gateway StatefulSet image to ${VM_GATEWAY_IMAGE}..." + # Re-import the host-synced :dev image into the VM's containerd, then + # force a rollout when the StatefulSet already points at the same tag. + vm_exec sh -lc "set -eu; \ + image_tar='/${VM_GATEWAY_TAR_REL}'; \ + k3s ctr -n k8s.io images import \"\${image_tar}\" >/dev/null; \ + current_image=\$(kubectl -n openshell get statefulset/openshell -o jsonpath='{.spec.template.spec.containers[?(@.name==\"openshell\")].image}'); \ + if [ \"\${current_image}\" = \"${VM_GATEWAY_IMAGE}\" ]; then \ + kubectl -n openshell rollout restart statefulset/openshell >/dev/null; \ + else \ + kubectl -n openshell set image statefulset/openshell openshell=${VM_GATEWAY_IMAGE} >/dev/null; \ + fi; \ + kubectl -n openshell rollout status statefulset/openshell --timeout=300s" + echo "Gateway rollout complete." +} + +wait_for_gateway_health() { + local elapsed=0 timeout=60 consecutive_ok=0 + + echo "Waiting for refreshed gateway health..." + while [ "${elapsed}" -lt "${timeout}" ]; do + if "${ROOT}/target/debug/openshell" status >/dev/null 2>&1; then + consecutive_ok=$((consecutive_ok + 1)) + if [ "${consecutive_ok}" -ge 3 ]; then + echo "Gateway health confirmed after refresh." 
+ return 0 + fi + else + consecutive_ok=0 + fi + + sleep 2 + elapsed=$((elapsed + 2)) + done + + echo "ERROR: refreshed gateway did not become healthy after ${timeout}s" + return 1 +} + +# ── Parse arguments ────────────────────────────────────────────────── +VM_PORT="" +VM_NAME="" +VM_ROOTFS_DIR="" +for arg in "$@"; do + case "$arg" in + --vm-port=*) VM_PORT="${arg#--vm-port=}" ;; + --vm-name=*) VM_NAME="${arg#--vm-name=}" ;; + *) echo "Unknown argument: $arg"; exit 1 ;; + esac +done + +# ── Determine mode ─────────────────────────────────────────────────── +if [ -n "${VM_PORT}" ]; then + # Point at an already-running VM. + HOST_PORT="${VM_PORT}" + echo "Using existing VM on port ${HOST_PORT}." + if [ -n "${VM_NAME}" ]; then + prepare_named_vm_rootfs + fi +else + # Pick a random free port and start a new VM. + HOST_PORT=$(python3 -c 'import socket; s=socket.socket(); s.bind(("",0)); print(s.getsockname()[1]); s.close()') + if [ -z "${VM_NAME}" ]; then + VM_NAME="e2e-${HOST_PORT}-$$" + fi + + cleanup() { + local exit_code=$? + if [ -n "${VM_PID:-}" ] && kill -0 "$VM_PID" 2>/dev/null; then + echo "Stopping openshell-vm (pid ${VM_PID})..." + kill "$VM_PID" 2>/dev/null || true + wait "$VM_PID" 2>/dev/null || true + fi + # On failure, preserve the VM console log for post-mortem debugging. + if [ "$exit_code" -ne 0 ] && [ -n "${VM_NAME:-}" ]; then + local console_log + console_log="$(named_vm_rootfs)-console.log" + if [ -f "$console_log" ]; then + echo "=== VM console log (preserved for debugging) ===" + cat "$console_log" + echo "=== end VM console log ===" + fi + fi + rm -f "${VM_LOG:-}" 2>/dev/null || true + if [ -n "${VM_NAME:-}" ]; then + rm -rf "$(dirname "$(named_vm_rootfs)")" 2>/dev/null || true + fi + } + trap cleanup EXIT + + prepare_named_vm_rootfs + + echo "Starting openshell-vm '${VM_NAME}' on port ${HOST_PORT}..." 
+ if [ "$(uname -s)" = "Darwin" ]; then + export DYLD_FALLBACK_LIBRARY_PATH="${RUNTIME_DIR}${DYLD_FALLBACK_LIBRARY_PATH:+:${DYLD_FALLBACK_LIBRARY_PATH}}" + fi + + VM_LOG=$(mktemp /tmp/openshell-vm-e2e.XXXXXX) + rootfs_args=() + if [ -n "${VM_ROOTFS_DIR}" ]; then + rootfs_args=(--rootfs "${VM_ROOTFS_DIR}") + fi + "${GATEWAY_BIN}" "${rootfs_args[@]}" --name "${VM_NAME}" --port "${HOST_PORT}:${GUEST_PORT}" 2>"${VM_LOG}" & + VM_PID=$! + + # ── Wait for full bootstrap (mTLS certs + gRPC health) ───────────── + # The VM prints "Ready [Xs total]" to stderr after bootstrap_gateway() + # stores mTLS certs and wait_for_gateway_ready() confirms the gRPC + # service is responding. Waiting only for TCP port reachability (nc -z) + # is insufficient because port forwarding is established before the + # mTLS certs are written, causing `openshell status` to fail. + echo "Waiting for VM bootstrap to complete (timeout ${TIMEOUT}s)..." + elapsed=0 + while ! grep -q "^Ready " "${VM_LOG}" 2>/dev/null; do + if ! kill -0 "$VM_PID" 2>/dev/null; then + echo "ERROR: openshell-vm exited before becoming ready" + echo "VM log:" + cat "${VM_LOG}" + exit 1 + fi + if [ "$elapsed" -ge "$TIMEOUT" ]; then + echo "ERROR: openshell-vm did not become ready after ${TIMEOUT}s" + echo "VM log:" + cat "${VM_LOG}" + exit 1 + fi + sleep 2 + elapsed=$((elapsed + 2)) + done + echo "Gateway is ready (${elapsed}s)." + echo "VM log:" + cat "${VM_LOG}" +fi + +# ── Exec into the VM (when instance name is known) ─────────────────── +if [ -n "${VM_NAME}" ]; then + echo "Verifying openshell-vm exec for '${VM_NAME}'..." + exec_elapsed=0 + exec_timeout=60 + until vm_exec /bin/true; do + if [ "$exec_elapsed" -ge "$exec_timeout" ]; then + echo "ERROR: openshell-vm exec did not become ready after ${exec_timeout}s" + exit 1 + fi + sleep 2 + exec_elapsed=$((exec_elapsed + 2)) + done + echo "VM exec succeeded." +else + echo "Skipping openshell-vm exec check (provide --vm-name for existing VMs)." 
+fi + +refresh_vm_gateway + +# ── Run the smoke test ─────────────────────────────────────────────── +# The openshell CLI reads OPENSHELL_GATEWAY_ENDPOINT to connect to the +# gateway directly, and OPENSHELL_GATEWAY to resolve mTLS certs from +# ~/.config/openshell/gateways//mtls/. +# In the VM, the overlayfs snapshotter re-extracts all image layers on +# every boot. The 1GB sandbox base image extraction can take >300s +# under contention, so allow 600s for sandbox provisioning. +export OPENSHELL_PROVISION_TIMEOUT=600 +export OPENSHELL_GATEWAY_ENDPOINT="https://127.0.0.1:${HOST_PORT}" +if [ -n "${VM_NAME}" ]; then + export OPENSHELL_GATEWAY="openshell-vm-${VM_NAME}" +else + export OPENSHELL_GATEWAY="openshell-vm" +fi + +echo "Running e2e smoke test (gateway: ${OPENSHELL_GATEWAY}, endpoint: ${OPENSHELL_GATEWAY_ENDPOINT})..." +cargo build -p openshell-cli --features openshell-core/dev-settings +wait_for_gateway_health +cargo test --manifest-path e2e/rust/Cargo.toml --features e2e --test smoke -- --nocapture + +echo "Smoke test passed." diff --git a/e2e/rust/src/harness/sandbox.rs b/e2e/rust/src/harness/sandbox.rs index 7a942265d..3a9601a83 100644 --- a/e2e/rust/src/harness/sandbox.rs +++ b/e2e/rust/src/harness/sandbox.rs @@ -25,7 +25,11 @@ fn extract_sandbox_name(output: &str) -> Option { } /// Default timeout for waiting for a sandbox to become ready. -const SANDBOX_READY_TIMEOUT: Duration = Duration::from_secs(300); +/// In VM mode, the overlayfs snapshotter re-extracts all image layers +/// from the content store on every boot (~250s for the 1GB sandbox +/// base image), so 600s accommodates extraction + workspace-init + pod +/// startup. +const SANDBOX_READY_TIMEOUT: Duration = Duration::from_secs(600); /// RAII guard that deletes a sandbox on drop. 
/// diff --git a/e2e/rust/tests/smoke.rs b/e2e/rust/tests/smoke.rs new file mode 100644 index 000000000..c380efc8c --- /dev/null +++ b/e2e/rust/tests/smoke.rs @@ -0,0 +1,97 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![cfg(feature = "e2e")] + +//! Smoke test: verify the gateway is healthy, create a sandbox, exec a +//! command inside it, and tear it down. +//! +//! This test is cluster-agnostic — it works against any running gateway +//! (Docker-based cluster or openshell-vm microVM). The `e2e:vm` mise +//! task uses it to validate the VM gateway after boot. + +use std::process::Stdio; +use std::time::Duration; + +use openshell_e2e::harness::binary::openshell_cmd; +use openshell_e2e::harness::output::strip_ansi; +use openshell_e2e::harness::sandbox::SandboxGuard; + +/// End-to-end smoke test: status → create → exec → list → delete. +#[tokio::test] +async fn gateway_smoke() { + // ── 1. Gateway must be reachable ────────────────────────────────── + let mut clean_status = String::new(); + let mut status_ok = false; + for _ in 0..15 { + let mut status_cmd = openshell_cmd(); + status_cmd + .arg("status") + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let status_out = status_cmd + .output() + .await + .expect("failed to run openshell status"); + + let status_text = format!( + "{}{}", + String::from_utf8_lossy(&status_out.stdout), + String::from_utf8_lossy(&status_out.stderr), + ); + clean_status = strip_ansi(&status_text); + + if status_out.status.success() && clean_status.contains("Connected") { + status_ok = true; + break; + } + + tokio::time::sleep(Duration::from_secs(2)).await; + } + + assert!( + status_ok, + "openshell status never became healthy:\n{clean_status}", + ); + + // ── 2. 
Create a sandbox and exec a command ─────────────────────── + // Default behaviour keeps the sandbox alive after the command exits, + // so we can verify it in the list before cleaning up. + let mut sb = SandboxGuard::create(&["--", "echo", "smoke-ok"]) + .await + .expect("sandbox create should succeed"); + + assert!( + sb.create_output.contains("smoke-ok"), + "expected 'smoke-ok' in sandbox output:\n{}", + sb.create_output, + ); + + // ── 3. Verify the sandbox appeared in the list ─────────────────── + let mut list_cmd = openshell_cmd(); + list_cmd + .args(["sandbox", "list", "--names"]) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let list_out = list_cmd + .output() + .await + .expect("failed to run openshell sandbox list"); + + let list_text = strip_ansi(&format!( + "{}{}", + String::from_utf8_lossy(&list_out.stdout), + String::from_utf8_lossy(&list_out.stderr), + )); + + assert!( + list_text.contains(&sb.name), + "sandbox '{}' should appear in list output:\n{list_text}", + sb.name, + ); + + // ── 4. Cleanup ─────────────────────────────────────────────────── + sb.cleanup().await; +} diff --git a/pyproject.toml b/pyproject.toml index 60d5177d5..899885929 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ dev = [ "maturin>=1.5,<2.0", "setuptools-scm>=8", "grpcio-tools>=1.60", + "pyelftools>=0.30", ] docs = [ "sphinx<=7.5", diff --git a/scripts/bin/openshell-vm b/scripts/bin/openshell-vm new file mode 100755 index 000000000..6513219eb --- /dev/null +++ b/scripts/bin/openshell-vm @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +BINARY="$PROJECT_ROOT/target/debug/openshell-vm" + +cargo build --package openshell-vm --bin openshell-vm --quiet + +# On macOS, codesign with the hypervisor entitlement so libkrun can use +# Apple's Hypervisor.framework. Re-sign after every build. 
+ENTITLEMENTS="$PROJECT_ROOT/crates/openshell-vm/entitlements.plist" +if [[ "$(uname)" == "Darwin" ]] && [[ -f "$ENTITLEMENTS" ]]; then + codesign --entitlements "$ENTITLEMENTS" --force -s - "$BINARY" 2>/dev/null +fi +exec "$BINARY" "$@" diff --git a/tasks/scripts/vm/_lib.sh b/tasks/scripts/vm/_lib.sh new file mode 100755 index 000000000..b925492a3 --- /dev/null +++ b/tasks/scripts/vm/_lib.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Shared helpers for openshell-vm build scripts. +# Source this file from other scripts: +# source "$(dirname "${BASH_SOURCE[0]}")/_lib.sh" + +# ── Root directory ────────────────────────────────────────────────────── + +vm_lib_root() { + cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd +} + +# ── Platform detection ────────────────────────────────────────────────── + +# Detect the current platform and echo one of: +# darwin-aarch64, linux-aarch64, linux-x86_64 +# Exits with error on unsupported platforms. +detect_platform() { + case "$(uname -s)-$(uname -m)" in + Darwin-arm64) echo "darwin-aarch64" ;; + Linux-aarch64) echo "linux-aarch64" ;; + Linux-x86_64) echo "linux-x86_64" ;; + *) + echo "Error: Unsupported platform: $(uname -s)-$(uname -m)" >&2 + echo "Supported: macOS ARM64, Linux ARM64, Linux x86_64" >&2 + return 1 + ;; + esac +} + +# ── Compression helpers ───────────────────────────────────────────────── + +# Compress a single file with zstd level 19, reporting sizes. 
+# Usage: compress_file <input> <output>
+# +# Prerequisites: +# - macOS ARM64 (Apple Silicon) +# - Xcode Command Line Tools +# - Homebrew: brew install rust lld dtc xz libkrunfw +# +# Usage: +# ./build-libkrun-macos.sh +# +# Output: +# target/libkrun-build/libkrun.dylib - portable dylib with @loader_path rpath + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)" +BUILD_DIR="${ROOT}/target/libkrun-build" +OUTPUT_DIR="${BUILD_DIR}" +BREW_PREFIX="$(brew --prefix 2>/dev/null || echo /opt/homebrew)" +CUSTOM_RUNTIME="${ROOT}/target/custom-runtime" + +if [ "$(uname -s)" != "Darwin" ]; then + echo "Error: This script only runs on macOS" >&2 + exit 1 +fi + +if [ "$(uname -m)" != "arm64" ]; then + echo "Error: libkrun on macOS only supports ARM64 (Apple Silicon)" >&2 + exit 1 +fi + +ARCH="$(uname -m)" +echo "==> Building libkrun for macOS ${ARCH} (no GPU support)" +echo " Build directory: ${BUILD_DIR}" +echo "" + +# ── Check dependencies ────────────────────────────────────────────────── + +check_deps() { + echo "==> Checking build dependencies..." + + MISSING="" + + # Check for Rust + if ! command -v cargo &>/dev/null; then + MISSING="$MISSING rust" + fi + + # Check for lld (LLVM linker) + if ! command -v ld.lld &>/dev/null && ! [ -x "${BREW_PREFIX}/opt/llvm/bin/ld.lld" ]; then + MISSING="$MISSING lld" + fi + + # Check for dtc (device tree compiler) + if ! command -v dtc &>/dev/null; then + MISSING="$MISSING dtc" + fi + + # Check for libkrunfw + if [ ! -f "${BREW_PREFIX}/lib/libkrunfw.dylib" ] && \ + [ ! -f "${BREW_PREFIX}/lib/libkrunfw.5.dylib" ] && \ + [ ! 
-f "${CUSTOM_RUNTIME}/libkrunfw.dylib" ]; then + MISSING="$MISSING libkrunfw" + fi + + if [ -n "$MISSING" ]; then + echo "Error: Missing dependencies:$MISSING" >&2 + echo "" >&2 + echo "Install with: brew install$MISSING" >&2 + exit 1 + fi + + echo " All dependencies found" +} + +check_deps + +# ── Setup build directory ─────────────────────────────────────────────── + +mkdir -p "$BUILD_DIR" +cd "$BUILD_DIR" + +# ── Clone libkrun ─────────────────────────────────────────────────────── + +LIBKRUN_REF="${LIBKRUN_REF:-e5922f6}" + +if [ ! -d libkrun ]; then + echo "==> Cloning libkrun..." + git clone https://github.com/containers/libkrun.git +fi + +echo "==> Checking out ${LIBKRUN_REF}..." +cd libkrun +git fetch origin --tags +git checkout "${LIBKRUN_REF}" 2>/dev/null || git checkout "tags/${LIBKRUN_REF}" 2>/dev/null || { + echo "Error: Could not checkout ${LIBKRUN_REF}" >&2 + exit 1 +} +cd .. + +LIBKRUN_COMMIT=$(git -C libkrun rev-parse HEAD) +echo " Commit: ${LIBKRUN_COMMIT}" + +cd libkrun + +# ── Build libkrun ─────────────────────────────────────────────────────── + +echo "" +echo "==> Building libkrun with NET=1 BLK=1 (no GPU)..." 
+ +# Find libkrunfw - prefer custom build with bridge support +if [ -f "${CUSTOM_RUNTIME}/provenance.json" ] && [ -f "${CUSTOM_RUNTIME}/libkrunfw.dylib" ]; then + LIBKRUNFW_DIR="${CUSTOM_RUNTIME}" + echo " Using custom libkrunfw from ${LIBKRUNFW_DIR}" +else + LIBKRUNFW_DIR="${BREW_PREFIX}/lib" + echo " Using Homebrew libkrunfw from ${LIBKRUNFW_DIR}" +fi + +# Set library search paths for build +export LIBRARY_PATH="${LIBKRUNFW_DIR}:${BREW_PREFIX}/lib:${LIBRARY_PATH:-}" +export DYLD_LIBRARY_PATH="${LIBKRUNFW_DIR}:${BREW_PREFIX}/lib:${DYLD_LIBRARY_PATH:-}" + +# Set up LLVM/clang for bindgen (required by krun_display/krun_input if they get compiled) +# Note: DYLD_LIBRARY_PATH is needed at runtime for the build scripts that use libclang +LLVM_PREFIX="${BREW_PREFIX}/opt/llvm" +if [ -d "$LLVM_PREFIX" ]; then + export LIBCLANG_PATH="${LLVM_PREFIX}/lib" + export DYLD_LIBRARY_PATH="${LLVM_PREFIX}/lib:${DYLD_LIBRARY_PATH:-}" +fi + +# Build with BLK and NET features only (no GPU) +# This avoids the virglrenderer → libepoxy → MoltenVK dependency chain +make clean 2>/dev/null || true +make BLK=1 NET=1 -j"$(sysctl -n hw.ncpu)" + +# ── Rewrite dylib paths for portability ───────────────────────────────── + +echo "" +echo "==> Making dylib portable with @loader_path..." + +DYLIB="target/release/libkrun.dylib" +if [ ! 
-f "$DYLIB" ]; then + echo "Error: Build did not produce $DYLIB" >&2 + exit 1 +fi + +# Copy to output +cp "$DYLIB" "${OUTPUT_DIR}/libkrun.dylib" +DYLIB="${OUTPUT_DIR}/libkrun.dylib" + +# Show current dependencies +echo " Original dependencies:" +otool -L "$DYLIB" | grep -v "^/" | sed 's/^/ /' + +# Rewrite the install name to use @loader_path (makes it relocatable) +install_name_tool -id "@loader_path/libkrun.dylib" "$DYLIB" + +# Rewrite libkrunfw path to @loader_path (will be bundled alongside) +# Find what libkrunfw path is currently referenced +# Note: grep may not find anything (libkrunfw is loaded via dlopen), so we use || true +KRUNFW_PATH=$(otool -L "$DYLIB" | grep libkrunfw | awk '{print $1}' || true) +if [ -n "$KRUNFW_PATH" ]; then + install_name_tool -change "$KRUNFW_PATH" "@loader_path/libkrunfw.dylib" "$DYLIB" + echo " Rewrote: $KRUNFW_PATH → @loader_path/libkrunfw.dylib" +fi + +# Re-codesign after modifications (required on macOS) +codesign -f -s - "$DYLIB" + +# Show final dependencies +echo "" +echo " Final dependencies:" +otool -L "$DYLIB" | grep -v "^/" | sed 's/^/ /' + +# Verify no hardcoded homebrew paths remain +if otool -L "$DYLIB" | grep -q "/opt/homebrew"; then + echo "" + echo "Warning: Homebrew paths still present in dylib!" >&2 + otool -L "$DYLIB" | grep "/opt/homebrew" | sed 's/^/ /' +else + echo "" + echo " ✓ No hardcoded Homebrew paths" +fi + +# ── Copy libkrunfw to output ──────────────────────────────────────────── + +echo "" +echo "==> Bundling libkrunfw..." 
+ +# Find and copy libkrunfw +KRUNFW_SRC="" +for candidate in \ + "${CUSTOM_RUNTIME}/libkrunfw.dylib" \ + "${CUSTOM_RUNTIME}/libkrunfw.5.dylib" \ + "${BREW_PREFIX}/lib/libkrunfw.dylib" \ + "${BREW_PREFIX}/lib/libkrunfw.5.dylib"; do + if [ -f "$candidate" ]; then + # Resolve symlinks + if [ -L "$candidate" ]; then + KRUNFW_SRC=$(readlink -f "$candidate" 2>/dev/null || readlink "$candidate") + if [[ "$KRUNFW_SRC" != /* ]]; then + KRUNFW_SRC="$(dirname "$candidate")/${KRUNFW_SRC}" + fi + else + KRUNFW_SRC="$candidate" + fi + break + fi +done + +if [ -z "$KRUNFW_SRC" ]; then + echo "Error: Could not find libkrunfw.dylib" >&2 + exit 1 +fi + +cp "$KRUNFW_SRC" "${OUTPUT_DIR}/libkrunfw.dylib" +echo " Copied: $KRUNFW_SRC" + +# Make libkrunfw portable too +install_name_tool -id "@loader_path/libkrunfw.dylib" "${OUTPUT_DIR}/libkrunfw.dylib" +codesign -f -s - "${OUTPUT_DIR}/libkrunfw.dylib" + +# Check libkrunfw dependencies +echo " libkrunfw dependencies:" +otool -L "${OUTPUT_DIR}/libkrunfw.dylib" | grep -v "^/" | sed 's/^/ /' + +# ── Summary ───────────────────────────────────────────────────────────── + +cd "$BUILD_DIR" + +echo "" +echo "==> Build complete!" +echo " Output directory: ${OUTPUT_DIR}" +echo "" +echo " Artifacts:" +ls -lah "${OUTPUT_DIR}"/*.dylib + +# Verify portability +echo "" +echo "==> Verifying portability..." +ALL_GOOD=true + +for lib in "${OUTPUT_DIR}"/*.dylib; do + if otool -L "$lib" | grep -q "/opt/homebrew"; then + echo " ✗ $(basename "$lib") has hardcoded paths" + ALL_GOOD=false + else + echo " ✓ $(basename "$lib") is portable" + fi +done + +if $ALL_GOOD; then + echo "" + echo "All libraries are portable!" 
+ echo "" + echo "Next step: mise run vm:build" +else + echo "" + echo "Warning: Some libraries have non-portable paths" + echo "They may not work on machines without Homebrew" +fi diff --git a/tasks/scripts/vm/build-libkrun.sh b/tasks/scripts/vm/build-libkrun.sh new file mode 100755 index 000000000..2c01c65de --- /dev/null +++ b/tasks/scripts/vm/build-libkrun.sh @@ -0,0 +1,248 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build libkrun and libkrunfw from source on Linux. +# +# This script builds libkrun (VMM) and libkrunfw (kernel firmware) from source +# with OpenShell's custom kernel configuration for bridge/netfilter support. +# +# Prerequisites: +# - Linux (aarch64 or x86_64) +# - Build tools: make, git, gcc, flex, bison, bc +# - Python 3 with pyelftools +# - Rust toolchain +# +# Usage: +# ./build-libkrun.sh +# +# The script will install missing dependencies on Debian/Ubuntu and Fedora. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/_lib.sh" +ROOT="$(vm_lib_root)" + +# Source pinned dependency versions +source "${ROOT}/crates/openshell-vm/pins.env" 2>/dev/null || true + +BUILD_DIR="${ROOT}/target/libkrun-build" +OUTPUT_DIR="${BUILD_DIR}" +KERNEL_CONFIG="${ROOT}/crates/openshell-vm/runtime/kernel/openshell.kconfig" + +if [ "$(uname -s)" != "Linux" ]; then + echo "Error: This script only runs on Linux" >&2 + exit 1 +fi + +ARCH="$(uname -m)" +echo "==> Building libkrun for Linux ${ARCH}" +echo " Build directory: ${BUILD_DIR}" +echo " Kernel config: ${KERNEL_CONFIG}" +echo "" + +# ── Install dependencies ──────────────────────────────────────────────── + +install_deps() { + echo "==> Checking/installing build dependencies..." 
+ + if command -v apt-get &>/dev/null; then + # Debian/Ubuntu + DEPS="build-essential git python3 python3-pyelftools flex bison libelf-dev libssl-dev bc curl libclang-dev" + MISSING="" + for dep in $DEPS; do + if ! dpkg -s "$dep" &>/dev/null; then + MISSING="$MISSING $dep" + fi + done + if [ -n "$MISSING" ]; then + echo " Installing:$MISSING" + sudo apt-get update + sudo apt-get install -y $MISSING + else + echo " All dependencies installed" + fi + + elif command -v dnf &>/dev/null; then + # Fedora/RHEL + DEPS="make git python3 python3-pyelftools gcc flex bison elfutils-libelf-devel openssl-devel bc glibc-static curl clang-devel" + echo " Installing dependencies via dnf..." + sudo dnf install -y $DEPS + + else + echo "Warning: Unknown package manager. Please install manually:" >&2 + echo " build-essential git python3 python3-pyelftools flex bison" >&2 + echo " libelf-dev libssl-dev bc curl" >&2 + fi +} + +install_deps + +# ── Setup build directory ─────────────────────────────────────────────── + +mkdir -p "$BUILD_DIR" +cd "$BUILD_DIR" + +# ── Build libkrunfw (kernel firmware) ─────────────────────────────────── + +echo "" +echo "==> Building libkrunfw with custom kernel config..." + +if [ ! -d libkrunfw ]; then + echo " Cloning libkrunfw (pinned: ${LIBKRUNFW_REF:-HEAD})..." + git clone https://github.com/containers/libkrunfw.git +fi + +cd libkrunfw + +# Ensure we're on the pinned commit for reproducible builds +if [ -n "${LIBKRUNFW_REF:-}" ]; then + echo " Checking out pinned ref: ${LIBKRUNFW_REF}" + git fetch origin + git checkout "${LIBKRUNFW_REF}" +fi + +# Copy custom kernel config fragment +if [ -f "$KERNEL_CONFIG" ]; then + cp "$KERNEL_CONFIG" openshell.kconfig + echo " Applied custom kernel config fragment: openshell.kconfig" +else + echo "Warning: Custom kernel config not found at ${KERNEL_CONFIG}" >&2 + echo " Building with default config (k3s networking may not work)" >&2 +fi + +echo " Building kernel and libkrunfw (this may take 15-20 minutes)..." 
+ +# The libkrunfw Makefile does not support a config fragment — it copies the +# base config and runs olddefconfig, then builds the kernel image in one +# make invocation. We cannot inject the fragment mid-build via make flags. +# +# Instead we drive the build in two phases: +# +# Phase 1: Run the Makefile's $(KERNEL_SOURCES) target, which: +# - downloads and extracts the kernel tarball (if needed) +# - applies patches +# - copies config-libkrunfw_aarch64 to $(KERNEL_SOURCES)/.config +# - runs olddefconfig +# +# Phase 2: Merge our fragment on top of the .config produced by Phase 1 +# using the kernel's own merge_config.sh, then re-run olddefconfig +# to resolve new dependency chains (e.g. CONFIG_BRIDGE pulls in +# CONFIG_BRIDGE_NETFILTER which needs CONFIG_NETFILTER etc). +# +# Phase 3: Let the Makefile build everything (kernel + kernel.c + .so), +# skipping the $(KERNEL_SOURCES) target since it already exists. + +KERNEL_VERSION="$(grep '^KERNEL_VERSION' Makefile | head -1 | awk '{print $3}')" +KERNEL_SOURCES="${KERNEL_VERSION}" + +# Phase 1: prepare kernel source tree + base .config. +# Run the Makefile's $(KERNEL_SOURCES) target whenever the .config is absent +# (either because the tree was never extracted, or because it was cleaned). +# The target is idempotent: if the directory already exists make skips the +# tarball extraction but still copies the base config and runs olddefconfig. +if [ ! -f "${KERNEL_SOURCES}/.config" ]; then + echo " Phase 1: preparing kernel source tree and base .config..." + # Remove the directory so make re-runs the full $(KERNEL_SOURCES) recipe + # (extract + patch + config copy + olddefconfig). + rm -rf "${KERNEL_SOURCES}" + make "${KERNEL_SOURCES}" +else + echo " Phase 1: kernel source tree and .config already present, skipping" +fi + +# Phase 2: merge the openshell fragment on top +if [ -f openshell.kconfig ]; then + echo " Phase 2: merging openshell.kconfig fragment..." 
+ + # merge_config.sh must be called with ARCH set so it finds the right Kconfig + # entry points. -m means "merge into existing .config" (vs starting fresh). + ARCH=arm64 KCONFIG_CONFIG="${KERNEL_SOURCES}/.config" \ + "${KERNEL_SOURCES}/scripts/kconfig/merge_config.sh" \ + -m -O "${KERNEL_SOURCES}" \ + "${KERNEL_SOURCES}/.config" \ + openshell.kconfig + + # Re-run olddefconfig to fill in any new symbols introduced by the fragment. + make -C "${KERNEL_SOURCES}" ARCH=arm64 olddefconfig + + # Verify that the key options were actually applied. + all_ok=true + for opt in CONFIG_BRIDGE CONFIG_NETFILTER CONFIG_NF_NAT; do + val="$(grep "^${opt}=" "${KERNEL_SOURCES}/.config" 2>/dev/null || true)" + if [ -n "$val" ]; then + echo " ${opt}: ${val#*=}" + else + echo " WARNING: ${opt} not set after merge!" >&2 + all_ok=false + fi + done + if [ "$all_ok" = false ]; then + echo "ERROR: kernel config fragment merge failed — required options missing" >&2 + exit 1 + fi + + # The kernel binary and kernel.c from the previous (bad) build must be + # removed so make rebuilds them with the updated .config. + rm -f kernel.c "${KERNEL_SOURCES}/arch/arm64/boot/Image" \ + "${KERNEL_SOURCES}/vmlinux" libkrunfw.so* +fi + +# Phase 3: build kernel image, kernel.c bundle, and the shared library +make -j"$(nproc)" + +# Copy output +cp libkrunfw.so* "$OUTPUT_DIR/" +echo " Built: $(ls "$OUTPUT_DIR"/libkrunfw.so* | xargs -n1 basename | tr '\n' ' ')" + +cd "$BUILD_DIR" + +# ── Build libkrun (VMM) ───────────────────────────────────────────────── + +echo "" +echo "==> Building libkrun..." + +if [ ! -d libkrun ]; then + echo " Cloning libkrun..." + git clone --depth 1 https://github.com/containers/libkrun.git +fi + +cd libkrun + +# Build with NET support for gvproxy networking and BLK support for the +# host-backed state disk. +echo " Building libkrun with NET=1 BLK=1..." + +# Locate libclang for clang-sys if LIBCLANG_PATH isn't already set. 
+# clang-sys looks for libclang.so or libclang-*.so; on Debian/Ubuntu the +# versioned file (e.g. libclang-18.so.18) lives under the LLVM lib dir. +if [ -z "${LIBCLANG_PATH:-}" ]; then + for llvm_lib in /usr/lib/llvm-*/lib; do + if ls "$llvm_lib"/libclang*.so* &>/dev/null; then + export LIBCLANG_PATH="$llvm_lib" + echo " LIBCLANG_PATH=$LIBCLANG_PATH" + break + fi + done +fi + +make NET=1 BLK=1 -j"$(nproc)" + +# Copy output +cp target/release/libkrun.so "$OUTPUT_DIR/" +echo " Built: libkrun.so" + +cd "$BUILD_DIR" + +# ── Summary ───────────────────────────────────────────────────────────── + +echo "" +echo "==> Build complete!" +echo " Output directory: ${OUTPUT_DIR}" +echo "" +echo " Artifacts:" +ls -lah "$OUTPUT_DIR"/*.so* + +echo "" +echo "Next step: mise run vm:build" diff --git a/tasks/scripts/vm/build-rootfs-tarball.sh b/tasks/scripts/vm/build-rootfs-tarball.sh new file mode 100755 index 000000000..76e4f6297 --- /dev/null +++ b/tasks/scripts/vm/build-rootfs-tarball.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build rootfs and compress to tarball for embedding in openshell-vm binary. +# +# This script: +# 1. Builds the rootfs using build-rootfs.sh +# 2. Compresses it to a zstd tarball for embedding +# +# Usage: +# ./build-rootfs-tarball.sh [--base] +# +# Options: +# --base Build a base rootfs (~200-300MB) without pre-loaded images. +# First boot will be slower but binary size is much smaller. +# Default: full rootfs with pre-loaded images (~2GB+). +# +# The resulting tarball is placed at target/vm-runtime-compressed/rootfs.tar.zst +# for inclusion in the embedded binary build. + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" +ROOTFS_BUILD_DIR="${ROOT}/target/rootfs-build" +OUTPUT_DIR="${ROOT}/target/vm-runtime-compressed" +OUTPUT="${OUTPUT_DIR}/rootfs.tar.zst" + +# Parse arguments +BASE_ONLY=false +for arg in "$@"; do + case "$arg" in + --base) + BASE_ONLY=true + ;; + --help|-h) + echo "Usage: $0 [--base]" + echo "" + echo "Options:" + echo " --base Build base rootfs (~200-300MB) without pre-loaded images" + echo " First boot will be slower but binary size is much smaller" + exit 0 + ;; + *) + echo "Unknown option: $arg" + echo "Use --help for usage information" + exit 1 + ;; + esac +done + +# Check for Docker +if ! command -v docker &>/dev/null; then + echo "Error: Docker is required to build the rootfs" >&2 + echo "Please install Docker and try again" >&2 + exit 1 +fi + +# Check if Docker daemon is running +if ! docker info &>/dev/null; then + echo "Error: Docker daemon is not running" >&2 + echo "Please start Docker and try again" >&2 + exit 1 +fi + +if [ "$BASE_ONLY" = true ]; then + echo "==> Building BASE rootfs for embedding" + echo " Build dir: ${ROOTFS_BUILD_DIR}" + echo " Output: ${OUTPUT}" + echo " Mode: base (no pre-loaded images, ~200-300MB)" + echo "" + + # Build base rootfs + echo "==> Step 1/2: Building base rootfs..." + "${ROOT}/crates/openshell-vm/scripts/build-rootfs.sh" --base "${ROOTFS_BUILD_DIR}" +else + echo "==> Building FULL rootfs for embedding" + echo " Build dir: ${ROOTFS_BUILD_DIR}" + echo " Output: ${OUTPUT}" + echo " Mode: full (pre-loaded images, pre-initialized, ~2GB+)" + echo "" + + # Build full rootfs + echo "==> Step 1/2: Building full rootfs (this may take 10-15 minutes)..." + "${ROOT}/crates/openshell-vm/scripts/build-rootfs.sh" "${ROOTFS_BUILD_DIR}" +fi + +# Compress to tarball +echo "" +echo "==> Step 2/2: Compressing rootfs to tarball..." 
+mkdir -p "${OUTPUT_DIR}"
+
+# Remove existing tarball if present
+rm -f "${OUTPUT}"
+
+# Get uncompressed size for display
+echo " Uncompressed size: $(du -sh "${ROOTFS_BUILD_DIR}" | cut -f1)"
+
+# Create tarball with zstd compression
+# -19 = high compression (slower but smaller)
+# -T0 = use all available threads
+echo " Compressing with zstd (level 19, this may take a few minutes)..."
+tar -C "${ROOTFS_BUILD_DIR}" -cf - . | zstd -19 -T0 -o "${OUTPUT}"
+
+# Report results
+echo ""
+echo "==> Rootfs tarball created successfully!"
+echo " Output: ${OUTPUT}"
+echo " Compressed: $(du -sh "${OUTPUT}" | cut -f1)"
+if [ "$BASE_ONLY" = true ]; then
+ echo " Type: base (first boot ~30-60s, images pulled on demand)"
+else
+ echo " Type: full (first boot ~3-5s, images pre-loaded)"
+fi
+echo ""
+echo "Next step: mise run vm:build"
diff --git a/tasks/scripts/vm/bundle-vm-runtime.sh b/tasks/scripts/vm/bundle-vm-runtime.sh
new file mode 100755
index 000000000..6c21e511d
--- /dev/null
+++ b/tasks/scripts/vm/bundle-vm-runtime.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Stage the openshell-vm sidecar runtime bundle next to local build outputs.
+#
+# Copies the uncompressed VM runtime libraries (libkrun, libkrunfw, gvproxy)
+# from target/vm-runtime/ into the .runtime sidecar directories alongside
+# each build output. This is required for:
+# - build-rootfs.sh pre-initialization (boots the real VM to pre-bake k3s state)
+# - Direct invocation of target/debug/openshell-vm without embedding
+#
+# The source artifacts are collected by compress-vm-runtime.sh into
+# target/vm-runtime/ before compression; this script re-uses that work dir.
+#
+# Usage:
+# ./tasks/scripts/vm/bundle-vm-runtime.sh

+set -euo pipefail
+
+ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" || ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" + +SOURCE_DIR="${ROOT}/target/vm-runtime" + +if [ ! -d "${SOURCE_DIR}" ]; then + echo "ERROR: VM runtime source not found at ${SOURCE_DIR}" + echo " Run: mise run vm:setup" + exit 1 +fi + +# Verify required files are present +for required in libkrun.so gvproxy; do + if ! ls "${SOURCE_DIR}/${required}" >/dev/null 2>&1; then + # Try platform-specific variants + if [ "$required" = "libkrun.so" ] && ls "${SOURCE_DIR}"/libkrun.dylib >/dev/null 2>&1; then + continue + fi + echo "ERROR: Required runtime file not found: ${SOURCE_DIR}/${required}" + echo " Run: mise run vm:setup" + exit 1 + fi +done + +TARGETS=( + "${ROOT}/target/debug" + "${ROOT}/target/release" +) + +for target_dir in "${TARGETS[@]}"; do + # Only stage if the binary exists (avoid creating orphan runtime dirs) + if [ ! -f "${target_dir}/openshell-vm" ] && [ ! -f "${target_dir}/openshell-vm.d" ]; then + continue + fi + + runtime_dir="${target_dir}/openshell-vm.runtime" + mkdir -p "${runtime_dir}" + + for file in "${SOURCE_DIR}"/*; do + [ -f "$file" ] || continue + name="$(basename "$file")" + install -m 0755 "$file" "${runtime_dir}/${name}" + done + + echo "staged runtime bundle in ${runtime_dir}" +done diff --git a/tasks/scripts/vm/codesign-openshell-vm.sh b/tasks/scripts/vm/codesign-openshell-vm.sh new file mode 100755 index 000000000..0aeeca9b1 --- /dev/null +++ b/tasks/scripts/vm/codesign-openshell-vm.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +if [ "$(uname -s)" != "Darwin" ]; then + exit 0 +fi + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" +codesign --entitlements "${ROOT}/crates/openshell-vm/entitlements.plist" --force -s - "${ROOT}/target/debug/openshell-vm" diff --git a/tasks/scripts/vm/compress-vm-runtime.sh b/tasks/scripts/vm/compress-vm-runtime.sh new file mode 100755 index 000000000..67290a936 --- /dev/null +++ b/tasks/scripts/vm/compress-vm-runtime.sh @@ -0,0 +1,246 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Gather VM runtime artifacts from local sources and compress for embedding. +# +# This script collects libkrun, libkrunfw, and gvproxy from local sources +# (Homebrew on macOS, built from source on Linux) and compresses them with +# zstd for embedding into the openshell-vm binary. +# +# Usage: +# ./compress-vm-runtime.sh +# +# Environment: +# OPENSHELL_VM_RUNTIME_COMPRESSED_DIR - Output directory (default: target/vm-runtime-compressed) +# VM_RUNTIME_TARBALL - Path to a pre-built vm-runtime-*.tar.zst tarball. +# When set, the script extracts and re-compresses +# artifacts from this tarball instead of looking for +# local builds. Used by CI and download-kernel-runtime.sh. +# +# The script sets OPENSHELL_VM_RUNTIME_COMPRESSED_DIR for use by build.rs. 
+ +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/_lib.sh" +ROOT="$(vm_lib_root)" + +# Source pins for gvproxy version +source "${ROOT}/crates/openshell-vm/pins.env" 2>/dev/null || true +GVPROXY_VERSION="${GVPROXY_VERSION:-v0.8.8}" + +# ── macOS dylib portability helpers ───────────────────────────────────── + +# Make a dylib portable by rewriting paths to use @loader_path +make_dylib_portable() { + local dylib="$1" + local dylib_name + dylib_name="$(basename "$dylib")" + + # Rewrite install name + install_name_tool -id "@loader_path/${dylib_name}" "$dylib" 2>/dev/null || true + + # Rewrite libkrunfw reference if present + local krunfw_path + krunfw_path=$(otool -L "$dylib" 2>/dev/null | grep libkrunfw | awk '{print $1}' || true) + if [ -n "$krunfw_path" ] && [[ "$krunfw_path" != @* ]]; then + install_name_tool -change "$krunfw_path" "@loader_path/libkrunfw.dylib" "$dylib" + fi + + # Re-codesign + codesign -f -s - "$dylib" 2>/dev/null || true +} + +WORK_DIR="${ROOT}/target/vm-runtime" +OUTPUT_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${ROOT}/target/vm-runtime-compressed}" + +rm -rf "$WORK_DIR" +mkdir -p "$WORK_DIR" "$OUTPUT_DIR" + +# ── Fast path: pre-built tarball from CI or download-kernel-runtime.sh ── + +if [ -n "${VM_RUNTIME_TARBALL:-}" ]; then + echo "==> Using pre-built runtime tarball: ${VM_RUNTIME_TARBALL}" + + if [ ! -f "${VM_RUNTIME_TARBALL}" ]; then + echo "Error: VM_RUNTIME_TARBALL not found: ${VM_RUNTIME_TARBALL}" >&2 + exit 1 + fi + + # Extract tarball contents + zstd -d "${VM_RUNTIME_TARBALL}" --stdout | tar -xf - -C "$WORK_DIR" + + echo " Extracted files:" + ls -lah "$WORK_DIR" + + echo "" + compress_dir "$WORK_DIR" "$OUTPUT_DIR" + + # Check for rootfs tarball (built separately) + ROOTFS_TARBALL="${OUTPUT_DIR}/rootfs.tar.zst" + if [ -f "$ROOTFS_TARBALL" ]; then + echo " rootfs.tar.zst: $(du -h "$ROOTFS_TARBALL" | cut -f1) (pre-built)" + else + echo "" + echo "Note: rootfs.tar.zst not found." 
+ echo " To build one, run: mise run vm:rootfs -- --base" + fi + + echo "" + echo "==> Compressed artifacts in ${OUTPUT_DIR}:" + ls -lah "$OUTPUT_DIR" + TOTAL=$(du -sh "$OUTPUT_DIR" | cut -f1) + echo "" + echo "==> Total compressed size: ${TOTAL}" + echo "" + echo "Next step: mise run vm:build" + exit 0 +fi + +echo "==> Detecting platform..." + +case "$(uname -s)-$(uname -m)" in + Darwin-arm64) + PLATFORM="darwin-aarch64" + echo " Platform: macOS ARM64" + + # Source priority for libkrun: + # 1. Custom build from build-libkrun-macos.sh (portable, no GPU deps) + # 2. Custom runtime with custom libkrunfw + LIBKRUN_BUILD_DIR="${ROOT}/target/libkrun-build" + CUSTOM_DIR="${ROOT}/target/custom-runtime" + BREW_PREFIX="$(brew --prefix 2>/dev/null || echo /opt/homebrew)" + + if [ -f "${LIBKRUN_BUILD_DIR}/libkrun.dylib" ]; then + echo " Using portable libkrun from ${LIBKRUN_BUILD_DIR}" + cp "${LIBKRUN_BUILD_DIR}/libkrun.dylib" "$WORK_DIR/" + cp "${LIBKRUN_BUILD_DIR}/libkrunfw.dylib" "$WORK_DIR/" + + # Verify portability + if otool -L "${LIBKRUN_BUILD_DIR}/libkrun.dylib" | grep -q "/opt/homebrew"; then + echo " Warning: libkrun has hardcoded Homebrew paths - may not be portable" + else + echo " ✓ libkrun is portable (no hardcoded paths)" + fi + elif [ -f "${CUSTOM_DIR}/provenance.json" ]; then + echo " Using custom runtime from ${CUSTOM_DIR}" + + # libkrun from Homebrew (needs path rewriting for portability) + if [ -f "${CUSTOM_DIR}/libkrun.dylib" ]; then + cp "${CUSTOM_DIR}/libkrun.dylib" "$WORK_DIR/" + else + cp "${BREW_PREFIX}/lib/libkrun.dylib" "$WORK_DIR/" + make_dylib_portable "$WORK_DIR/libkrun.dylib" + fi + + # libkrunfw from custom build + cp "${CUSTOM_DIR}/libkrunfw.dylib" "$WORK_DIR/" + else + echo "Error: No portable libkrun build found." >&2 + echo " Run: FROM_SOURCE=1 mise run vm:setup" >&2 + exit 1 + fi + + # Normalize libkrunfw naming - ensure both names exist for build.rs + # build.rs expects libkrunfw.5.dylib.zst; some builds produce libkrunfw.dylib + if [ ! 
-f "$WORK_DIR/libkrunfw.dylib" ] && [ -f "$WORK_DIR/libkrunfw.5.dylib" ]; then + cp "$WORK_DIR/libkrunfw.5.dylib" "$WORK_DIR/libkrunfw.dylib" + fi + if [ ! -f "$WORK_DIR/libkrunfw.5.dylib" ] && [ -f "$WORK_DIR/libkrunfw.dylib" ]; then + cp "$WORK_DIR/libkrunfw.dylib" "$WORK_DIR/libkrunfw.5.dylib" + fi + + # gvproxy - prefer Podman, fall back to Homebrew + if [ -x /opt/podman/bin/gvproxy ]; then + cp /opt/podman/bin/gvproxy "$WORK_DIR/" + echo " Using gvproxy from Podman" + elif [ -x "${BREW_PREFIX}/bin/gvproxy" ]; then + cp "${BREW_PREFIX}/bin/gvproxy" "$WORK_DIR/" + echo " Using gvproxy from Homebrew" + else + echo "Error: gvproxy not found. Install Podman Desktop or run: brew install gvproxy" >&2 + exit 1 + fi + ;; + + Linux-*) + ARCH="$(uname -m)" + case "$ARCH" in + aarch64) GVPROXY_ARCH="arm64" ;; + x86_64) GVPROXY_ARCH="amd64" ;; + *) + echo "Error: Unsupported Linux architecture: ${ARCH}" >&2 + exit 1 + ;; + esac + PLATFORM="linux-${ARCH}" + echo " Platform: Linux ${ARCH}" + + BUILD_DIR="${ROOT}/target/libkrun-build" + if [ ! -f "${BUILD_DIR}/libkrun.so" ]; then + echo "Error: libkrun not found. Run: FROM_SOURCE=1 mise run vm:setup" >&2 + exit 1 + fi + + cp "${BUILD_DIR}/libkrun.so" "$WORK_DIR/" + + # Copy libkrunfw - find the versioned .so file + for krunfw in "${BUILD_DIR}"/libkrunfw.so*; do + [ -f "$krunfw" ] || continue + cp "$krunfw" "$WORK_DIR/" + done + + # Ensure the soname symlink (libkrunfw.so.5) exists alongside the fully + # versioned file (libkrunfw.so.5.x.y). libloading loads by soname. + if [ ! -f "$WORK_DIR/libkrunfw.so.5" ]; then + versioned=$(ls "$WORK_DIR"/libkrunfw.so.5.* 2>/dev/null | head -n1) + if [ -n "$versioned" ]; then + cp "$versioned" "$WORK_DIR/libkrunfw.so.5" + fi + fi + + # Download gvproxy if not present + if [ ! -f "$WORK_DIR/gvproxy" ]; then + echo " Downloading gvproxy for linux-${GVPROXY_ARCH}..." 
+ curl -fsSL -o "$WORK_DIR/gvproxy" \ + "https://github.com/containers/gvisor-tap-vsock/releases/download/${GVPROXY_VERSION}/gvproxy-linux-${GVPROXY_ARCH}" + chmod +x "$WORK_DIR/gvproxy" + fi + ;; + + *) + echo "Error: Unsupported platform: $(uname -s)-$(uname -m)" >&2 + echo "Supported platforms: Darwin-arm64, Linux-aarch64, Linux-x86_64" >&2 + exit 1 + ;; +esac + +echo "" +echo "==> Collected artifacts:" +ls -lah "$WORK_DIR" + +echo "" +compress_dir "$WORK_DIR" "$OUTPUT_DIR" + +# Check for rootfs tarball (built separately by build-rootfs-tarball.sh) +ROOTFS_TARBALL="${OUTPUT_DIR}/rootfs.tar.zst" +if [ -f "$ROOTFS_TARBALL" ]; then + echo " rootfs.tar.zst: $(du -h "$ROOTFS_TARBALL" | cut -f1) (pre-built)" +else + echo "" + echo "Note: rootfs.tar.zst not found." + echo " To build one, run: mise run vm:rootfs -- --base" + echo " Without it, the binary will still work but require the rootfs" + echo " to be built separately on first run." +fi + +echo "" +echo "==> Compressed artifacts in ${OUTPUT_DIR}:" +ls -lah "$OUTPUT_DIR" + +TOTAL=$(du -sh "$OUTPUT_DIR" | cut -f1) +echo "" +echo "==> Total compressed size: ${TOTAL}" +echo "" +echo "Next step: mise run vm:build" diff --git a/tasks/scripts/vm/download-kernel-runtime.sh b/tasks/scripts/vm/download-kernel-runtime.sh new file mode 100755 index 000000000..8f0427af9 --- /dev/null +++ b/tasks/scripts/vm/download-kernel-runtime.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Download pre-built VM kernel runtime artifacts from the vm-dev GitHub Release +# and stage them for the openshell-vm cargo build. +# +# This script is used by CI (release-vm-dev.yml) and can also be used locally +# to avoid building libkrun/libkrunfw from source. 
+# +# Usage: +# ./download-kernel-runtime.sh [--platform PLATFORM] +# +# Environment: +# VM_RUNTIME_RELEASE_TAG - GitHub Release tag (default: vm-dev) +# GITHUB_REPOSITORY - owner/repo (default: NVIDIA/OpenShell) +# OPENSHELL_VM_RUNTIME_COMPRESSED_DIR - Output directory (default: target/vm-runtime-compressed) +# +# Platforms: linux-aarch64, linux-x86_64, darwin-aarch64 + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/_lib.sh" +ROOT="$(vm_lib_root)" + +RELEASE_TAG="${VM_RUNTIME_RELEASE_TAG:-vm-dev}" +REPO="${GITHUB_REPOSITORY:-NVIDIA/OpenShell}" +OUTPUT_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${ROOT}/target/vm-runtime-compressed}" + +# ── Auto-detect platform (detect_platform from _lib.sh) ───────────────── + +PLATFORM="" +while [[ $# -gt 0 ]]; do + case "$1" in + --platform) + PLATFORM="$2"; shift 2 ;; + --help|-h) + echo "Usage: $0 [--platform PLATFORM]" + echo "" + echo "Download pre-built VM kernel runtime from the vm-dev GitHub Release." + echo "" + echo "Platforms: linux-aarch64, linux-x86_64, darwin-aarch64" + echo "" + echo "Environment:" + echo " VM_RUNTIME_RELEASE_TAG Release tag (default: vm-dev)" + echo " GITHUB_REPOSITORY owner/repo (default: NVIDIA/OpenShell)" + echo " OPENSHELL_VM_RUNTIME_COMPRESSED_DIR Output directory" + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2; exit 1 ;; + esac +done + +if [ -z "$PLATFORM" ]; then + PLATFORM="$(detect_platform)" +fi + +TARBALL_NAME="vm-runtime-${PLATFORM}.tar.zst" + +echo "==> Downloading VM kernel runtime" +echo " Repository: ${REPO}" +echo " Release: ${RELEASE_TAG}" +echo " Platform: ${PLATFORM}" +echo " Artifact: ${TARBALL_NAME}" +echo " Output: ${OUTPUT_DIR}" +echo "" + +# ── Check for gh CLI ──────────────────────────────────────────────────── + +if ! command -v gh &>/dev/null; then + echo "Error: GitHub CLI (gh) is required." 
>&2 + echo " Install: https://cli.github.com/" >&2 + exit 1 +fi + +# ── Download the runtime tarball ──────────────────────────────────────── + +DOWNLOAD_DIR="${ROOT}/target/vm-runtime-download" +mkdir -p "$DOWNLOAD_DIR" "$OUTPUT_DIR" + +echo "==> Downloading ${TARBALL_NAME} from ${RELEASE_TAG}..." +gh release download "${RELEASE_TAG}" \ + --repo "${REPO}" \ + --pattern "${TARBALL_NAME}" \ + --dir "${DOWNLOAD_DIR}" \ + --clobber + +if [ ! -f "${DOWNLOAD_DIR}/${TARBALL_NAME}" ]; then + echo "Error: Download failed — ${TARBALL_NAME} not found." >&2 + echo "" >&2 + echo "The vm-dev release may not have kernel runtime artifacts yet." >&2 + echo "Run the 'Release VM Kernel' workflow first:" >&2 + echo " gh workflow run release-vm-kernel.yml" >&2 + exit 1 +fi + +echo " Downloaded: $(du -sh "${DOWNLOAD_DIR}/${TARBALL_NAME}" | cut -f1)" + +# ── Extract and stage for cargo build ─────────────────────────────────── + +echo "" +echo "==> Extracting runtime artifacts..." + +EXTRACT_DIR="${ROOT}/target/vm-runtime-extracted" +rm -rf "$EXTRACT_DIR" +mkdir -p "$EXTRACT_DIR" + +zstd -d "${DOWNLOAD_DIR}/${TARBALL_NAME}" --stdout | tar -xf - -C "$EXTRACT_DIR" + +echo " Extracted files:" +ls -lah "$EXTRACT_DIR" + +# ── Compress individual files for embedding ───────────────────────────── +# The cargo build expects individual .zst files (libkrun.so.zst, etc.) +# in OPENSHELL_VM_RUNTIME_COMPRESSED_DIR. The downloaded tarball contains +# the raw libraries, so we re-compress each one. + +echo "" +compress_dir "$EXTRACT_DIR" "$OUTPUT_DIR" + +# ── Check for rootfs (may already be present from a separate build step) ── + +if [ -f "${OUTPUT_DIR}/rootfs.tar.zst" ]; then + echo "" + echo " rootfs.tar.zst: $(du -h "${OUTPUT_DIR}/rootfs.tar.zst" | cut -f1) (pre-existing)" +else + echo "" + echo "Note: rootfs.tar.zst not found in ${OUTPUT_DIR}." 
+ echo " Build it with: mise run vm:rootfs -- --base" +fi + +echo "" +echo "==> Staged artifacts in ${OUTPUT_DIR}:" +ls -lah "$OUTPUT_DIR" + +echo "" +echo "==> Done." +echo "" +echo "Next step: mise run vm:build" diff --git a/tasks/scripts/vm/ensure-vm-rootfs.sh b/tasks/scripts/vm/ensure-vm-rootfs.sh new file mode 100755 index 000000000..3cf9ddfc6 --- /dev/null +++ b/tasks/scripts/vm/ensure-vm-rootfs.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)" +GATEWAY_BIN="${ROOT}/target/debug/openshell-vm" + +NAME="default" +ROOTFS_ARGS=() + +while [[ $# -gt 0 ]]; do + case "$1" in + --name) + NAME="$2" + shift 2 + ;; + --name=*) + NAME="${1#--name=}" + shift + ;; + --rootfs) + ROOTFS_ARGS=("$1" "$2") + shift 2 + ;; + --rootfs=*) + ROOTFS_ARGS=("$1") + shift + ;; + *) + echo "Unknown argument: $1" >&2 + exit 1 + ;; + esac +done + +if [ ! -x "${GATEWAY_BIN}" ]; then + echo "ERROR: openshell-vm binary not found at ${GATEWAY_BIN}" >&2 + echo " Run: mise run vm:build" >&2 + exit 1 +fi + +prepare_args=(--name "${NAME}") +if [ "${#ROOTFS_ARGS[@]}" -gt 0 ]; then + prepare_args=("${ROOTFS_ARGS[@]}" "${prepare_args[@]}") +fi +if [ "${OPENSHELL_VM_FORCE_ROOTFS_REBUILD:-}" = "1" ]; then + prepare_args+=(prepare-rootfs --force) +else + prepare_args+=(prepare-rootfs) +fi + +if ROOTFS_PATH="$("${GATEWAY_BIN}" "${prepare_args[@]}" 2>/dev/null)"; then + echo "using openshell-vm rootfs at ${ROOTFS_PATH}" + exit 0 +fi + +# prepare-rootfs failed — no embedded rootfs in the binary. +# Fall back to target/rootfs-build if it exists (rootfs was built separately +# but not yet compressed for embedding via mise run vm:rootfs). 
+if [ "${#ROOTFS_ARGS[@]}" -eq 0 ]; then + FALLBACK_ROOTFS="${ROOT}/target/rootfs-build" + if [ -d "${FALLBACK_ROOTFS}/srv" ]; then + echo "using openshell-vm rootfs at ${FALLBACK_ROOTFS}" + exit 0 + fi +fi + +echo "ERROR: No rootfs available." >&2 +echo " Run: mise run vm:rootfs -- --base # build rootfs (~5-10 min, requires Docker)" >&2 +exit 1 diff --git a/tasks/scripts/vm/package-vm-runtime.sh b/tasks/scripts/vm/package-vm-runtime.sh new file mode 100755 index 000000000..f97eec870 --- /dev/null +++ b/tasks/scripts/vm/package-vm-runtime.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Package VM runtime artifacts into a release tarball. +# +# Used by CI (release-vm-kernel.yml) to bundle libkrun, libkrunfw, and gvproxy +# into a platform-specific tarball for the vm-dev GitHub Release. Handles +# gvproxy download, provenance metadata generation, and tarball creation. 
+# +# Usage: +# ./package-vm-runtime.sh --platform --build-dir --output +# +# Arguments: +# --platform One of: linux-aarch64, linux-x86_64, darwin-aarch64 +# --build-dir Directory containing built libkrun and libkrunfw artifacts +# --output Path for the output .tar.zst file +# +# Environment (optional, for provenance): +# GITHUB_SHA - Git commit SHA +# GITHUB_RUN_ID - GitHub Actions run ID +# CUSTOM_PROVENANCE_DIR - Directory containing provenance.json from custom +# libkrunfw build (macOS only) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/_lib.sh" +ROOT="$(vm_lib_root)" + +# Source pins for gvproxy version +source "${ROOT}/crates/openshell-vm/pins.env" 2>/dev/null || true +GVPROXY_VERSION="${GVPROXY_VERSION:-v0.8.8}" + +PLATFORM="" +BUILD_DIR="" +OUTPUT="" +CUSTOM_PROVENANCE_DIR="${CUSTOM_PROVENANCE_DIR:-}" + +while [[ $# -gt 0 ]]; do + case "$1" in + --platform) PLATFORM="$2"; shift 2 ;; + --build-dir) BUILD_DIR="$2"; shift 2 ;; + --output) OUTPUT="$2"; shift 2 ;; + --help|-h) + echo "Usage: $0 --platform --build-dir --output " + echo "" + echo "Package VM runtime artifacts into a release tarball." 
+ echo "" + echo "Platforms: linux-aarch64, linux-x86_64, darwin-aarch64" + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2; exit 1 ;; + esac +done + +if [ -z "$PLATFORM" ] || [ -z "$BUILD_DIR" ] || [ -z "$OUTPUT" ]; then + echo "Error: --platform, --build-dir, and --output are all required" >&2 + exit 1 +fi + +echo "==> Packaging VM runtime" +echo " Platform: ${PLATFORM}" +echo " Build dir: ${BUILD_DIR}" +echo " Output: ${OUTPUT}" +echo "" + +# ── Create staging directory ──────────────────────────────────────────── + +PACKAGE_DIR="$(mktemp -d)" +trap 'rm -rf "$PACKAGE_DIR"' EXIT + +# ── Copy runtime libraries ────────────────────────────────────────────── + +case "$PLATFORM" in + linux-*) + cp "${BUILD_DIR}/libkrun.so" "${PACKAGE_DIR}/" + # Copy libkrunfw — find versioned .so and create soname symlink + for f in "${BUILD_DIR}"/libkrunfw.so*; do + [ -f "$f" ] && cp "$f" "${PACKAGE_DIR}/" + done + if [ ! -f "${PACKAGE_DIR}/libkrunfw.so.5" ]; then + versioned="$(ls "${PACKAGE_DIR}"/libkrunfw.so.5.* 2>/dev/null | head -n1 || true)" + [ -n "$versioned" ] && cp "$versioned" "${PACKAGE_DIR}/libkrunfw.so.5" + fi + ;; + darwin-aarch64) + cp "${BUILD_DIR}/libkrun.dylib" "${PACKAGE_DIR}/" + # libkrunfw — prefer build dir, fall back to custom runtime dir + candidates=("${BUILD_DIR}/libkrunfw.dylib" "${BUILD_DIR}/libkrunfw.5.dylib") + if [ -n "$CUSTOM_PROVENANCE_DIR" ]; then + candidates+=("${CUSTOM_PROVENANCE_DIR}/libkrunfw.dylib" "${CUSTOM_PROVENANCE_DIR}/libkrunfw.5.dylib") + fi + for candidate in "${candidates[@]}"; do + if [ -f "$candidate" ]; then + cp "$candidate" "${PACKAGE_DIR}/" + fi + done + ;; + *) + echo "Error: Unknown platform: ${PLATFORM}" >&2 + exit 1 + ;; +esac + +# ── Download gvproxy ──────────────────────────────────────────────────── + +echo "==> Downloading gvproxy ${GVPROXY_VERSION} for ${PLATFORM}..." 
+case "$PLATFORM" in + linux-aarch64) GVPROXY_SUFFIX="linux-arm64" ;; + linux-x86_64) GVPROXY_SUFFIX="linux-amd64" ;; + darwin-aarch64) GVPROXY_SUFFIX="darwin" ;; +esac + +curl -fsSL -o "${PACKAGE_DIR}/gvproxy" \ + "https://github.com/containers/gvisor-tap-vsock/releases/download/${GVPROXY_VERSION}/gvproxy-${GVPROXY_SUFFIX}" +chmod +x "${PACKAGE_DIR}/gvproxy" + +# ── Write provenance metadata ─────────────────────────────────────────── + +echo "==> Writing provenance metadata..." + +LIBKRUNFW_COMMIT="unknown" +KERNEL_VERSION="unknown" + +# Try custom provenance first (macOS builds produce this) +if [ -n "$CUSTOM_PROVENANCE_DIR" ] && [ -f "${CUSTOM_PROVENANCE_DIR}/provenance.json" ]; then + LIBKRUNFW_COMMIT="$(jq -r '.libkrunfw_commit // "unknown"' "${CUSTOM_PROVENANCE_DIR}/provenance.json" 2>/dev/null || echo unknown)" + KERNEL_VERSION="$(jq -r '.kernel_version // "unknown"' "${CUSTOM_PROVENANCE_DIR}/provenance.json" 2>/dev/null || echo unknown)" +fi + +# Fall back to inspecting the build directory (Linux builds) +if [ "$LIBKRUNFW_COMMIT" = "unknown" ] && [ -d "${BUILD_DIR}/libkrunfw/.git" ]; then + LIBKRUNFW_COMMIT="$(git -C "${BUILD_DIR}/libkrunfw" rev-parse HEAD 2>/dev/null || echo unknown)" +fi +if [ "$KERNEL_VERSION" = "unknown" ] && [ -f "${BUILD_DIR}/libkrunfw/Makefile" ]; then + KERNEL_VERSION="$(grep -oE 'KERNEL_VERSION\s*=\s*linux-[^\s]+' "${BUILD_DIR}/libkrunfw/Makefile" | head -1 | sed 's/.*= *//' || echo unknown)" +fi + +if ! 
command -v jq &>/dev/null; then + echo "Error: jq is required for provenance generation" >&2 + exit 1 +fi + +jq -n \ + --arg artifact "vm-runtime" \ + --arg platform "$PLATFORM" \ + --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ + --arg kfw_commit "$LIBKRUNFW_COMMIT" \ + --arg kver "$KERNEL_VERSION" \ + --arg sha "${GITHUB_SHA:-unknown}" \ + --arg run "${GITHUB_RUN_ID:-unknown}" \ + '{artifact: $artifact, platform: $platform, build_timestamp: $ts, libkrunfw_commit: $kfw_commit, kernel_version: $kver, github_sha: $sha, github_run_id: $run}' \ + > "${PACKAGE_DIR}/provenance.json" + +# ── Create tarball ────────────────────────────────────────────────────── + +echo "==> Creating tarball..." +mkdir -p "$(dirname "$OUTPUT")" +tar -C "${PACKAGE_DIR}" -cf - . | zstd -19 -T0 -o "$OUTPUT" + +echo "" +echo "==> Packaged ${OUTPUT} ($(du -sh "$OUTPUT" | cut -f1))" +echo " Contents:" +ls -lah "${PACKAGE_DIR}" diff --git a/tasks/scripts/vm/run-vm.sh b/tasks/scripts/vm/run-vm.sh new file mode 100755 index 000000000..630d1eecd --- /dev/null +++ b/tasks/scripts/vm/run-vm.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" +RUNTIME_DIR="${ROOT}/target/debug/openshell-vm.runtime" +GATEWAY_BIN="${ROOT}/target/debug/openshell-vm" + +if [ "$(uname -s)" = "Darwin" ]; then + export DYLD_FALLBACK_LIBRARY_PATH="${RUNTIME_DIR}${DYLD_FALLBACK_LIBRARY_PATH:+:${DYLD_FALLBACK_LIBRARY_PATH}}" +fi + +args=("$@") +name="default" +rootfs_args=() +expect_name=0 +expect_rootfs=0 +subcommand="" +skip_prepare=0 + +for arg in "${args[@]}"; do + if [ "${expect_name}" -eq 1 ]; then + name="${arg}" + expect_name=0 + continue + fi + + if [ "${expect_rootfs}" -eq 1 ]; then + rootfs_args=(--rootfs "${arg}") + expect_rootfs=0 + continue + fi + + case "${arg}" in + --name) + expect_name=1 + ;; + --name=*) + name="${arg#--name=}" + ;; + --rootfs) + expect_rootfs=1 + ;; + --rootfs=*) + rootfs_args=("${arg}") + ;; + --help|-h|--version) + skip_prepare=1 + ;; + exec|prepare-rootfs) + subcommand="${arg}" + break + ;; + esac +done + +if [ "${skip_prepare}" -eq 0 ] && [ -z "${subcommand}" ]; then + prep_args=(--name "${name}") + if [ "${#rootfs_args[@]}" -gt 0 ]; then + prep_args=("${rootfs_args[@]}" "${prep_args[@]}") + fi + resolved_rootfs="$("${ROOT}/tasks/scripts/vm/ensure-vm-rootfs.sh" "${prep_args[@]}" \ + | tail -n 1 | sed 's/^using openshell-vm rootfs at //')" + "${ROOT}/tasks/scripts/vm/sync-vm-rootfs.sh" "${prep_args[@]}" + + # When no --rootfs was supplied by the caller, inject the resolved rootfs path + # so the binary finds the rootfs regardless of whether it is embedded. + if [ "${#rootfs_args[@]}" -eq 0 ] && [ -n "${resolved_rootfs}" ]; then + args=(--rootfs "${resolved_rootfs}" "${args[@]}") + fi +fi + +exec "${GATEWAY_BIN}" "${args[@]}" diff --git a/tasks/scripts/vm/sync-vm-rootfs.sh b/tasks/scripts/vm/sync-vm-rootfs.sh new file mode 100755 index 000000000..fa13ee1e5 --- /dev/null +++ b/tasks/scripts/vm/sync-vm-rootfs.sh @@ -0,0 +1,182 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0
+
+# Sync mutable development artifacts into the existing VM rootfs.
+# Runs on every `mise run vm` so that script changes, helm chart
+# updates, manifest changes, and supervisor binary rebuilds are
+# picked up without a full rootfs rebuild.
+#
+# This is fast (<1s) — it only copies files, no Docker or VM boot.
+
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"
+SCRIPT_DIR="${ROOT}/crates/openshell-vm/scripts"
+IMAGE_REPO_BASE="${IMAGE_REPO_BASE:-openshell}"
+IMAGE_TAG="${IMAGE_TAG:-dev}"
+SERVER_IMAGE="${IMAGE_REPO_BASE}/gateway:${IMAGE_TAG}"
+NAME="default"
+ROOTFS_ARGS=()
+
+# Parse --name[=NAME] (instance selector) and --rootfs[=PATH] (forwarded
+# verbatim to ensure-vm-rootfs.sh); anything else is an error.
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --name)
+      NAME="$2"
+      shift 2
+      ;;
+    --name=*)
+      NAME="${1#--name=}"
+      shift
+      ;;
+    --rootfs)
+      ROOTFS_ARGS=("$1" "$2")
+      shift 2
+      ;;
+    --rootfs=*)
+      ROOTFS_ARGS=("$1")
+      shift
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
+ensure_args=(--name "${NAME}")
+if [ "${#ROOTFS_ARGS[@]}" -gt 0 ]; then
+  ensure_args=("${ROOTFS_ARGS[@]}" "${ensure_args[@]}")
+fi
+
+if ! ROOTFS_DIR="$("${ROOT}/tasks/scripts/vm/ensure-vm-rootfs.sh" "${ensure_args[@]}" | tail -n 1 | sed 's/^using openshell-vm rootfs at //')"; then
+  echo "ERROR: ensure-vm-rootfs.sh failed — no rootfs available." >&2
+  exit 1
+fi
+
+patch_vm_helmchart() {
+  local helmchart="$1"
+  [ -f "${helmchart}" ] || return 0
+
+  sed_in_place() {
+    local expr="$1"
+    sed -i.bak -E "${expr}" "${helmchart}"
+    rm -f "${helmchart}.bak"
+  }
+
+  # Mirror the build-rootfs placeholder patching (image repo/tag, gateway
+  # settings) so the VM keeps using the local openshell/gateway:dev image.
+  sed_in_place 's|__IMAGE_PULL_POLICY__|IfNotPresent|g'
+  sed_in_place 's|__SANDBOX_IMAGE_PULL_POLICY__|"IfNotPresent"|g'
+  sed_in_place 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g'
+  sed_in_place "s|repository:[[:space:]]*[^[:space:]]+|repository: ${SERVER_IMAGE%:*}|"
+  sed_in_place "s|tag:[[:space:]]*\"?[^\"[:space:]]+\"?|tag: \"${IMAGE_TAG}\"|"
+  sed_in_place 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g'
+  sed_in_place 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g'
+  sed_in_place 's|__DISABLE_GATEWAY_AUTH__|false|g'
+  sed_in_place 's|__DISABLE_TLS__|false|g'
+  sed_in_place 's|hostGatewayIP: __HOST_GATEWAY_IP__|hostGatewayIP: ""|g'
+  sed_in_place '/__CHART_CHECKSUM__/d'
+}
+
+if [ ! -d "${ROOTFS_DIR}/srv" ]; then
+  # Rootfs doesn't exist yet — nothing to sync. ensure-vm-rootfs.sh
+  # or build-rootfs.sh will create it.
+  exit 0
+fi
+
+echo "Syncing development artifacts into rootfs..."
+
+# ── Init scripts and utilities ─────────────────────────────────────────
+for script in openshell-vm-init.sh openshell-vm-exec-agent.py check-vm-capabilities.sh; do
+  src="${SCRIPT_DIR}/${script}"
+  dst="${ROOTFS_DIR}/srv/${script}"
+  if [ -f "$src" ]; then
+    if ! cmp -s "$src" "$dst" 2>/dev/null; then
+      cp "$src" "$dst"
+      chmod +x "$dst"
+      echo " updated: /srv/${script}"
+    fi
+  fi
+done
+
+# ── Helm chart ─────────────────────────────────────────────────────────
+HELM_CHART_DIR="${ROOT}/deploy/helm/openshell"
+CHART_STAGING="${ROOTFS_DIR}/opt/openshell/charts"
+if [ -d "${HELM_CHART_DIR}" ]; then
+  mkdir -p "${CHART_STAGING}"
+  # Package into a temp dir and byte-compare — only copy charts that changed;
+  # helm output/errors are suppressed, so a packaging failure is best-effort.
+  TMP_CHART=$(mktemp -d)
+  helm package "${HELM_CHART_DIR}" -d "${TMP_CHART}" >/dev/null 2>&1
+  for tgz in "${TMP_CHART}"/*.tgz; do
+    [ -f "$tgz" ] || continue
+    base=$(basename "$tgz")
+    if ! cmp -s "$tgz" "${CHART_STAGING}/${base}" 2>/dev/null; then
+      cp "$tgz" "${CHART_STAGING}/${base}"
+      echo " updated: /opt/openshell/charts/${base}"
+    fi
+  done
+  rm -rf "${TMP_CHART}"
+fi
+
+# ── Kubernetes manifests ───────────────────────────────────────────────
+MANIFEST_SRC="${ROOT}/deploy/kube/manifests"
+MANIFEST_DST="${ROOTFS_DIR}/opt/openshell/manifests"
+if [ -d "${MANIFEST_SRC}" ]; then
+  mkdir -p "${MANIFEST_DST}"
+  for manifest in "${MANIFEST_SRC}"/*.yaml; do
+    [ -f "$manifest" ] || continue
+    base=$(basename "$manifest")
+    if ! cmp -s "$manifest" "${MANIFEST_DST}/${base}" 2>/dev/null; then
+      cp "$manifest" "${MANIFEST_DST}/${base}"
+      echo " updated: /opt/openshell/manifests/${base}"
+    fi
+  done
+fi
+
+patch_vm_helmchart "${MANIFEST_DST}/openshell-helmchart.yaml"
+patch_vm_helmchart "${ROOTFS_DIR}/var/lib/rancher/k3s/server/manifests/openshell-helmchart.yaml"
+
+# ── Gateway image tarball ──────────────────────────────────────────────
+# The VM rootfs airgap-imports openshell/gateway:dev from k3s/agent/images/.
+# Keep that tarball in sync with the local Docker image so `mise run e2e:vm`
+# validates the current openshell-server code, not whatever image happened to
+# be baked into the rootfs last time it was rebuilt.
+SERVER_IMAGE_TAR="${ROOTFS_DIR}/var/lib/rancher/k3s/agent/images/openshell-server.tar.zst"
+SERVER_IMAGE_ID_FILE="${ROOTFS_DIR}/opt/openshell/.gateway-image-id"
+if command -v docker >/dev/null 2>&1 && docker image inspect "${SERVER_IMAGE}" >/dev/null 2>&1; then
+  current_image_id=$(docker image inspect --format '{{.Id}}' "${SERVER_IMAGE}")
+  previous_image_id=""
+  if [ -f "${SERVER_IMAGE_ID_FILE}" ]; then
+    previous_image_id=$(cat "${SERVER_IMAGE_ID_FILE}")
+  fi
+
+  # Re-export only when the image ID changed or the tarball is missing;
+  # write to a temp file first so a failed save never clobbers the tarball.
+  if [ "${current_image_id}" != "${previous_image_id}" ] || [ ! -f "${SERVER_IMAGE_TAR}" ]; then
+    mkdir -p "$(dirname "${SERVER_IMAGE_TAR}")" "$(dirname "${SERVER_IMAGE_ID_FILE}")"
+    tmp_tar=$(mktemp /tmp/openshell-server-image.XXXXXX)
+    docker save "${SERVER_IMAGE}" | zstd -f -T0 -3 -o "${tmp_tar}" >/dev/null
+    mv "${tmp_tar}" "${SERVER_IMAGE_TAR}"
+    printf '%s\n' "${current_image_id}" > "${SERVER_IMAGE_ID_FILE}"
+    echo " updated: /var/lib/rancher/k3s/agent/images/openshell-server.tar.zst"
+  fi
+fi
+
+# ── Supervisor binary ─────────────────────────────────────────────────
+SUPERVISOR_TARGET="aarch64-unknown-linux-gnu"
+SUPERVISOR_BIN="${ROOT}/target/${SUPERVISOR_TARGET}/release/openshell-sandbox"
+SUPERVISOR_DST="${ROOTFS_DIR}/opt/openshell/bin/openshell-sandbox"
+if [ -f "${SUPERVISOR_BIN}" ]; then
+  mkdir -p "$(dirname "${SUPERVISOR_DST}")"
+  if ! cmp -s "${SUPERVISOR_BIN}" "${SUPERVISOR_DST}" 2>/dev/null; then
+    cp "${SUPERVISOR_BIN}" "${SUPERVISOR_DST}"
+    chmod +x "${SUPERVISOR_DST}"
+    echo " updated: /opt/openshell/bin/openshell-sandbox"
+  fi
+fi
+
+# ── Fix execute permissions on k3s data binaries ──────────────────────
+# docker export and macOS virtio-fs can strip execute bits.
+chmod +x "${ROOTFS_DIR}"/var/lib/rancher/k3s/data/*/bin/* 2>/dev/null || true
+chmod +x "${ROOTFS_DIR}"/var/lib/rancher/k3s/data/*/bin/aux/* 2>/dev/null || true
+
+echo "Sync complete."
diff --git a/tasks/scripts/vm/vm-clean.sh b/tasks/scripts/vm/vm-clean.sh
new file mode 100755
index 000000000..c293348d0
--- /dev/null
+++ b/tasks/scripts/vm/vm-clean.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Remove all openshell-vm cached artifacts.
+#
+# Use this when you need a clean slate — after running this, you will need to
+# re-run `mise run vm:setup` before building again.
+#
+# Usage:
+#   ./vm-clean.sh          # clean VM-specific artifacts
+#   ./vm-clean.sh --all    # also remove the compiled binary
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "${SCRIPT_DIR}/_lib.sh"
+ROOT="$(vm_lib_root)"
+
+CLEAN_ALL=0
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --all)
+      CLEAN_ALL=1
+      shift
+      ;;
+    --help|-h)
+      echo "Usage: $0 [--all]"
+      echo ""
+      echo "Remove all openshell-vm cached build artifacts."
+      echo ""
+      echo "Options:"
+      echo " --all Also remove compiled binaries (target/debug/openshell-vm)"
+      exit 0
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
+echo "==> Cleaning openshell-vm artifacts..."
+
+removed=0
+
+# Remove a path if present, reporting its size (best-effort; "?" when du
+# fails) and bumping the shared `removed` counter.
+remove_if_exists() {
+  local path="$1"
+  local label="$2"
+  if [ -e "$path" ]; then
+    local size
+    size="$(du -sh "$path" 2>/dev/null | cut -f1 || echo "?")"
+    rm -rf "$path"
+    echo " Removed ${label} (${size}): ${path}"
+    removed=$((removed + 1))
+  fi
+}
+
+# Build artifacts under target/ (staging, downloads, and source builds)
+remove_if_exists "${ROOT}/target/vm-runtime" "uncompressed staging"
+remove_if_exists "${ROOT}/target/vm-runtime-compressed" "compressed artifacts"
+remove_if_exists "${ROOT}/target/vm-runtime-download" "downloaded tarballs"
+remove_if_exists "${ROOT}/target/vm-runtime-extracted" "extraction temp"
+remove_if_exists "${ROOT}/target/libkrun-build" "libkrun source build"
+remove_if_exists "${ROOT}/target/custom-runtime" "custom libkrunfw"
+remove_if_exists "${ROOT}/target/rootfs-build" "rootfs directory"
+
+# Named instance rootfs directories
+XDG_DATA="${XDG_DATA_HOME:-${HOME}/.local/share}"
+VM_DATA_DIR="${XDG_DATA}/openshell/openshell-vm"
+remove_if_exists "${VM_DATA_DIR}" "named instance rootfs"
+
+# Embedded runtime cache
+VM_RUNTIME_CACHE="${XDG_DATA}/openshell/vm-runtime"
+remove_if_exists "${VM_RUNTIME_CACHE}" "embedded runtime cache"
+
+if [ "$CLEAN_ALL" -eq 1 ]; then
+  # Remove compiled binaries and their sidecar runtime bundles
+  for profile in debug release; do
+    remove_if_exists "${ROOT}/target/${profile}/openshell-vm" "${profile} binary"
+    remove_if_exists "${ROOT}/target/${profile}/openshell-vm.runtime" "${profile} runtime bundle"
+  done
+fi
+
+echo ""
+if [ "$removed" -eq 0 ]; then
+  echo " Nothing to clean."
+else
+  echo " Removed ${removed} item(s)."
+fi
+echo ""
+echo "Next step: mise run vm:setup"
diff --git a/tasks/scripts/vm/vm-setup.sh b/tasks/scripts/vm/vm-setup.sh
new file mode 100755
index 000000000..16eb2aaa9
--- /dev/null
+++ b/tasks/scripts/vm/vm-setup.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# One-time setup for the openshell-vm runtime.
+#
+# Downloads pre-built runtime artifacts (libkrun, libkrunfw, gvproxy) from the
+# vm-dev GitHub Release, or builds them from source when --from-source is set.
+# After obtaining the runtime, compresses the artifacts for embedding into the
+# openshell-vm binary.
+#
+# Usage:
+#   ./vm-setup.sh                 # download pre-built (default, ~30s)
+#   ./vm-setup.sh --from-source   # build from source (~15-45min)
+#
+# Environment:
+#   FROM_SOURCE=1 - Equivalent to --from-source
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "${SCRIPT_DIR}/_lib.sh"
+ROOT="$(vm_lib_root)"
+
+FROM_SOURCE="${FROM_SOURCE:-0}"
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --from-source)
+      FROM_SOURCE=1
+      shift
+      ;;
+    --help|-h)
+      echo "Usage: $0 [--from-source]"
+      echo ""
+      echo "Set up the openshell-vm runtime (libkrun, libkrunfw, gvproxy)."
+      echo ""
+      echo "Options:"
+      echo " --from-source Build runtime from source instead of downloading (~15-45min)"
+      echo ""
+      echo "Environment:"
+      echo " FROM_SOURCE=1 Equivalent to --from-source"
+      exit 0
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      echo "Use --help for usage information" >&2
+      exit 1
+      ;;
+  esac
+done
+
+PLATFORM="$(detect_platform)"
+echo "==> openshell-vm setup"
+echo " Platform: ${PLATFORM}"
+echo " Mode: $([ "$FROM_SOURCE" = "1" ] && echo "build from source" || echo "download pre-built")"
+echo ""
+
+# ── Obtain runtime artifacts ────────────────────────────────────────────
+
+if [ "$FROM_SOURCE" = "1" ]; then
+  echo "==> Building runtime from source..."
+  echo ""
+
+  # NOTE(review): no default case arm — an unrecognized platform builds
+  # nothing here; presumably detect_platform already rejects it (confirm).
+  case "$PLATFORM" in
+    darwin-aarch64)
+      # macOS: build custom libkrunfw (kernel) then portable libkrun
+      "${ROOT}/crates/openshell-vm/runtime/build-custom-libkrunfw.sh"
+      echo ""
+      "${ROOT}/tasks/scripts/vm/build-libkrun-macos.sh"
+      ;;
+    linux-*)
+      # Linux: build both libkrunfw and libkrun in one go
+      "${ROOT}/tasks/scripts/vm/build-libkrun.sh"
+      ;;
+  esac
+  echo ""
+  echo "==> Compressing runtime artifacts for embedding..."
+  "${ROOT}/tasks/scripts/vm/compress-vm-runtime.sh"
+else
+  echo "==> Downloading pre-built runtime..."
+  "${ROOT}/tasks/scripts/vm/download-kernel-runtime.sh"
+fi
+
+# ── Validate ────────────────────────────────────────────────────────────
+
+OUTPUT_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${ROOT}/target/vm-runtime-compressed}"
+
+# Each platform needs its compressed libkrun, libkrunfw, and gvproxy.
+missing=0
+case "$PLATFORM" in
+  darwin-aarch64)
+    for f in libkrun.dylib.zst libkrunfw.5.dylib.zst gvproxy.zst; do
+      if [ ! -f "${OUTPUT_DIR}/${f}" ]; then
+        echo "ERROR: Missing ${OUTPUT_DIR}/${f}" >&2
+        missing=1
+      fi
+    done
+    ;;
+  linux-aarch64|linux-x86_64)
+    for f in libkrun.so.zst libkrunfw.so.5.zst gvproxy.zst; do
+      if [ ! -f "${OUTPUT_DIR}/${f}" ]; then
+        echo "ERROR: Missing ${OUTPUT_DIR}/${f}" >&2
+        missing=1
+      fi
+    done
+    ;;
+esac
+
+if [ "$missing" -eq 1 ]; then
+  echo "" >&2
+  echo "Setup failed: some runtime artifacts are missing." >&2
+  exit 1
+fi
+
+echo ""
+echo "==> Setup complete!"
+echo " Compressed artifacts in: ${OUTPUT_DIR}"
+echo ""
+echo "Next steps:"
+echo " mise run vm:rootfs --base # build rootfs (requires Docker)"
+echo " mise run vm # build and run the VM"
diff --git a/tasks/test.toml b/tasks/test.toml
index c383eafb5..f24ea6f2b 100644
--- a/tasks/test.toml
+++ b/tasks/test.toml
@@ -17,7 +17,7 @@ depends = ["e2e:python:gpu"]
 
 ["test:rust"]
 description = "Run Rust tests"
-run = "cargo test --workspace"
+run = "cargo test --workspace --exclude openshell-vm"
 hide = true
 
 ["test:python"]
@@ -47,3 +47,8 @@ description = "Run Python GPU e2e tests"
 depends = ["python:proto", "CLUSTER_GPU=1 cluster"]
 env = { UV_NO_SYNC = "1", PYTHONPATH = "python" }
 run = "uv run pytest -o python_files='test_*.py' -m gpu -n ${E2E_PARALLEL:-1} e2e/python"
+
+["e2e:vm"]
+description = "Boot openshell-vm and run smoke e2e (macOS ARM64; pass -- --vm-port=N [--vm-name=NAME] to reuse)"
+depends = ["build:docker:gateway", "vm:build"]
+run = "e2e/rust/e2e-vm.sh"
diff --git a/tasks/vm.toml b/tasks/vm.toml
new file mode 100644
index 000000000..ca06b08c1
--- /dev/null
+++ b/tasks/vm.toml
@@ -0,0 +1,44 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# openshell-vm development tasks
+#
+# Workflow:
+#   mise run vm:setup   # one-time: download pre-built runtime (~30s)
+#   mise run vm         # build + run the VM
+#   mise run vm:clean   # wipe everything and start over
+#
+# See crates/openshell-vm/README.md for full documentation.
+
+# ═══════════════════════════════════════════════════════════════════════════
+# Main Commands (workflow overview at the top of this file)
+# ═══════════════════════════════════════════════════════════════════════════
+
+[vm]
+description = "Build and run the openshell-vm microVM"
+depends = ["build:docker:gateway"]
+run = [
+  "mise run vm:build",
+  "tasks/scripts/vm/run-vm.sh",
+]
+
+["vm:build"]
+description = "Build the openshell-vm binary with embedded runtime"
+run = [
+  "tasks/scripts/vm/compress-vm-runtime.sh",
+  "OPENSHELL_VM_RUNTIME_COMPRESSED_DIR=$PWD/target/vm-runtime-compressed cargo build -p openshell-vm",
+  "tasks/scripts/vm/codesign-openshell-vm.sh",
+  "tasks/scripts/vm/bundle-vm-runtime.sh",
+]
+
+["vm:setup"]
+description = "One-time setup: download (or build) the VM runtime"
+run = "tasks/scripts/vm/vm-setup.sh"
+
+["vm:rootfs"]
+description = "Build the VM rootfs tarball (use -- --base for lightweight)"
+run = "tasks/scripts/vm/build-rootfs-tarball.sh"
+
+["vm:clean"]
+description = "Remove all VM cached artifacts (runtime, rootfs, builds)"
+run = "tasks/scripts/vm/vm-clean.sh"
diff --git a/uv.lock b/uv.lock
index 38a03ce29..3869daf05 100644
--- a/uv.lock
+++ b/uv.lock
@@ -537,6 +537,7 @@ dependencies = [
 dev = [
     { name = "grpcio-tools" },
     { name = "maturin" },
+    { name = "pyelftools" },
     { name = "pytest" },
     { name = "pytest-asyncio" },
     { name = "pytest-cov" },
@@ -566,6 +567,7 @@ requires-dist = [
 dev = [
     { name = "grpcio-tools", specifier = ">=1.60" },
     { name = "maturin", specifier = ">=1.5,<2.0" },
+    { name = "pyelftools", specifier = ">=0.30" },
     { name = "pytest", specifier = ">=8.0" },
     { name = "pytest-asyncio", specifier = ">=0.23" },
     { name = "pytest-cov", specifier = ">=4.0" },
@@ -635,6 +637,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e2/0d/8ba33fa83a7dcde13eb3c1c2a0c1cc29950a048bfed6d9b0d8b6bd710b4c/pydata_sphinx_theme-0.16.1-py3-none-any.whl", hash = "sha256:225331e8ac4b32682c18fcac5a57a6f717c4e632cea5dd0e247b55155faeccde", size = 6723264, upload-time = "2024-12-17T10:53:35.645Z" },
 ]
 
+[[package]]
+name = "pyelftools"
+version = "0.32"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b9/ab/33968940b2deb3d92f5b146bc6d4009a5f95d1d06c148ea2f9ee965071af/pyelftools-0.32.tar.gz", hash = "sha256:6de90ee7b8263e740c8715a925382d4099b354f29ac48ea40d840cf7aa14ace5", size = 15047199, upload-time = "2025-02-19T14:20:05.549Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/af/43/700932c4f0638c3421177144a2e86448c0d75dbaee2c7936bda3f9fd0878/pyelftools-0.32-py3-none-any.whl", hash = "sha256:013df952a006db5e138b1edf6d8a68ecc50630adbd0d83a2d41e7f846163d738", size = 188525, upload-time = "2025-02-19T14:19:59.919Z" },
+]
+
 [[package]]
 name = "pygments"
 version = "2.20.0"