Merge pull request #54 from harche/feat/recreate-reaped-sandboxes #229
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Continuous-integration workflow.
name: ci

# Triggers: pushes to the main branch, and every pull request.
on:
  push:
    branches: [main]
  pull_request:

# The workflow token is limited to read-only access to repository contents.
permissions:
  contents: read

jobs:
# Job: install, lint, build and test on each Node.js version in the matrix.
build-and-test:
  runs-on: ubuntu-latest
  strategy:
    # A failure on one Node.js version does not cancel the other matrix leg.
    fail-fast: false
    matrix:
      node-version:
        - 20.x
        - 22.x
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-node@v4
      with:
        node-version: ${{ matrix.node-version }}
        cache: npm
    - run: npm ci
    - run: npm run lint
    # Individual workspaces are built (and protobuf code generated) before
    # the root-level build runs.
    - run: npm run build -w @prodisco/search-libs
    - run: npm run build -w @prodisco/prometheus-client
    - run: npm run build -w @prodisco/loki-client
    - run: npm run proto:generate -w @prodisco/sandbox-server
    - run: npm run build -w @prodisco/sandbox-server
    - run: npm run build
    - run: npm run test
    - name: Run transport security tests
      run: npm run test:security -w @prodisco/sandbox-server
# Job: build all packages, install kind and kubectl, then run the container
# integration script. Starts only after build-and-test has succeeded.
container-integration:
  needs: build-and-test
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-node@v4
      with:
        node-version: 22.x
        cache: npm

    - name: Install dependencies
      run: npm ci

    - name: Build packages
      run: |
        npm run build -w @prodisco/search-libs
        npm run build -w @prodisco/prometheus-client
        npm run build -w @prodisco/loki-client
        npm run proto:generate -w @prodisco/sandbox-server
        npm run build -w @prodisco/sandbox-server
        npm run build

    # install_only: the action installs the kind binary without creating a
    # cluster in this step.
    - name: Install kind
      uses: helm/kind-action@v1
      with:
        install_only: true

    - name: Install kubectl
      uses: azure/setup-kubectl@v4

    - name: Run container integration tests
      run: ./scripts/integration/run-container-integration.sh

    # Only runs when an earlier step failed; keeps the artifacts for a week.
    - name: Upload artifacts on failure
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: container-integration-artifacts
        path: artifacts/container-integration/
        retention-days: 7
# Job: create a Kind cluster and run the npm-pack integration script against
# it. Starts only after build-and-test has succeeded.
npm-pack-integration:
  needs: build-and-test
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-node@v4
      with:
        node-version: 22.x
        cache: npm

    - name: Install dependencies
      run: npm ci

    - name: Create Kind cluster
      uses: helm/kind-action@v1
      with:
        cluster_name: npm-pack-int

    - name: Install kubectl
      uses: azure/setup-kubectl@v4

    # Block until every node reports Ready (at most 120s), then print the
    # cluster endpoints.
    - name: Wait for cluster to be ready
      run: |
        kubectl wait --for=condition=Ready nodes --all --timeout=120s
        kubectl cluster-info

    - name: Run npm pack integration tests
      timeout-minutes: 5
      run: ./scripts/integration/run-npm-pack-integration.sh

    # Only runs when an earlier step failed; keeps the artifacts for a week.
    - name: Upload artifacts on failure
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: npm-pack-integration-artifacts
        path: artifacts/npm-pack-integration/
        retention-days: 7
# Job: create a Kind cluster, install kube-prometheus-stack via Helm, expose
# Prometheus on localhost:9090 and run the sandbox-server cluster-integration
# tests. Starts only after build-and-test has succeeded.
cluster-integration:
  runs-on: ubuntu-latest
  needs: build-and-test
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-node@v4
      with:
        node-version: 22.x
        cache: npm
    - name: Install dependencies
      run: npm ci
    - name: Build packages
      run: |
        npm run build -w @prodisco/search-libs
        npm run build -w @prodisco/prometheus-client
        npm run build -w @prodisco/loki-client
        npm run proto:generate -w @prodisco/sandbox-server
        npm run build -w @prodisco/sandbox-server
    - name: Install helm
      uses: azure/setup-helm@v4
    - name: Create Kind cluster
      uses: helm/kind-action@v1
      with:
        cluster_name: cluster-int
    - name: Install kubectl
      uses: azure/setup-kubectl@v4
    - name: Wait for cluster to be ready
      run: |
        kubectl wait --for=condition=Ready nodes --all --timeout=120s
        kubectl cluster-info
    - name: Install Prometheus
      run: |
        # Install kube-prometheus-stack via helm
        helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
        helm repo update
        kubectl create namespace monitoring
        helm install prometheus prometheus-community/kube-prometheus-stack \
          --namespace monitoring \
          --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false \
          --set grafana.enabled=false \
          --set alertmanager.enabled=false \
          --wait --timeout 5m
    - name: Port-forward Prometheus
      run: |
        kubectl -n monitoring port-forward svc/prometheus-kube-prometheus-prometheus 9090:9090 &
        # Poll the health endpoint instead of sleeping a fixed time, and fail
        # the step if Prometheus never answers with a success status
        # (curl -f turns HTTP error responses into a non-zero exit code).
        healthy=false
        for attempt in $(seq 1 30); do
          if curl -sf http://localhost:9090/-/healthy; then
            healthy=true
            break
          fi
          echo "Waiting for Prometheus... ($attempt/30)"
          sleep 1
        done
        if [ "$healthy" != "true" ]; then
          echo "Prometheus health check failed" >&2
          exit 1
        fi
    - name: Run cluster integration tests
      timeout-minutes: 10
      run: npm test -w @prodisco/sandbox-server -- cluster-integration
    - name: Collect logs on failure
      if: failure()
      run: |
        mkdir -p artifacts/cluster-integration
        # Every collection command is best-effort so that one failing command
        # does not prevent the remaining diagnostics from being gathered.
        kubectl get pods -A > artifacts/cluster-integration/pods.txt 2>&1 || true
        kubectl -n monitoring logs -l app.kubernetes.io/name=prometheus --tail=100 > artifacts/cluster-integration/prometheus.log 2>&1 || true
        kubectl cluster-info dump > artifacts/cluster-integration/cluster-dump.txt 2>&1 || true
    - name: Upload artifacts on failure
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-integration-artifacts
        path: artifacts/cluster-integration/
        retention-days: 7
# Job: build the MCP server image, deploy it into a Kind cluster, port-forward
# it to localhost:3000 and exercise the /health and /mcp HTTP endpoints with
# curl. Starts only after build-and-test has succeeded.
http-transport-integration:
  runs-on: ubuntu-latest
  needs: build-and-test
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-node@v4
      with:
        node-version: 22.x
        cache: npm
    - name: Install dependencies
      run: npm ci
    - name: Build packages
      run: |
        npm run build -w @prodisco/search-libs
        npm run build -w @prodisco/prometheus-client
        npm run build -w @prodisco/loki-client
        npm run proto:generate -w @prodisco/sandbox-server
        npm run build -w @prodisco/sandbox-server
        npm run build
    - name: Create Kind cluster
      uses: helm/kind-action@v1
      with:
        cluster_name: http-int
    - name: Install kubectl
      uses: azure/setup-kubectl@v4
    - name: Wait for cluster to be ready
      run: |
        kubectl wait --for=condition=Ready nodes --all --timeout=120s
        kubectl cluster-info
    - name: Build and load Docker image
      run: |
        npm run docker:build:config -- --config examples/prodisco.kubernetes.yaml --tag test --mcp-image prodisco/mcp-server --skip-sandbox
        kind load docker-image prodisco/mcp-server:test --name http-int
    - name: Deploy MCP server
      run: kubectl apply -f k8s/mcp-server.yaml
    - name: Wait for deployment
      run: |
        kubectl wait --for=condition=Available --timeout=180s deployment/mcp-server -n prodisco
        kubectl wait --for=condition=Ready --timeout=60s pod -l app=mcp-server -n prodisco
    - name: Start port-forward
      run: |
        kubectl port-forward -n prodisco svc/mcp-server 3000:3000 &
        # Poll until the forwarded port accepts connections instead of relying
        # on a fixed sleep; the response content is asserted in the next step.
        reachable=false
        for attempt in $(seq 1 30); do
          if curl -s -o /dev/null http://localhost:3000/health; then
            reachable=true
            break
          fi
          echo "Waiting for port-forward... ($attempt/30)"
          sleep 1
        done
        if [ "$reachable" != "true" ]; then
          echo "MCP server is not reachable on localhost:3000" >&2
          exit 1
        fi
    - name: Test health endpoint
      run: |
        HEALTH=$(curl -s http://localhost:3000/health)
        echo "Health response: $HEALTH"
        echo "$HEALTH" | grep -q '"status":"ok"'
    - name: Test MCP initialize
      run: |
        RESPONSE=$(curl -s -i -X POST http://localhost:3000/mcp \
          -H "Content-Type: application/json" \
          -H "Accept: application/json, text/event-stream" \
          -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"ci-test","version":"1.0.0"}}}')
        echo "$RESPONSE"
        # Verify we got a session ID
        echo "$RESPONSE" | grep -i "mcp-session-id:"
        # Verify response contains server info
        echo "$RESPONSE" | grep -q "kubernetes-mcp"
    - name: Test MCP tools/list
      run: |
        # Get session ID
        INIT_RESPONSE=$(curl -s -i -X POST http://localhost:3000/mcp \
          -H "Content-Type: application/json" \
          -H "Accept: application/json, text/event-stream" \
          -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"ci-test","version":"1.0.0"}}}')
        SESSION_ID=$(echo "$INIT_RESPONSE" | grep -i "mcp-session-id:" | cut -d' ' -f2 | tr -d '\r')
        echo "Session ID: $SESSION_ID"
        # The extraction pipeline does not fail when the header is absent, so
        # check explicitly rather than sending an empty session header.
        if [ -z "$SESSION_ID" ]; then
          echo "No mcp-session-id header in initialize response" >&2
          exit 1
        fi
        # List tools
        TOOLS=$(curl -s -X POST http://localhost:3000/mcp \
          -H "Content-Type: application/json" \
          -H "Accept: application/json, text/event-stream" \
          -H "mcp-session-id: $SESSION_ID" \
          -d '{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}')
        echo "$TOOLS"
        echo "$TOOLS" | grep -q "prodisco.searchTools"
        echo "$TOOLS" | grep -q "prodisco.runSandbox"
    - name: Test prodisco.searchTools
      run: |
        # Get session ID
        INIT_RESPONSE=$(curl -s -i -X POST http://localhost:3000/mcp \
          -H "Content-Type: application/json" \
          -H "Accept: application/json, text/event-stream" \
          -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"ci-test","version":"1.0.0"}}}')
        SESSION_ID=$(echo "$INIT_RESPONSE" | grep -i "mcp-session-id:" | cut -d' ' -f2 | tr -d '\r')
        # Fail early with a clear message when no session ID was returned.
        if [ -z "$SESSION_ID" ]; then
          echo "No mcp-session-id header in initialize response" >&2
          exit 1
        fi
        # Call searchTools with current schema (methodName, documentType, library, category)
        RESULT=$(curl -s -X POST http://localhost:3000/mcp \
          -H "Content-Type: application/json" \
          -H "Accept: application/json, text/event-stream" \
          -H "mcp-session-id: $SESSION_ID" \
          -d '{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"prodisco.searchTools","arguments":{"methodName":"listNamespace","documentType":"method","library":"@kubernetes/client-node","limit":3}}}')
        echo "$RESULT"
        echo "$RESULT" | grep -q "result"
    - name: Collect logs on failure
      if: failure()
      run: |
        mkdir -p artifacts/http-integration
        # Every collection command is best-effort so that one failing command
        # does not prevent the remaining diagnostics from being gathered.
        kubectl get pods -A > artifacts/http-integration/pods.txt 2>&1 || true
        kubectl -n prodisco logs -l app=mcp-server --tail=200 > artifacts/http-integration/mcp-server.log 2>&1 || true
        kubectl describe pod -n prodisco -l app=mcp-server > artifacts/http-integration/pod-describe.txt 2>&1 || true
    - name: Upload artifacts on failure
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: http-integration-artifacts
        path: artifacts/http-integration/
        retention-days: 7
# Job: create a Kind cluster, install cert-manager, issue server and client
# certificates, deploy sandbox-server in TLS transport mode and run the
# sandbox-server e2e test script. Starts only after build-and-test succeeded.
tls-integration:
  runs-on: ubuntu-latest
  needs: build-and-test
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-node@v4
      with:
        node-version: 22.x
        cache: npm
    - name: Install dependencies
      run: npm ci
    - name: Build packages
      run: |
        npm run build -w @prodisco/search-libs
        npm run build -w @prodisco/prometheus-client
        npm run build -w @prodisco/loki-client
        npm run proto:generate -w @prodisco/sandbox-server
        npm run build -w @prodisco/sandbox-server
    - name: Create Kind cluster
      uses: helm/kind-action@v1
      with:
        cluster_name: prodisco-test
    - name: Install kubectl
      uses: azure/setup-kubectl@v4
    - name: Wait for cluster to be ready
      run: |
        kubectl wait --for=condition=Ready nodes --all --timeout=120s
        kubectl cluster-info
    - name: Install cert-manager
      run: |
        kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.0/cert-manager.yaml
        kubectl wait --for=condition=Available --timeout=120s deployment/cert-manager -n cert-manager
        kubectl wait --for=condition=Available --timeout=120s deployment/cert-manager-webhook -n cert-manager
        kubectl wait --for=condition=Available --timeout=120s deployment/cert-manager-cainjector -n cert-manager
    - name: Build and load Docker image
      run: |
        npm run docker:build:config -- --config examples/prodisco.kubernetes.yaml --tag test --sandbox-image prodisco/sandbox-server --skip-mcp
        kind load docker-image prodisco/sandbox-server:test --name prodisco-test
    - name: Create namespace
      run: kubectl create namespace prodisco
    - name: Apply cert-manager resources
      run: |
        kubectl apply -f packages/sandbox-server/k8s/cert-manager/issuer.yaml
        # Wait for CA secret to be created (up to 30 x 2s); fail the step on
        # timeout instead of applying the certificates without it.
        ca_ready=false
        for i in {1..30}; do
          if kubectl get secret sandbox-ca-secret -n prodisco &>/dev/null; then
            echo "CA secret created"
            ca_ready=true
            break
          fi
          echo "Waiting for CA secret... ($i/30)"
          sleep 2
        done
        if [ "$ca_ready" != "true" ]; then
          echo "Timed out waiting for secret sandbox-ca-secret" >&2
          exit 1
        fi
        kubectl apply -f packages/sandbox-server/k8s/cert-manager/server-certificate.yaml
        kubectl apply -f packages/sandbox-server/k8s/cert-manager/client-certificate.yaml
    - name: Wait for certificates
      run: |
        # kubectl wait exits non-zero when a certificate does not reach the
        # Ready condition within the timeout, so a stuck issuance fails here.
        for cert in sandbox-server-tls sandbox-client-tls; do
          kubectl wait --for=condition=Ready --timeout=60s "certificate/$cert" -n prodisco
          echo "$cert is ready"
        done
    - name: Deploy sandbox-server with TLS
      run: |
        # Apply deployment with TLS configuration
        cat <<EOF | kubectl apply -f -
        apiVersion: apps/v1
        kind: Deployment
        metadata:
          name: sandbox-server
          namespace: prodisco
          labels:
            app: sandbox-server
        spec:
          replicas: 1
          selector:
            matchLabels:
              app: sandbox-server
          template:
            metadata:
              labels:
                app: sandbox-server
            spec:
              serviceAccountName: sandbox-server
              containers:
                - name: sandbox
                  image: prodisco/sandbox-server:test
                  imagePullPolicy: IfNotPresent
                  ports:
                    - containerPort: 50051
                      name: grpc
                      protocol: TCP
                  env:
                    - name: SANDBOX_USE_TCP
                      value: "true"
                    - name: SANDBOX_TCP_HOST
                      value: "0.0.0.0"
                    - name: SANDBOX_TCP_PORT
                      value: "50051"
                    - name: SCRIPTS_CACHE_DIR
                      value: "/tmp/prodisco-scripts"
                    - name: SANDBOX_TRANSPORT_MODE
                      value: "tls"
                    - name: SANDBOX_TLS_CERT_PATH
                      value: "/etc/sandbox-tls/tls.crt"
                    - name: SANDBOX_TLS_KEY_PATH
                      value: "/etc/sandbox-tls/tls.key"
                    - name: SANDBOX_TLS_CA_PATH
                      value: "/etc/sandbox-tls/ca.crt"
                  resources:
                    requests:
                      memory: "128Mi"
                      cpu: "100m"
                    limits:
                      memory: "512Mi"
                      cpu: "500m"
                  # Note: Native gRPC probes don't support TLS, so we use exec probe
                  readinessProbe:
                    exec:
                      command:
                        - /bin/sh
                        - -c
                        - "kill -0 1"
                    initialDelaySeconds: 5
                    periodSeconds: 10
                  livenessProbe:
                    exec:
                      command:
                        - /bin/sh
                        - -c
                        - "kill -0 1"
                    initialDelaySeconds: 10
                    periodSeconds: 30
                  volumeMounts:
                    - name: scripts-cache
                      mountPath: /tmp/prodisco-scripts
                    - name: tls-certs
                      mountPath: /etc/sandbox-tls
                      readOnly: true
              volumes:
                - name: scripts-cache
                  emptyDir: {}
                - name: tls-certs
                  secret:
                    secretName: sandbox-server-tls
        ---
        apiVersion: v1
        kind: ServiceAccount
        metadata:
          name: sandbox-server
          namespace: prodisco
        ---
        apiVersion: rbac.authorization.k8s.io/v1
        kind: ClusterRole
        metadata:
          name: sandbox-server
        rules:
          - apiGroups: [""]
            resources: ["pods", "pods/log", "services", "endpoints", "configmaps", "secrets", "namespaces", "nodes", "persistentvolumes", "persistentvolumeclaims", "events", "serviceaccounts"]
            verbs: ["get", "list", "watch"]
          - apiGroups: ["apps"]
            resources: ["deployments", "daemonsets", "replicasets", "statefulsets"]
            verbs: ["get", "list", "watch"]
          - apiGroups: ["batch"]
            resources: ["jobs", "cronjobs"]
            verbs: ["get", "list", "watch"]
        ---
        apiVersion: rbac.authorization.k8s.io/v1
        kind: ClusterRoleBinding
        metadata:
          name: sandbox-server
        roleRef:
          apiGroup: rbac.authorization.k8s.io
          kind: ClusterRole
          name: sandbox-server
        subjects:
          - kind: ServiceAccount
            name: sandbox-server
            namespace: prodisco
        ---
        apiVersion: v1
        kind: Service
        metadata:
          name: sandbox-server
          namespace: prodisco
          labels:
            app: sandbox-server
        spec:
          type: ClusterIP
          ports:
            - port: 50051
              targetPort: 50051
              protocol: TCP
              name: grpc
          selector:
            app: sandbox-server
        EOF
    - name: Wait for deployment
      run: kubectl wait --for=condition=Available --timeout=120s deployment/sandbox-server -n prodisco
    - name: Run TLS integration tests
      timeout-minutes: 10
      run: SANDBOX_E2E_TESTS=true npm run test:e2e -w @prodisco/sandbox-server
    - name: Collect logs on failure
      if: failure()
      run: |
        mkdir -p artifacts/tls-integration
        # Every collection command is best-effort so that one failing command
        # does not prevent the remaining diagnostics from being gathered.
        kubectl get pods -A > artifacts/tls-integration/pods.txt 2>&1 || true
        kubectl -n prodisco logs -l app=sandbox-server --tail=100 > artifacts/tls-integration/sandbox-server.log 2>&1 || true
        kubectl get certificates -n prodisco -o yaml > artifacts/tls-integration/certificates.yaml 2>&1 || true
        kubectl get secrets -n prodisco > artifacts/tls-integration/secrets.txt 2>&1 || true
        kubectl cluster-info dump > artifacts/tls-integration/cluster-dump.txt 2>&1 || true
    - name: Upload artifacts on failure
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: tls-integration-artifacts
        path: artifacts/tls-integration/
        retention-days: 7