diff --git a/docs.json b/docs.json index bcec05cf42..8cec7c7682 100644 --- a/docs.json +++ b/docs.json @@ -1132,10 +1132,7 @@ "pages": [ "training/api-reference" ], - "openapi": { - "source": "training/api-reference/openapi.json", - "directory": "training/api-reference" - } + "openapi": "training/api-reference/openapi.json" } ] }, @@ -2636,4 +2633,4 @@ } ], "baseUrl": "https://docs.wandb.ai" -} \ No newline at end of file +} diff --git a/scripts/reference-generation/training/README.md b/scripts/reference-generation/training/README.md index 8799de287a..664e1bf766 100644 --- a/scripts/reference-generation/training/README.md +++ b/scripts/reference-generation/training/README.md @@ -95,6 +95,16 @@ After running the scripts: grep "training/api-reference" docs.json ``` +## Known Issues and Workarounds + +### Missing Tags on Health Endpoints (RESOLVED) + +**Issue**: The upstream Training API's OpenAPI spec was missing `tags` fields on the health endpoints (`/v1/health` and `/v1/system-check`). This caused Mintlify to place these endpoints in an incorrect navigation hierarchy with an extra "API Reference" layer in the breadcrumb. + +**Status**: ✅ **FIXED** - The upstream fix was deployed on January 21, 2026 via [PR #213](https://github.com/coreweave/serverless-training/pull/213). The health endpoints now correctly include `"tags": ["health"]` in the OpenAPI spec. + +**Temporary Workaround**: The `sync_openapi_spec.py` script includes a `patch_spec()` function that automatically adds the missing tags when syncing. This workaround can be removed once the next sync pulls the fixed spec from production. 
def patch_spec(spec: dict) -> dict:
    """
    Apply patches to the OpenAPI spec to fix known issues.

    This is a workaround until the upstream Training API is fixed: the
    health endpoints (``/v1/health`` and ``/v1/system-check``) may lack a
    ``tags`` entry on their operations, so a ``["health"]`` tag is added
    wherever one is missing or empty.

    Args:
        spec: Parsed OpenAPI document. It is modified in place.

    Returns:
        The same ``spec`` object that was passed in, after patching.
    """
    health_endpoints = ("/v1/health", "/v1/system-check")
    # Operation keys allowed on an OpenAPI Path Item Object. The original
    # five come first so the order of printed messages is unchanged.
    http_methods = (
        "get",
        "post",
        "put",
        "delete",
        "patch",
        "head",
        "options",
        "trace",
    )

    # ``or {}`` also covers an explicit ``"paths": null`` in the document.
    paths = spec.get("paths") or {}
    patched = False

    for path in health_endpoints:
        path_item = paths.get(path)
        if not isinstance(path_item, dict):
            continue
        for method in http_methods:
            operation = path_item.get(method)
            if not isinstance(operation, dict):
                # Method absent, or present with a malformed (non-object) value.
                continue
            if operation.get("tags"):
                # Already tagged with at least one tag; leave untouched.
                continue
            # A missing, null, or empty tag list all leave the operation
            # ungrouped, so treat them the same way.
            operation["tags"] = ["health"]
            print(f" ✓ Added missing 'health' tag to {method.upper()} {path}")
            patched = True

    if patched:
        print(" ℹ Applied patches to fix upstream OpenAPI spec issues")

    return spec
@@ -138,6 +163,9 @@ def main(): print(" ✗ No local spec and couldn't fetch remote spec") return 1 + # Apply patches to fix known issues + remote_spec = patch_spec(remote_spec) + # Load local spec local_spec = load_local_spec(local_spec_path) diff --git a/training/api-reference.mdx b/training/api-reference.mdx index 9080663ceb..0c03083bbe 100644 --- a/training/api-reference.mdx +++ b/training/api-reference.mdx @@ -29,6 +29,7 @@ https://api.training.wandb.ai/v1 ### chat-completions - **[POST /v1/chat/completions](https://docs.wandb.ai/training/api-reference/chat-completions/create-chat-completion)** - Create Chat Completion +- **[POST /v1/chat/completions/](https://docs.wandb.ai/training/api-reference/chat-completions/create-chat-completion)** - Create Chat Completion ### models diff --git a/training/api-reference/openapi.json b/training/api-reference/openapi.json index 30b8fc5598..5f18e92fe2 100644 --- a/training/api-reference/openapi.json +++ b/training/api-reference/openapi.json @@ -563,6 +563,9 @@ }, "/v1/health": { "get": { + "tags": [ + "health" + ], "summary": "Health Check", "operationId": "health_check_v1_health_get", "responses": { @@ -579,6 +582,9 @@ }, "/v1/system-check": { "get": { + "tags": [ + "health" + ], "summary": "System Check", "description": "Check health of all system components.\n\nReturns:\n JSON with status of:\n - api: Always true (if endpoint is reachable)\n - database: Whether DB connection works\n - cpu_queue: Success, duration, and any errors\n - gpu_queue: Success, duration, and any errors\n\nReturns HTTP 503 if any checks fail.", "operationId": "system_check_v1_system_check_get",