From b5ccc36c9439f6ac31d9f85e3b4dfe907a1c5442 Mon Sep 17 00:00:00 2001
From: Chris Coutinho <chris@coutinho.io>
Date: Fri, 5 Jun 2026 14:25:24 +0200
Subject: [PATCH] fix(providers): pin an explicit timeout on Mistral embedding
 requests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MistralProvider relied on the mistralai SDK's internal default timeout for
embedding calls. Pass an explicit `timeout_ms` on every
`embeddings.create_async` call instead, so the bound is intentional and tunable
rather than dependent on the SDK's hard-coded per-method default.

A client-level `httpx.Timeout(..., connect=...)` is NOT a viable alternative
here: the SDK's generated embeddings methods hard-default `timeout_ms` and pass
that scalar straight into the httpx client's `build_request(timeout=...)`,
which replaces any client-configured timeout. As a result, a timeout set on an
injected httpx client is ignored, and a separate connect timeout cannot be
expressed (the SDK accepts a single scalar only). This is the residual of
upstream mistralai/client-python#449 (the original `timeout=None` override
hang, fixed in v2.3.0; server-side hang tracked in #474). A NOTE in the code
records this. We deliberately pin mistralai 2.4.5 (2.4.6 was a supply-chain
compromise, #523).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 nextcloud_mcp_server/providers/mistral.py | 21 +++++++++++++++++++++
 tests/unit/providers/test_mistral.py      |  4 ++++
 2 files changed, 25 insertions(+)

diff --git a/nextcloud_mcp_server/providers/mistral.py b/nextcloud_mcp_server/providers/mistral.py
index b5f615843..ac491ea2f 100644
--- a/nextcloud_mcp_server/providers/mistral.py
+++ b/nextcloud_mcp_server/providers/mistral.py
@@ -27,6 +27,25 @@
 # but we keep this in line with sibling providers (OpenAI=100, Ollama=32).
 BATCH_SIZE = 64
 
+# Per-request timeout (milliseconds) applied to embedding calls.
+#
+# NOTE: We pass this explicitly on every request instead of configuring a
+# timeout on an injected httpx client, because the mistralai SDK ignores
+# client-level timeouts for embeddings. Its generated ``embeddings.create*``
+# methods hard-default ``timeout_ms`` (60_000) and feed that scalar straight
+# into the httpx client's ``build_request(timeout=...)``, which *replaces* any
+# ``httpx.Timeout`` configured on the client. Consequently an injected
+# ``AsyncClient(timeout=...)`` is silently dropped, and a distinct ``connect``
+# timeout cannot be expressed at all (the SDK exposes only a single scalar).
+#   - Upstream: mistralai/client-python#449 (the original "SDK passes
+#     timeout=None and overrides the client" hang; fixed in v2.3.0) plus the
+#     residual per-method-default override discussed there and tracked via
+#     #474.
+#   - We pin mistralai 2.4.5 on purpose (2.4.6 was a supply-chain compromise,
+#     #523), so setting the bound explicitly here keeps it intentional and
+#     independent of the SDK's internal default.
+_EMBED_TIMEOUT_MS = 60_000
+
 _NO_EMBEDDING_MODEL_MSG = "Embedding not supported - no embedding_model configured"
 
 
@@ -100,6 +119,7 @@ async def embed(self, text: str) -> list[float]:
         response = await self.client.embeddings.create_async(
             model=self.embedding_model,
             inputs=[text],
+            timeout_ms=_EMBED_TIMEOUT_MS,
         )
 
         if not response.data or response.data[0].embedding is None:
@@ -151,6 +171,7 @@ async def _embed_batch_request(self, batch: list[str]) -> list[list[float]]:
         response = await self.client.embeddings.create_async(
             model=self.embedding_model,
             inputs=batch,
+            timeout_ms=_EMBED_TIMEOUT_MS,
         )
 
         # Defensive: response.data items have Optional fields. Sort by index
diff --git a/tests/unit/providers/test_mistral.py b/tests/unit/providers/test_mistral.py
index 0e38da15a..d6c7ddbc6 100644
--- a/tests/unit/providers/test_mistral.py
+++ b/tests/unit/providers/test_mistral.py
@@ -6,6 +6,7 @@
 from mistralai.client.errors import SDKError
 
 from nextcloud_mcp_server.providers.mistral import (
+    _EMBED_TIMEOUT_MS,
     BATCH_SIZE,
     MISTRAL_EMBEDDING_DIMENSIONS,
     MistralProvider,
@@ -50,9 +51,12 @@ async def test_mistral_embedding_single(mock_mistral_client):
     embedding = await provider.embed("hello world")
 
     assert embedding == [0.1, 0.2, 0.3]
+    # An explicit timeout_ms is always passed: the SDK ignores client-level
+    # httpx timeouts for embeddings (see NOTE in mistral.py / upstream #449).
     mock_mistral_client.embeddings.create_async.assert_awaited_once_with(
         model="mistral-embed",
         inputs=["hello world"],
+        timeout_ms=_EMBED_TIMEOUT_MS,
     )