From b5ccc36c9439f6ac31d9f85e3b4dfe907a1c5442 Mon Sep 17 00:00:00 2001 From: Chris Coutinho Date: Fri, 5 Jun 2026 14:25:24 +0200 Subject: [PATCH] fix(providers): pin an explicit timeout on Mistral embedding requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MistralProvider relied on the mistralai SDK's internal default timeout for embedding calls. Pass an explicit `timeout_ms` on every `embeddings.create_async` instead, so the bound is intentional and tunable rather than dependent on the SDK's hard-coded per-method default. A client-level `httpx.Timeout(..., connect=...)` is NOT a viable alternative here: the SDK's generated embeddings methods hard-default `timeout_ms` and pass that scalar straight into `httpx.AsyncClient.build_request(timeout=...)`, which replaces any client-configured timeout — so the timeout configured on an injected httpx client is ignored and a separate connect timeout cannot be expressed (single scalar only). This is the residual of upstream mistralai/client-python#449 (the original `timeout=None` override hang, fixed in v2.3.0; server-side hang tracked in #474). A NOTE in the code records this. We deliberately pin 2.4.5 (2.4.6 was a supply-chain compromise, #523). Co-Authored-By: Claude Opus 4.8 (1M context) --- nextcloud_mcp_server/providers/mistral.py | 21 +++++++++++++++++++++ tests/unit/providers/test_mistral.py | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/nextcloud_mcp_server/providers/mistral.py b/nextcloud_mcp_server/providers/mistral.py index b5f615843..ac491ea2f 100644 --- a/nextcloud_mcp_server/providers/mistral.py +++ b/nextcloud_mcp_server/providers/mistral.py @@ -27,6 +27,25 @@ # but we keep this in line with sibling providers (OpenAI=100, Ollama=32). BATCH_SIZE = 64 +# Per-request timeout (milliseconds) applied to embedding calls. 
+# +# NOTE: We pass this explicitly on every request instead of configuring a +# timeout on an injected httpx client, because the mistralai SDK ignores +# client-level timeouts for embeddings. Its generated ``embeddings.create*`` +# methods hard-default ``timeout_ms`` (60_000) and feed that scalar straight +# into ``httpx.AsyncClient.build_request(timeout=...)``, which *replaces* any +# ``httpx.Timeout`` configured on the client. Consequently the timeout on an +# injected ``AsyncClient(timeout=...)`` is silently overridden, and a distinct +# ``connect`` timeout cannot be expressed at all (the SDK exposes only a single scalar). +# - Upstream: mistralai/client-python#449 (the original "SDK passes +# timeout=None and overrides the client" hang; fixed in v2.3.0) plus the +# residual per-method-default override discussed there and tracked via +# #474. +# - We pin mistralai 2.4.5 on purpose (2.4.6 was a supply-chain compromise, +# #523), so setting the bound explicitly here keeps it intentional and +# independent of the SDK's internal default. +_EMBED_TIMEOUT_MS = 60_000 + _NO_EMBEDDING_MODEL_MSG = "Embedding not supported - no embedding_model configured" @@ -100,6 +119,7 @@ async def embed(self, text: str) -> list[float]: response = await self.client.embeddings.create_async( model=self.embedding_model, inputs=[text], + timeout_ms=_EMBED_TIMEOUT_MS, ) if not response.data or response.data[0].embedding is None: @@ -151,6 +171,7 @@ async def _embed_batch_request(self, batch: list[str]) -> list[list[float]]: response = await self.client.embeddings.create_async( model=self.embedding_model, inputs=batch, + timeout_ms=_EMBED_TIMEOUT_MS, ) # Defensive: response.data items have Optional fields. 
Sort by index diff --git a/tests/unit/providers/test_mistral.py b/tests/unit/providers/test_mistral.py index 0e38da15a..d6c7ddbc6 100644 --- a/tests/unit/providers/test_mistral.py +++ b/tests/unit/providers/test_mistral.py @@ -6,6 +6,7 @@ from mistralai.client.errors import SDKError from nextcloud_mcp_server.providers.mistral import ( + _EMBED_TIMEOUT_MS, BATCH_SIZE, MISTRAL_EMBEDDING_DIMENSIONS, MistralProvider, @@ -50,9 +51,12 @@ async def test_mistral_embedding_single(mock_mistral_client): embedding = await provider.embed("hello world") assert embedding == [0.1, 0.2, 0.3] + # An explicit timeout_ms is always passed: the SDK ignores client-level + # httpx timeouts for embeddings (see NOTE in mistral.py / upstream #449). mock_mistral_client.embeddings.create_async.assert_awaited_once_with( model="mistral-embed", inputs=["hello world"], + timeout_ms=_EMBED_TIMEOUT_MS, )