diff --git a/nextcloud_mcp_server/providers/mistral.py b/nextcloud_mcp_server/providers/mistral.py index b5f61584..ac491ea2 100644 --- a/nextcloud_mcp_server/providers/mistral.py +++ b/nextcloud_mcp_server/providers/mistral.py @@ -27,6 +27,25 @@ # but we keep this in line with sibling providers (OpenAI=100, Ollama=32). BATCH_SIZE = 64 +# Per-request timeout (milliseconds) applied to embedding calls. +# +# NOTE: We pass this explicitly on every request instead of configuring a +# timeout on an injected httpx client, because the mistralai SDK ignores +# client-level timeouts for embeddings. Its generated ``embeddings.create*`` +# methods hard-default ``timeout_ms`` (60_000) and feed that scalar straight +# into the httpx client's ``build_request(timeout=...)``, which *replaces* any +# ``httpx.Timeout`` configured on the client. Consequently an injected +# ``AsyncClient(timeout=...)`` is silently dropped, and a distinct ``connect`` +# timeout cannot be expressed at all (the SDK exposes only a single scalar). +# - Upstream: mistralai/client-python#449 (the original "SDK passes +# timeout=None and overrides the client" hang; fixed in v2.3.0) plus the +# residual per-method-default override discussed there and tracked via +# #474. +# - We pin mistralai 2.4.5 on purpose (2.4.6 was a supply-chain compromise, +# #523), so setting the bound explicitly here keeps it intentional and +# independent of the SDK's internal default. 
+_EMBED_TIMEOUT_MS = 60_000 + _NO_EMBEDDING_MODEL_MSG = "Embedding not supported - no embedding_model configured" @@ -100,6 +119,7 @@ async def embed(self, text: str) -> list[float]: response = await self.client.embeddings.create_async( model=self.embedding_model, inputs=[text], + timeout_ms=_EMBED_TIMEOUT_MS, ) if not response.data or response.data[0].embedding is None: @@ -151,6 +171,7 @@ async def _embed_batch_request(self, batch: list[str]) -> list[list[float]]: response = await self.client.embeddings.create_async( model=self.embedding_model, inputs=batch, + timeout_ms=_EMBED_TIMEOUT_MS, ) # Defensive: response.data items have Optional fields. Sort by index diff --git a/tests/unit/providers/test_mistral.py b/tests/unit/providers/test_mistral.py index 0e38da15..d6c7ddbc 100644 --- a/tests/unit/providers/test_mistral.py +++ b/tests/unit/providers/test_mistral.py @@ -6,6 +6,7 @@ from mistralai.client.errors import SDKError from nextcloud_mcp_server.providers.mistral import ( + _EMBED_TIMEOUT_MS, BATCH_SIZE, MISTRAL_EMBEDDING_DIMENSIONS, MistralProvider, @@ -50,9 +51,12 @@ async def test_mistral_embedding_single(mock_mistral_client): embedding = await provider.embed("hello world") assert embedding == [0.1, 0.2, 0.3] + # An explicit timeout_ms is always passed: the SDK ignores client-level + # httpx timeouts for embeddings (see NOTE in mistral.py / upstream #449). mock_mistral_client.embeddings.create_async.assert_awaited_once_with( model="mistral-embed", inputs=["hello world"], + timeout_ms=_EMBED_TIMEOUT_MS, )