Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/opencode/src/flag/flag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ export const Flag = {
MIMOCODE_DISABLE_MOUSE: truthy("MIMOCODE_DISABLE_MOUSE"),
MIMOCODE_OUTPUT_LENGTH_CONTINUATION_LIMIT: number("MIMOCODE_OUTPUT_LENGTH_CONTINUATION_LIMIT") ?? 3,
MIMOCODE_INVALID_OUTPUT_CONTINUATION_LIMIT: number("MIMOCODE_INVALID_OUTPUT_CONTINUATION_LIMIT") ?? 2,
MIMOCODE_TEXT_TOOL_CALL_RETRY_LIMIT: number("MIMOCODE_TEXT_TOOL_CALL_RETRY_LIMIT") ?? 2,

// Sliding-window n-gram repetition detection for streamed reasoning + text.
// An n-gram of size N appearing REPEAT_THRESHOLD times within the last
Expand Down
25 changes: 25 additions & 0 deletions packages/opencode/src/session/classify.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { MessageV2 } from "./message-v2"
export type StepClassification =
| { type: "final"; degraded?: boolean }
| { type: "continue" }
| { type: "text-tool-call" }
| { type: "filtered" }
| { type: "think-only" }
| { type: "invalid"; reason: string }
Expand Down Expand Up @@ -58,6 +59,30 @@ export function classifyAssistantStep(input: {
// 2. Nothing finalized yet.
if (!assistant.finish) return { type: "continue" }

// 3a. Text-form tool call: the model serialized a tool call as PROSE TEXT
// instead of emitting a structured tool_use. Signature: finish "tool-calls"
// but NO structured tool part (a real tool part would have re-looped at #1)
// and text carrying tool-call markup. Must precede the unconditional
// tool-calls continue below, which would otherwise swallow this state.
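// Guards: skip if this turn was already discarded (assistant.error set) so it
// is not re-detected and does not burn another retry — note that such a turn
// still has finish "tool-calls", so it is caught by the unconditional
// tool-calls `continue` at #3 directly below rather than reaching `failed`
// at #5. Also skip a stale/resumed turn the conversation already moved past
// (mirrors the #4 staleness guard) so a degraded turn left in history can't
// re-fire across turns/resumes.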
if (
assistant.finish === "tool-calls" &&
!assistant.error &&
input.lastUser.id < assistant.id &&
!input.parts.some((part) => part.type === "tool") &&
input.parts.some(
(part) =>
part.type === "text" &&
!part.synthetic &&
!part.ignored &&
/<invoke name=|<parameter name=|<\/invoke>|<function_calls>/.test(part.text),
)
)
return { type: "text-tool-call" }

// 3. Provider-executed-only tool step (no client tool part left, see #1).
if (assistant.finish === "tool-calls") return { type: "continue" }

Expand Down
2 changes: 2 additions & 0 deletions packages/opencode/src/session/message-v2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ export const ContextOverflowError = NamedError.create(
z.object({ message: z.string(), responseBody: z.string().optional() }),
)
export const InvalidOutputError = NamedError.create("InvalidOutputError", z.object({ message: z.string() }))
export const TextToolCallError = NamedError.create("TextToolCallError", z.object({ message: z.string() }))
export const ContentFilterError = NamedError.create("ContentFilterError", z.object({ message: z.string() }))
export const ModelError = NamedError.create("ModelError", z.object({ message: z.string() }))

Expand Down Expand Up @@ -445,6 +446,7 @@ export const Assistant = Base.extend({
StructuredOutputError.Schema,
ContextOverflowError.Schema,
InvalidOutputError.Schema,
TextToolCallError.Schema,
ContentFilterError.Schema,
ModelError.Schema,
APIError.Schema,
Expand Down
81 changes: 81 additions & 0 deletions packages/opencode/src/session/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ const PREDICT_NUDGE = `Based on the conversation above, write the user's most li

const OUTPUT_LENGTH_CONTINUATION_LIMIT = Flag.MIMOCODE_OUTPUT_LENGTH_CONTINUATION_LIMIT
const INVALID_OUTPUT_CONTINUATION_LIMIT = Flag.MIMOCODE_INVALID_OUTPUT_CONTINUATION_LIMIT
const TEXT_TOOL_CALL_RETRY_LIMIT = Flag.MIMOCODE_TEXT_TOOL_CALL_RETRY_LIMIT

const log = Log.create({ service: "session.prompt" })

Expand Down Expand Up @@ -1803,6 +1804,10 @@ NOTE: At any point in time through this workflow you should feel free to ask the
// Kept separate from invalidContinuations (generic invalid) so the two never
// pollute each other. Local to runLoop, so it resets to zero automatically on
// each new user turn.
let structuredRetries = 0
// Bounded retries for text-form tool calls (model wrote a tool call as
// prose text instead of a structured tool_use). Local to runLoop so each
// fresh user turn starts clean.
let textToolCallRetries = 0
const resolvedAgentID = agentID ?? "main"
// Tracks plugin-driven cancellation (session.pre OR any session.userQuery.pre)
// so session.post reports outcome="cancelled" instead of "error".
Expand Down Expand Up @@ -2191,6 +2196,69 @@ NOTE: At any point in time through this workflow you should feel free to ask the
return true
})

// Text-form tool call recovery. The model serialized a tool call as prose
// text instead of a structured tool_use (a degraded state under large
// context). The bad assistant turn is DISCARDED from history by setting
// assistant.error (toModelMessages skips a message whose info.error is
// set, message-v2.ts), so it can neither strand the conversation on an
// assistant turn (provider prefill rejection) nor poison later context.
// We then retry the request (caller does `continue`, no new message). On
// exhaustion the error stays terminal. Returns true ⇒ continue; false ⇒ break.
const autoRetryTextToolCall = Effect.fn("SessionPrompt.autoRetryTextToolCall")(function* (input: {
  lastUser: MessageV2.User
  assistant: MessageV2.Assistant
}) {
  const { lastUser, assistant } = input

  // A turn that already carries an error was discarded on an earlier pass;
  // do not spend another retry on it.
  if (assistant.error) return false

  // Tag the degraded turn with TextToolCallError and persist it. The intent is
  // that request history drops the turn (toModelMessages, defined outside this
  // block, is expected to skip messages whose info.error is set).
  assistant.error = new MessageV2.TextToolCallError({
    message: "Model emitted a tool call as text instead of a structured tool call.",
  }).toObject()
  yield* sessions.updateMessage(assistant)

  // Retry budget spent: surface the error on the bus and tell the caller to
  // stop. The error written above stays on the message as the terminal state.
  const exhausted = textToolCallRetries >= TEXT_TOOL_CALL_RETRY_LIMIT
  if (exhausted) {
    yield* bus.publish(Session.Event.Error, {
      sessionID: assistant.sessionID,
      error: assistant.error,
    })
    return false
  }

  textToolCallRetries += 1
  yield* slog.info("retrying text-form tool call", { attempt: textToolCallRetries })

  // Reminder shown to the model on the retry, sent as a synthetic text part.
  const reminder = [
    "<system-reminder>",
    "Your previous response wrote a tool call as plain text instead of invoking the tool.",
    "Re-issue it through the real tool channel — emit a structured tool call, not text.",
    "Do not paste the tool call as text again.",
    "</system-reminder>",
  ].join("\n")

  // A new user message (cloned from lastUser's routing fields) is appended so
  // the discarded assistant turn is older than the latest user turn and the
  // loop goes on to generate again instead of re-detecting the same turn.
  const { sessionID, agentID, agent, model, tools, format } = lastUser
  const nudge = yield* sessions.updateMessage({
    id: MessageID.ascending(),
    role: "user" as const,
    sessionID,
    agentID,
    agent,
    model,
    tools,
    format,
    time: { created: Date.now() },
  })
  yield* sessions.updatePart({
    id: PartID.ascending(),
    messageID: nudge.id,
    sessionID: nudge.sessionID,
    type: "text",
    synthetic: true,
    text: reminder,
  } satisfies MessageV2.TextPart)

  // true ⇒ caller should `continue` the loop; false (above) ⇒ caller breaks.
  return true
})

// json_schema mode but the model never produced structured output (plain
// text stop, empty, think-only, or any other non-tool terminal). Retry up
// to lastUser.format.retryCount with a repair nudge; on exhaustion write a
Expand Down Expand Up @@ -2434,6 +2502,11 @@ NOTE: At any point in time through this workflow you should feel free to ask the
yield* slog.info("exiting loop", { classification: classification.type, reason: classification.reason })
break
}
if (classification.type === "text-tool-call") {
if (yield* autoRetryTextToolCall({ lastUser, assistant: lastAssistant })) continue
yield* slog.info("exiting loop", { classification: classification.type })
break
}
if (classification.type === "think-only" || classification.type === "invalid") {
const reason = classification.type === "invalid" ? classification.reason : "think-only"
if (yield* autoContinueInvalidOutput({ lastUser, assistant: lastAssistant, reason })) continue
Expand Down Expand Up @@ -2969,6 +3042,10 @@ NOTE: At any point in time through this workflow you should feel free to ask the
yield* writeModelError({ assistant: handle.message, reason: forkClassification.reason })
return "break" as const
}
if (forkClassification.type === "text-tool-call") {
if (yield* autoRetryTextToolCall({ lastUser, assistant: handle.message })) return "continue" as const
return "break" as const
}
if (forkClassification.type !== "continue" && !handle.message.error && format.type === "json_schema") {
if (yield* autoRetryStructuredOutput({ lastUser, assistant: handle.message }))
return "continue" as const
Expand Down Expand Up @@ -3180,6 +3257,10 @@ NOTE: At any point in time through this workflow you should feel free to ask the
yield* writeModelError({ assistant: handle.message, reason: classification.reason })
return "break" as const
}
if (classification.type === "text-tool-call") {
if (yield* autoRetryTextToolCall({ lastUser, assistant: handle.message })) return "continue" as const
return "break" as const
}
if (classification.type !== "continue" && !handle.message.error && format.type === "json_schema") {
if (yield* autoRetryStructuredOutput({ lastUser, assistant: handle.message })) return "continue" as const
return "break" as const
Expand Down
55 changes: 55 additions & 0 deletions packages/opencode/test/session/classify.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -332,4 +332,59 @@ describe("classifyAssistantStep", () => {
})
expect(result).toEqual({ type: "failed", reason: "APIError" })
})

describe("text-form tool call", () => {
  // Tool-call markup as it appears when the model serializes a call into prose.
  const multilineMarkup = 'call\n<invoke name="bash">\n<parameter name="command">ls</parameter>\n</invoke>'
  const inlineMarkup = '<invoke name="bash"><parameter name="command">ls</parameter></invoke>'

  test("finish=tool-calls + no tool part + tool-call markup in text => text-tool-call", () => {
    const assistant = assistantInfo("m-2", { finish: "tool-calls" })
    const parts = [textPart("m-2", multilineMarkup)]
    const { type } = classifyAssistantStep({ phase: "after-process", lastUser, assistant, parts })
    expect(type).toBe("text-tool-call")
  })

  test("finish=tool-calls WITH a real tool part => continue (not text-tool-call)", () => {
    const assistant = assistantInfo("m-2", { finish: "tool-calls" })
    const parts = [toolPart("m-2")]
    const { type } = classifyAssistantStep({ phase: "after-process", lastUser, assistant, parts })
    expect(type).toBe("continue")
  })

  test("finish=tool-calls + text without markup => continue (plain tool-calls)", () => {
    const assistant = assistantInfo("m-2", { finish: "tool-calls" })
    const parts = [textPart("m-2", "just some normal prose, no tool markup")]
    const { type } = classifyAssistantStep({ phase: "after-process", lastUser, assistant, parts })
    expect(type).toBe("continue")
  })

  test("already-discarded turn (assistant.error set) does NOT re-detect as text-tool-call", () => {
    // The error marker is what the retry path writes when it discards a turn.
    const assistant = assistantInfo("m-2", {
      finish: "tool-calls",
      error: new MessageV2.TextToolCallError({ message: "discarded" }).toObject(),
    })
    const parts = [textPart("m-2", inlineMarkup)]
    const { type } = classifyAssistantStep({ phase: "after-process", lastUser, assistant, parts })
    expect(type).not.toBe("text-tool-call")
  })

  test("stale turn predating current user (existing-assistant) => continue, not text-tool-call", () => {
    // The user message id ("m-3") sorts after the assistant id ("m-2"), so the
    // assistant turn predates the current user turn.
    const newerUser = userInfo("m-3")
    const assistant = assistantInfo("m-2", { finish: "tool-calls" })
    const parts = [textPart("m-2", inlineMarkup)]
    const { type } = classifyAssistantStep({ phase: "existing-assistant", lastUser: newerUser, assistant, parts })
    expect(type).toBe("continue")
  })
})
})
124 changes: 124 additions & 0 deletions packages/opencode/test/session/prompt.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,40 @@ function chat(text: string) {
})
}

// Like chat() but lets the caller pick the finish_reason. Used to simulate a
// degraded turn: content is tool-call markup TEXT while finish_reason claims
// "tool_calls" — yet no structured tool_calls field is emitted (the model
// wrote the call as prose).
/**
 * Builds a one-shot SSE body for a streamed chat completion whose last chunk
 * carries a caller-chosen `finish_reason`.
 *
 * The stream contains, in order: a role chunk, a single content chunk holding
 * `text`, a finish chunk with `finish_reason`, and the `data: [DONE]`
 * sentinel. No `tool_calls` field is ever emitted, which is what lets a test
 * pair tool-call markup text with a "tool_calls" finish reason.
 *
 * @param text - Assistant content placed in the single content delta.
 * @param finishReason - Value written to `finish_reason` on the final chunk.
 * @returns A stream that enqueues the whole encoded payload once, then closes.
 */
function chatFinish(text: string, finishReason: string) {
  // Every chunk shares the same envelope; only the single `choices` entry varies.
  const chunk = (choice: Record<string, unknown>) =>
    `data: ${JSON.stringify({
      id: "chatcmpl-1",
      object: "chat.completion.chunk",
      choices: [choice],
    })}`

  const events = [
    chunk({ delta: { role: "assistant" } }),
    chunk({ delta: { content: text } }),
    chunk({ delta: {}, finish_reason: finishReason }),
    "data: [DONE]",
  ]

  // Each SSE event, including the last, is terminated by a blank line.
  const bytes = new TextEncoder().encode(events.map((event) => `${event}\n\n`).join(""))

  return new ReadableStream<Uint8Array>({
    start(controller) {
      controller.enqueue(bytes)
      controller.close()
    },
  })
}

function hanging(ready: () => void) {
const encoder = new TextEncoder()
let timer: ReturnType<typeof setTimeout> | undefined
Expand Down Expand Up @@ -412,6 +446,96 @@ describe("session.prompt regression", () => {
void server.stop(true)
}
})

test("text-form tool call is discarded and the request is regenerated", async () => {
  // Content of the degraded first turn: a tool call serialized as prose.
  const degradedMarkup = 'call\n<invoke name="bash">\n<parameter name="command">ls</parameter>\n</invoke>'

  let calls = 0
  const server = Bun.serve({
    port: 0,
    fetch(req) {
      const { pathname } = new URL(req.url)
      if (!pathname.endsWith("/chat/completions")) {
        return new Response("not found", { status: 404 })
      }
      calls += 1
      // First request: tool call written as TEXT with finish "tool_calls" and
      // no structured tool_calls field. Every later request: clean recovery text.
      const degraded = calls === 1
      const body = degraded ? chatFinish(degradedMarkup, "tool_calls") : chat("recovered: here is the answer")
      return new Response(body, {
        status: 200,
        headers: { "Content-Type": "text/event-stream" },
      })
    },
  })

  try {
    await using tmp = await tmpdir({
      git: true,
      init: async (dir) => {
        // Point the provider at the local stub server.
        const config = {
          $schema: "https://opencode.ai/config.json",
          enabled_providers: ["alibaba"],
          provider: {
            alibaba: {
              options: {
                apiKey: "test-key",
                baseURL: `${server.url.origin}/v1`,
              },
            },
          },
          agent: {
            build: {
              model: "alibaba/qwen-plus",
            },
          },
        }
        await Bun.write(path.join(dir, "mimocode.json"), JSON.stringify(config))
      },
    })

    const scenario = Effect.gen(function* () {
      const prompt = yield* SessionPrompt.Service
      const sessions = yield* Session.Service
      const session = yield* sessions.create({ title: "text-tool-call retry" })
      const result = yield* prompt.prompt({
        sessionID: session.id,
        agent: "build",
        parts: [{ type: "text", text: "do something" }],
      })

      // The retry must reach the model again: exactly two requests hit the stub.
      expect(calls).toBe(2)

      // The returned message is the recovered answer, not the discarded markup.
      expect(result.info.role).toBe("assistant")
      const hasRecoveredText = result.parts.some((part) => part.type === "text" && part.text.includes("recovered"))
      expect(hasRecoveredText).toBe(true)

      // The degraded turn stays in the session, marked with TextToolCallError.
      const msgs = yield* sessions.messages({ sessionID: session.id })
      const discarded = msgs.find(
        (msg) => msg.info.role === "assistant" && msg.info.error?.name === "TextToolCallError",
      )
      expect(discarded).toBeDefined()
    })

    await Instance.provide({
      directory: tmp.path,
      fn: () => run(scenario),
    })
  } finally {
    void server.stop(true)
  }
})
})

describe("session.prompt agent variant", () => {
Expand Down
Loading
Loading