Skip to content
6 changes: 4 additions & 2 deletions packages/core/src/render/wizard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,10 @@ interface IKeyInfo {
}

/** Map a raw keypress to a wizard action, or null to ignore. Digits jump the
* cursor and are handled by the driver (not here). */
function actionFor(
* cursor and are handled by the driver (not here). Exported so the key→action
* decode is unit-testable without a PTY (the raw-mode plumbing around it is
* not — see node-pty/Bun limits). */
export function actionFor(
str: string | undefined,
key: IKeyInfo
): IWizardAction | null {
Expand Down
70 changes: 70 additions & 0 deletions packages/core/tests/helpers/scripted-model.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import type {
IChatMessage,
IModelResponse,
IProvider,
IToolCall,
} from "../../src/inference";

/**
 * A deterministic stand-in for the model seam (`IProvider`). Drives the real
 * agent loop with a scripted sequence of turns so a full session — tool calls,
 * gate, repair loop, final verdict — is replayable in a test without a live LLM.
 *
 * Each `complete()` consumes the next turn. A turn is either a fixed
 * `{content, toolCalls}` or a function of the conversation so far (so a turn can
 * REACT to gate feedback — e.g. "if the last message mentions the error, fix
 * it"). When the script is exhausted the model yields (no content, no tool
 * calls), which the loop reads as "the model is done".
 *
 * `IScriptedTurn` is the payload of one such turn. Both fields are optional:
 * `scriptedModel` substitutes `""` for a missing `content` and `[]` for a
 * missing `toolCalls` when it builds the response.
 */
export interface IScriptedTurn {
/** Text the model returns on this turn; omitted means the empty string. */
content?: string;
/** Tool calls the model issues on this turn; omitted means none. */
toolCalls?: IToolCall[];
}

/**
 * One entry in a script: either a fixed turn, or a function that receives the
 * messages passed to that `complete()` call and returns the turn to play.
 */
export type ScriptedTurn =
| IScriptedTurn
| ((messages: readonly IChatMessage[]) => IScriptedTurn);

/** An `IProvider` that additionally reports how often it has been called. */
export interface IScriptedModel extends IProvider {
/**
 * How many times the loop has called the model. A call refused by the
 * runaway guard in `scriptedModel` does not advance this counter.
 */
readonly calls: number;
}

/**
 * Shorthand for writing a tool call inside a scripted turn.
 *
 * @param name - Tool name to invoke.
 * @param args - Arguments object, stored as-is (not copied) under `arguments`.
 * @returns The `{ name, arguments }` tool call.
 */
export function call(name: string, args: Record<string, unknown>): IToolCall {
  const toolCall: IToolCall = {
    name: name,
    arguments: args,
  };

  return toolCall;
}

/**
 * Build a scripted `IProvider` that replays `turns` in order.
 *
 * Call k (1-based) plays `turns[k - 1]`; a function turn is invoked with the
 * messages passed to that call. The first call past the end of the script
 * resolves to the empty yield (`content: ""`, `toolCalls: []`). Any call after
 * that is rejected by the runaway guard.
 *
 * Every failure — the runaway guard and anything a function turn throws —
 * surfaces as a rejection of the returned promise, never as a synchronous
 * throw, so `await` and `.catch()` callers observe it the same way.
 *
 * @param turns - The script, one entry per expected model call.
 * @returns A provider whose `calls` getter counts the calls it accepted (a
 *   call refused by the runaway guard is not counted).
 */
export function scriptedModel(turns: readonly ScriptedTurn[]): IScriptedModel {
  let idx = 0;

  return {
    get calls(): number {
      return idx;
    },

    // `async` on purpose: a Promise-returning method must not throw
    // synchronously, so both `throw` sites below become rejections.
    async complete(
      messages: readonly IChatMessage[]
    ): Promise<IModelResponse> {
      // One call past the script returns the empty yield (the loop's natural
      // stop). Any call beyond that means the loop failed to terminate — fail
      // immediately so the test fails fast instead of hanging or hitting the
      // runaway-turn backstop.
      if (idx > turns.length) {
        throw new Error(
          `Scripted model called ${idx + 1} times, but the script has only ${turns.length} turns (loop did not terminate).`
        );
      }

      // `turn` is undefined exactly once: on the first call past the script.
      const turn = turns[idx];

      // Count the call before running a function turn, so a turn that throws
      // still counts as a call.
      idx += 1;

      const resolved: IScriptedTurn =
        typeof turn === "function" ? turn(messages) : (turn ?? {});

      return {
        content: resolved.content ?? "",
        toolCalls: resolved.toolCalls ?? [],
      };
    },
  };
}
113 changes: 113 additions & 0 deletions packages/core/tests/helpers/session-harness.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import {
existsSync,
mkdtempSync,
readFileSync,
rmSync,
mkdirSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { dirname, join } from "node:path";
import { Session, type ILoopEvent } from "../../src/loop";
import type { PolicyMode } from "../../src/policy/policy.types";
import {
scriptedModel,
type IScriptedModel,
type ScriptedTurn,
} from "./scripted-model";

/**
 * Options for `runScriptedSession`. `task` and `turns` drive the run; `seed`
 * prepares the temp directory; the remaining optional fields are forwarded to
 * `Session.create` unchanged (except `files`, which gets a default).
 */
export interface IRunScriptedSession {
/** The user's first message (the task). */
task: string;
/** The model's scripted turns. */
turns: readonly ScriptedTurn[];
/** Editable scope (paths under cwd). Omit ⇒ whole temp dir is editable. */
files?: string[];
/** Gate command run when a mutating turn ends (e.g. `"true"`, `"test -f ok"`). */
accept?: string;
/** Auto-fix command run before re-validating. */
fix?: string;
/** Read-only context files. */
context?: string[];
/** Files to pre-create in the temp cwd before the session runs (path → text). */
seed?: Record<string, string>;
/** Per-send turn cap. */
maxTurns?: number;
/** Policy mode — e.g. "plan" to forbid all writes. */
policyMode?: PolicyMode;
}

/**
 * What `runScriptedSession` hands back: the outcome of the single `send()`,
 * everything the session reported, and helpers for inspecting the temp
 * directory. The file helpers read from `cwd`, so they only work until
 * `cleanupScriptedSessions()` removes it.
 */
export interface IScriptedSessionResult {
/** `status` of the result returned by `session.send()`. */
status: string;
/** `turns` of the result returned by `session.send()`. */
turns: number;
/** Every event passed to the session's `report` callback, in arrival order. */
events: ILoopEvent[];
/** The scripted model that served the session (exposes `calls`). */
model: IScriptedModel;
/** Absolute path of the temp directory the session ran in. */
cwd: string;
/** Events of a given kind, in order. */
eventsOfKind(kind: string): ILoopEvent[];
/** Whether any event of a kind fired. */
sawKind(kind: string): boolean;
/** UTF-8 text of the file at `rel`, resolved against `cwd`. */
fileText(rel: string): string;
/** Whether a file or directory exists at `rel`, resolved against `cwd`. */
fileExists(rel: string): boolean;
}

/** Temp directories handed out by this harness that have not been removed yet. */
const createdDirs: string[] = [];

/**
 * Delete every temp directory the harness has created so far and forget them.
 * Intended for an `afterAll` hook. The registry is emptied up front, before
 * any directory is removed.
 */
export function cleanupScriptedSessions(): void {
  const pending = createdDirs.splice(0);

  pending.forEach((dir) => {
    rmSync(dir, { recursive: true, force: true });
  });
}

/**
 * Write each `seed` entry (relative path → text) under `cwd`, creating any
 * missing parent directories first.
 */
function seedFiles(cwd: string, seed: Record<string, string>): void {
  Object.entries(seed).forEach(([relativePath, contents]) => {
    const target = join(cwd, relativePath);
    const parent = dirname(target);

    mkdirSync(parent, { recursive: true });
    writeFileSync(target, contents);
  });
}

/**
 * Run one scripted session in a fresh temp directory: create the directory,
 * seed it, build a `Session` backed by a scripted model, send the task once,
 * and return the outcome together with inspection helpers.
 *
 * @param opts - Task, script, and optional session settings.
 * @returns The `send()` status and turn count, all reported events, the
 *   scripted model, the temp directory path, and helpers bound to them.
 */
export async function runScriptedSession(
  opts: IRunScriptedSession
): Promise<IScriptedSessionResult> {
  const {
    task,
    turns,
    files,
    accept,
    fix,
    context,
    seed,
    maxTurns,
    policyMode,
  } = opts;

  // Registered immediately, so cleanupScriptedSessions() removes the directory
  // even if a later step throws.
  const cwd = mkdtempSync(join(tmpdir(), "tsforge-e2e-"));
  createdDirs.push(cwd);

  if (seed !== undefined) {
    seedFiles(cwd, seed);
  }

  const events: ILoopEvent[] = [];
  const model = scriptedModel(turns);

  const inCwd = (rel: string): string => join(cwd, rel);
  const hasKind =
    (kind: string) =>
    (event: ILoopEvent): boolean =>
      event.kind === kind;

  const session = await Session.create({
    provider: model,
    cwd,
    // No explicit scope means everything under the temp dir is editable.
    files: files ?? ["**/*"],
    accept,
    fix,
    context,
    maxTurns,
    policyMode,
    report(event: ILoopEvent): void {
      events.push(event);
    },
  });

  const outcome = await session.send(task);

  return {
    status: outcome.status,
    turns: outcome.turns,
    events,
    model,
    cwd,
    eventsOfKind(kind: string): ILoopEvent[] {
      return events.filter(hasKind(kind));
    },
    sawKind(kind: string): boolean {
      return events.some(hasKind(kind));
    },
    fileText(rel: string): string {
      return readFileSync(inCwd(rel), "utf8");
    },
    fileExists(rel: string): boolean {
      return existsSync(inCwd(rel));
    },
  };
}
Loading