Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/out-491-cli-catalog-scenario-resolution.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@outputai/cli": minor
---

CLI `start`/`run`/`test`/`dataset generate` now resolve scenarios and route execution against `--catalog`/`OUTPUT_CATALOG_ID` instead of the API server's default catalog. This removes the ~30s scenario-resolution stall in worktrees where the default catalog has no worker polling it. `workflow test` and `workflow dataset generate` also gain a `--catalog` flag (env: `OUTPUT_CATALOG_ID`), matching `list`/`start`/`run`.
84 changes: 84 additions & 0 deletions sdk/cli/src/commands/workflow/dataset/generate.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { describe, it, expect, vi, beforeEach } from 'vitest';

// Mock the generated API module: postWorkflowRun becomes a bare vi.fn() that each test configures.
vi.mock( '#api/generated/api.js', () => ( {
postWorkflowRun: vi.fn()
} ) );

// Mock scenario resolution: resolveScenarioPath is configured per test,
// while getScenarioNotFoundMessage always returns the fixed string 'not found'.
vi.mock( '#utils/scenario_resolver.js', () => ( {
resolveScenarioPath: vi.fn(),
getScenarioNotFoundMessage: vi.fn().mockReturnValue( 'not found' )
} ) );

// Mock input parsing: parseInputFlag is a bare vi.fn() configured per test.
vi.mock( '#utils/input_parser.js', () => ( {
parseInputFlag: vi.fn()
} ) );

// Mock the dataset service helpers with fixed placeholder return values
// (directory '/datasets', dataset named 'basic', execution time 100).
vi.mock( '#services/datasets.js', () => ( {
writeDataset: vi.fn(),
resolveDefaultDatasetsDir: vi.fn().mockResolvedValue( '/datasets' ),
buildDataset: vi.fn().mockReturnValue( { name: 'basic' } ),
getExecutionTime: vi.fn().mockResolvedValue( 100 ),
extractDatasetName: vi.fn()
} ) );

describe( 'workflow dataset generate command', () => {
  // Every test starts with cleared mock call history and no ambient catalog env var.
  beforeEach( () => {
    vi.clearAllMocks();
    delete process.env.OUTPUT_CATALOG_ID;
  } );

  describe( 'command definition', () => {
    it( 'binds the catalog flag to OUTPUT_CATALOG_ID', async () => {
      const { default: GenerateCommand } = await import( './generate.js' );
      const commandFlags = GenerateCommand.flags;

      expect( commandFlags ).toHaveProperty( 'catalog' );
      expect( commandFlags.catalog.env ).toBe( 'OUTPUT_CATALOG_ID' );
      expect( commandFlags.catalog.char ).toBe( 'c' );
    } );
  } );

  describe( 'run()', () => {
    /**
     * Builds a command instance with log/error/parse stubbed out and returns it
     * together with typed handles to the mocked collaborators.
     *
     * @param flagOverrides - Values spread over the default parsed flags.
     */
    const buildCommand = async ( flagOverrides: Record<string, unknown> = {} ) => {
      const { default: GenerateCommand } = await import( './generate.js' );
      const apiModule = await import( '#api/generated/api.js' );
      const scenarioModule = await import( '#utils/scenario_resolver.js' );
      const inputModule = await import( '#utils/input_parser.js' );

      const baseFlags = {
        catalog: undefined,
        trace: undefined,
        name: undefined,
        download: false,
        limit: 5,
        input: undefined
      };
      const parsed = {
        args: { workflowName: 'my_workflow', scenario: 'basic' },
        flags: { ...baseFlags, ...flagOverrides }
      };

      const cmd = new GenerateCommand( [ 'my_workflow' ], {} as any );
      cmd.log = vi.fn();
      // error() throws so that any call to it aborts the test with a recognizable message.
      cmd.error = vi.fn( () => {
        throw new Error( 'error called' );
      } ) as any;
      ( cmd as any ).parse = vi.fn().mockResolvedValue( parsed );

      return {
        cmd,
        postWorkflowRun: vi.mocked( apiModule.postWorkflowRun ),
        resolveScenarioPath: vi.mocked( scenarioModule.resolveScenarioPath ),
        parseInputFlag: vi.mocked( inputModule.parseInputFlag )
      };
    };

    it( 'resolves the scenario and runs the workflow against the resolved catalog', async () => {
      const harness = await buildCommand( { catalog: 'my-catalog' } );

      harness.resolveScenarioPath.mockResolvedValue( { found: true, path: '/scenarios/basic.json', searchedPaths: [] } );
      harness.parseInputFlag.mockResolvedValue( { foo: 'bar' } as any );
      harness.postWorkflowRun.mockResolvedValue( {
        data: { workflowId: 'wf-1', output: { ok: true } },
        status: 200,
        headers: new Headers()
      } as any );

      await harness.cmd.run();

      // The catalog must reach both the scenario lookup (5th argument) and the run request body.
      expect( harness.resolveScenarioPath ).toHaveBeenCalledWith( 'my_workflow', 'basic', undefined, undefined, 'my-catalog' );
      expect( harness.postWorkflowRun ).toHaveBeenCalledWith(
        expect.objectContaining( { workflowName: 'my_workflow', catalog: 'my-catalog' } ),
        expect.anything()
      );
    } );
  } );
} );
25 changes: 19 additions & 6 deletions sdk/cli/src/commands/workflow/dataset/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ export default class DatasetGenerate extends Command {
};

static override flags = {
catalog: Flags.string( {
char: 'c',
aliases: [ 'task-queue' ],
charAliases: [ 'q' ],
deprecateAliases: true,
description: 'Catalog name for workflow execution (defaults to OUTPUT_CATALOG_ID)',
env: 'OUTPUT_CATALOG_ID'
} ),
trace: Flags.string( {
char: 't',
description: 'Path to a local trace file to extract dataset from',
Expand Down Expand Up @@ -84,20 +92,23 @@ export default class DatasetGenerate extends Command {
args.workflowName,
args.scenario,
flags.input,
flags.name
flags.name,
flags.catalog
);
}

private async generateFromScenario(
workflowName: string,
scenario: string | undefined,
inputFlag: string | undefined,
nameOverride: string | undefined
nameOverride: string | undefined,
catalog: string | undefined
): Promise<void> {
const resolvedInput = await this.resolveScenarioInput(
workflowName,
scenario,
inputFlag
inputFlag,
catalog
);

const datasetName = nameOverride ?? scenario ?? 'dataset';
Expand All @@ -106,7 +117,8 @@ export default class DatasetGenerate extends Command {

const response = await postWorkflowRun( {
workflowName,
input: resolvedInput
input: resolvedInput,
catalog
}, {
config: { timeout: 600000 }
} );
Expand Down Expand Up @@ -199,7 +211,8 @@ export default class DatasetGenerate extends Command {
private async resolveScenarioInput(
workflowName: string,
scenario: string | undefined,
inputFlag: string | undefined
inputFlag: string | undefined,
catalog: string | undefined
): Promise<unknown> {
if ( inputFlag && scenario ) {
return ux.error(
Expand All @@ -213,7 +226,7 @@ export default class DatasetGenerate extends Command {
}

if ( scenario ) {
const resolution = await resolveScenarioPath( workflowName, scenario );
const resolution = await resolveScenarioPath( workflowName, scenario, undefined, undefined, catalog );
if ( !resolution.found ) {
return ux.error(
getScenarioNotFoundMessage( workflowName, scenario, resolution.searchedPaths ),
Expand Down
23 changes: 23 additions & 0 deletions sdk/cli/src/commands/workflow/run.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ describe( 'workflow run command', () => {

await cmd.run();

expect( resolveInput ).toHaveBeenCalledWith( 'my_workflow', undefined, undefined, 'run', undefined );
expect( postWorkflowRun ).toHaveBeenCalledTimes( 1 );
expect( postWorkflowRun ).toHaveBeenCalledWith(
{ workflowName: 'my_workflow', input: { key: 'value' }, catalog: undefined },
Expand All @@ -86,6 +87,28 @@ describe( 'workflow run command', () => {
expect( cmd.log ).toHaveBeenCalledWith( expect.stringMatching( /\n/ ) );
} );

// Verifies that a catalog supplied via flags reaches both input resolution and the run request.
it( 'threads the resolved catalog to resolveInput and postWorkflowRun', async () => {
const { cmd, postWorkflowRun, resolveInput } = await createCommand();
// Replace the parse stub so this test supplies a scenario ('basic') and a catalog ('my-catalog').
( cmd as any ).parse = vi.fn().mockResolvedValue( {
args: { workflowName: 'my_workflow', scenario: 'basic' },
flags: { input: undefined, catalog: 'my-catalog', format: 'text' }
} );
resolveInput.mockResolvedValue( { key: 'value' } );
postWorkflowRun.mockResolvedValue( {
data: { status: 'completed', result: {} },
status: 200,
headers: new Headers()
} as any );

await cmd.run();

// The catalog is expected as the fifth argument to resolveInput and as a field of the request body.
expect( resolveInput ).toHaveBeenCalledWith( 'my_workflow', 'basic', undefined, 'run', 'my-catalog' );
expect( postWorkflowRun ).toHaveBeenCalledWith(
expect.objectContaining( { catalog: 'my-catalog' } ),
expect.anything()
);
} );

it( 'retries when response has Retry-After and succeeds on second attempt', async () => {
const { cmd, postWorkflowRun, resolveInput } = await createCommand();
resolveInput.mockResolvedValue( {} );
Expand Down
2 changes: 1 addition & 1 deletion sdk/cli/src/commands/workflow/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ export default class WorkflowRun extends Command {
async run(): Promise<WorkflowResultResponse> {
const { args, flags } = await this.parse( WorkflowRun );

const input = await resolveInput( args.workflowName, args.scenario, flags.input, 'run' );
const input = await resolveInput( args.workflowName, args.scenario, flags.input, 'run', flags.catalog );

this.log( `Executing workflow: ${args.workflowName}...` );

Expand Down
68 changes: 66 additions & 2 deletions sdk/cli/src/commands/workflow/start.spec.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { describe, it, expect, vi, beforeEach } from 'vitest';

vi.mock( '../../api/generated/api.js', () => ( {
vi.mock( '#api/generated/api.js', () => ( {
postWorkflowStart: vi.fn()
} ) );

// Mock input resolution: resolveInput is a bare vi.fn(); beforeEach gives it a default resolved value of {}.
vi.mock( '#utils/resolve_input.js', () => ( {
resolveInput: vi.fn()
} ) );

describe( 'workflow start command', () => {
beforeEach( () => {
beforeEach( async () => {
vi.clearAllMocks();
delete process.env.OUTPUT_CATALOG_ID;
const { resolveInput } = await import( '#utils/resolve_input.js' );
vi.mocked( resolveInput ).mockResolvedValue( {} );
} );

describe( 'command definition', () => {
Expand All @@ -30,5 +37,62 @@ describe( 'workflow start command', () => {
expect( WorkflowStart.args ).toHaveProperty( 'scenario' );
expect( WorkflowStart.args.scenario.required ).toBe( false );
} );

// Checks the static flag definition only: the env var binding and the short character.
it( 'binds the catalog flag to OUTPUT_CATALOG_ID', async () => {
const WorkflowStart = ( await import( './start.js' ) ).default;
expect( WorkflowStart.flags.catalog.env ).toBe( 'OUTPUT_CATALOG_ID' );
expect( WorkflowStart.flags.catalog.char ).toBe( 'c' );
} );
} );

// Exercises WorkflowStart.run() with parse/log/error stubbed and the API + input resolver mocked.
describe( 'run()', () => {
// Builds a command whose parsed result has no scenario; flagOverrides is spread over the default flags.
// Returns the command plus typed handles to the mocked postWorkflowStart and resolveInput.
const createCommand = async ( flagOverrides: Record<string, unknown> = {} ) => {
const WorkflowStart = ( await import( './start.js' ) ).default;
const { postWorkflowStart } = await import( '#api/generated/api.js' );
const { resolveInput } = await import( '#utils/resolve_input.js' );

const cmd = new WorkflowStart( [ 'my_workflow' ], {} as any );
cmd.log = vi.fn();
// error() throws so that any call to it fails the test with a recognizable message.
cmd.error = vi.fn( () => {
throw new Error( 'error called' );
} ) as any;
( cmd as any ).parse = vi.fn().mockResolvedValue( {
args: { workflowName: 'my_workflow', scenario: undefined },
flags: { input: undefined, catalog: undefined, ...flagOverrides }
} );

return { cmd, postWorkflowStart: vi.mocked( postWorkflowStart ), resolveInput: vi.mocked( resolveInput ) };
};

it( 'threads the resolved catalog to resolveInput and postWorkflowStart', async () => {
const { cmd, postWorkflowStart, resolveInput } = await createCommand( { catalog: 'my-catalog' } );
resolveInput.mockResolvedValue( { key: 'value' } );
postWorkflowStart.mockResolvedValue( {
data: { workflowId: 'wf-123' },
status: 200,
headers: new Headers()
} as any );

await cmd.run();

// The catalog is expected as the fifth argument to resolveInput and as a field of the start request.
expect( resolveInput ).toHaveBeenCalledWith( 'my_workflow', undefined, undefined, 'start', 'my-catalog' );
expect( postWorkflowStart ).toHaveBeenCalledWith(
expect.objectContaining( { workflowName: 'my_workflow', catalog: 'my-catalog' } )
);
} );

it( 'passes undefined catalog through when none is set', async () => {
const { cmd, postWorkflowStart, resolveInput } = await createCommand();
resolveInput.mockResolvedValue( {} );
postWorkflowStart.mockResolvedValue( {
data: { workflowId: 'wf-123' },
status: 200,
headers: new Headers()
} as any );

await cmd.run();

// With no catalog flag, the fifth argument is passed through as undefined.
expect( resolveInput ).toHaveBeenCalledWith( 'my_workflow', undefined, undefined, 'start', undefined );
} );
} );
} );
2 changes: 1 addition & 1 deletion sdk/cli/src/commands/workflow/start.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ export default class WorkflowStart extends Command {
async run(): Promise<void> {
const { args, flags } = await this.parse( WorkflowStart );

const input = await resolveInput( args.workflowName, args.scenario, flags.input, 'start' );
const input = await resolveInput( args.workflowName, args.scenario, flags.input, 'start', flags.catalog );

this.log( `Starting workflow: ${args.workflowName}...` );

Expand Down
53 changes: 53 additions & 0 deletions sdk/cli/src/commands/workflow/test_eval.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,19 @@ vi.mock( '#api/generated/api.js', () => ( {
postWorkflowRun: vi.fn()
} ) );

// Mock the workflow catalog API: fetchWorkflowCatalog is a bare vi.fn() whose resolved value is set in beforeEach.
vi.mock( '#api/workflow_catalog.js', () => ( {
fetchWorkflowCatalog: vi.fn()
} ) );

vi.mock( '#services/datasets.js', () => ( {
readAllDatasets: vi.fn(),
writeDataset: vi.fn()
} ) );

// Mock eval diagnostics: diagnoseMissingEvalWorkflow always resolves to the fixed string 'missing eval workflow'.
vi.mock( '#utils/eval_diagnostics.js', () => ( {
diagnoseMissingEvalWorkflow: vi.fn().mockResolvedValue( 'missing eval workflow' )
} ) );

const passingOutput: EvalOutput = {
cases: [ { datasetName: 'd1', verdict: 'pass', evaluators: [] } ],
summary: { total: 1, passed: 1, partial: 0, failed: 0, acceptableRate: 1 }
Expand All @@ -31,10 +39,16 @@ describe( 'workflow test command', () => {
process.exitCode = undefined;

const { readAllDatasets } = await import( '#services/datasets.js' );
const { fetchWorkflowCatalog } = await import( '#api/workflow_catalog.js' );
vi.mocked( readAllDatasets ).mockResolvedValue( {
datasets: [ { name: 'd1', input: {}, last_output: { output: {}, date: '2026-01-01' } } as any ],
dir: '/tmp/datasets'
} );
// Catalog includes both eval names so ensureEvalWorkflowRegistered passes deterministically.
vi.mocked( fetchWorkflowCatalog ).mockResolvedValue( [
{ name: getEvalWorkflowName( 'simple' ) },
{ name: getEvalWorkflowName( 'my_workflow' ) }
] as any );
} );

afterEach( () => {
Expand All @@ -46,6 +60,13 @@ describe( 'workflow test command', () => {
const WorkflowTest = ( await import( './test_eval.js' ) ).default;
expect( WorkflowTest.enableJsonFlag ).toBe( true );
} );

// Checks the static flag definition only: presence, env var binding, and short character.
it( 'binds the catalog flag to OUTPUT_CATALOG_ID', async () => {
const WorkflowTest = ( await import( './test_eval.js' ) ).default;
expect( WorkflowTest.flags ).toHaveProperty( 'catalog' );
expect( WorkflowTest.flags.catalog.env ).toBe( 'OUTPUT_CATALOG_ID' );
expect( WorkflowTest.flags.catalog.char ).toBe( 'c' );
} );
} );

describe( 'run()', () => {
Expand Down Expand Up @@ -105,5 +126,37 @@ describe( 'workflow test command', () => {
expect( result ).toEqual( failingOutput );
expect( process.exitCode ).toBe( 1 );
} );

// Verifies that the catalog flag is forwarded to the catalog lookup and to both postWorkflowRun calls.
it( 'routes registration, dataset runs, and the eval run to the resolved catalog', async () => {
const WorkflowTest = ( await import( './test_eval.js' ) ).default;
const { postWorkflowRun } = await import( '#api/generated/api.js' );
const { fetchWorkflowCatalog } = await import( '#api/workflow_catalog.js' );

const cmd = new WorkflowTest( [ 'my_workflow' ], {} as any );
cmd.log = vi.fn();
( cmd as any ).jsonEnabled = vi.fn().mockReturnValue( false );
( cmd as any ).parse = vi.fn().mockResolvedValue( {
args: { workflowName: 'my_workflow' },
flags: { catalog: 'my-catalog', cached: false, save: false, dataset: undefined }
} );

// Two queued responses: the assertions below expect call 1 to target 'my_workflow'
// and call 2 to target its eval workflow, which receives the passing eval output.
vi.mocked( postWorkflowRun )
.mockResolvedValueOnce( { data: { output: {} }, status: 200, headers: new Headers() } as any )
.mockResolvedValueOnce( { data: { output: passingOutput }, status: 200, headers: new Headers() } as any );

await cmd.run();

expect( vi.mocked( fetchWorkflowCatalog ) ).toHaveBeenCalledWith( 'my-catalog' );
expect( postWorkflowRun ).toHaveBeenNthCalledWith(
1,
expect.objectContaining( { workflowName: 'my_workflow', catalog: 'my-catalog' } ),
expect.anything()
);
expect( postWorkflowRun ).toHaveBeenNthCalledWith(
2,
expect.objectContaining( { workflowName: getEvalWorkflowName( 'my_workflow' ), catalog: 'my-catalog' } ),
expect.anything()
);
} );
} );
} );
Loading
Loading