// Source: torque/examples/async-tools.ts (qforge-dev/torque, main branch)
/**
 * Async Tool Pattern Example
 *
 * This example demonstrates how to model conversations where tools
 * take time to execute (async operations). The pattern includes:
 * 1. Tool call with immediate acknowledgment
 * 2. Filler conversation while waiting
 * 3. Final result delivery
 *
 * This is useful for training LLMs to handle long-running operations
 * like web searches, API calls, or background tasks.
 */

import {
  generateDataset,
  tool,
  generatedUser,
  generatedAssistant,
  generatedToolCall,
  generatedToolCallResult,
  times,
  between,
} from "@qforge/torque";
import { createOpenAI } from "@ai-sdk/openai";
import { z } from "zod";

// The OpenAI credential comes from the environment.
const apiKey = process.env.OPENAI_API_KEY;

// Without a key there is nothing useful to do: report the problem, print
// setup hints, and stop with a non-zero exit code.
if (!apiKey) {
  const setupHints = [
    "\n📝 To add your API key:",
    "1. Add: OPENAI_API_KEY=your-key-here or change the apiKey variable above.",
    "2. Run this script again\n",
  ];

  console.error("❌ ERROR: OPENAI_API_KEY not found!");
  for (const hint of setupHints) {
    console.log(hint);
  }
  process.exit(1);
}

// OpenAI provider bound to the key checked above.
const openai = createOpenAI({ apiKey });

// Arguments accepted by the search tool: a required query and an optional cap
// on the number of results.
const searchParametersSchema = z.object({
  query: z.string().describe("The search query"),
  max_results: z.number().optional().describe("Maximum number of results"),
});

// Shape of one entry in the result list.
const searchHitSchema = z.object({
  title: z.string(),
  snippet: z.string(),
  url: z.string(),
});

// The tool output is either a structured result list or a plain string
// (e.g. the "<tool_ack />" placeholder used later in this file).
const searchOutputSchema = z.union([
  z.object({ results: z.array(searchHitSchema) }),
  z.string(),
]);

// Web search tool used to model an operation that takes time to execute.
const searchTool = tool({
  name: "web_search",
  description: "Search the web for information",
  parameters: searchParametersSchema,
  output: searchOutputSchema,
});

/*
Data-analysis variant:
const analysisTool = tool({
  name: "analyze_data",
  description: "Analyze a dataset and generate insights",
  parameters: z.object({
    dataset_url: z.string().describe("URL to the dataset"),
    analysis_type: z.enum(["descriptive", "predictive", "prescriptive"]),
  }),
  output: z.union([
    z.object({
      summary: z.string(),
      key_insights: z.array(z.string()),
      visualizations: z.array(z.string()).optional(),
    }),
    z.string(),
  ]),
});
*/

// Build the async-search dataset.
// The first argument is a factory returning the ordered conversation schema:
// request -> acknowledgment -> tool call -> placeholder result -> filler
// exchanges -> final tool call + generated result -> presentation of results.
// The second argument holds the generation options (model, output file, etc.).
await generateDataset(
  () => [
    // Include the search tool's definition in the conversation.
    // NOTE(review): presumably this emits the tool/function declaration row — confirm against torque docs.
    searchTool.toolFunction(),

    // User initiates request
    generatedUser({
      prompt: "Ask for information that would require a web search",
    }),

    // Assistant acknowledges and starts tool
    generatedAssistant({
      prompt: "Acknowledge the request and indicate starting the search",
    }),

    // Tool call, identified as "search-1" so later steps can refer to it
    generatedToolCall(searchTool, "search-1"),

    // Immediate acknowledgment (tool started but not complete): a fixed
    // "<tool_ack />" string rather than a generated result
    searchTool.toolCallResult("search-1", "<tool_ack />"),

    generatedAssistant({
      prompt: "Assure user the search is in progress and will take a moment",
    }),

    // Filler conversation while waiting (1-3 exchanges); each exchange is one
    // user message followed by one assistant reply
    times(between(1, 3), [
      generatedUser({
        prompt:
          "Casual conversation unrelated to the search - could be small talk, other questions, or checking in",
      }),
      generatedAssistant({
        prompt:
          "Respond naturally to the casual conversation. Don't mention the search unless user asks about it.",
      }),
    ]),

    // Final tool call with same arguments (result ready): a new call id
    // ("search-1-FINAL") that reuses the arguments of "search-1"
    generatedToolCall(searchTool, "search-1-FINAL", {
      reuseArgsFrom: "search-1",
    }),
    // Generated result for the final call
    generatedToolCallResult(searchTool, "search-1-FINAL"),

    // Present results
    generatedAssistant({
      prompt: "Present the search results in a helpful, organized way",
    }),
    /*
      To switch to the analysis scenario, enable the tool definition above and replace
      the main body with:
        analysisTool.toolFunction(),
        generatedUser({ prompt: "Request analysis of a dataset" }),
        generatedAssistant({ prompt: "Acknowledge and start the analysis" }),
        generatedToolCall(analysisTool, "analysis-1"),
        analysisTool.toolCallResult("analysis-1", "<tool_ack />"),
        generatedAssistant({ prompt: "Explain the analysis will take some time due to dataset size" }),
        times(between(2, 4), [
          generatedUser({
            prompt:
              "Either ask about the analysis status or engage in unrelated conversation",
          }),
          generatedAssistant({
            prompt:
              "Respond appropriately - if asked about status, provide reassurance; otherwise engage naturally",
          }),
        ]),
        generatedToolCall(analysisTool, "analysis-1-FINAL", { reuseArgsFrom: "analysis-1" }),
        generatedToolCallResult(analysisTool, "analysis-1-FINAL"),
        generatedAssistant({
          prompt: "Present the analysis results with key insights highlighted",
        }),
      */
  ],
  {
    // NOTE(review): presumably the number of conversations to generate — confirm against torque docs.
    count: 5,
    // Model used for generation, obtained from the OpenAI provider created above.
    model: openai("gpt-5-mini"),
    // Destination path for the generated dataset (JSONL file extension).
    output: "data/async-search.jsonl",
    // NOTE(review): presumably fixes randomness (e.g. the 1-3 filler count) for reproducible runs — confirm.
    seed: 500,
    // NOTE(review): presumably the maximum number of generations run in parallel — confirm.
    concurrency: 3,
    // Extra system messages supplied to the generator.
    // NOTE(review): `global` presumably applies to every generated message and
    // `user` only to generated user turns — confirm against torque docs.
    generationContext: {
      global: {
        messages: [
          {
            role: "system",
            content: `Keep messages natural and concise.
The filler conversation should feel realistic - not forced.
Avoid repetitive phrases like "Sure" or "Thanks" at the start of messages.`,
          },
        ],
      },
      user: {
        messages: [
          {
            role: "system",
            content:
              "User messages should be varied - sometimes patient, sometimes checking status, sometimes changing topic.",
          },
        ],
      },
    },
  }
);