From 0933ae7a16f03e84a34f661c190b5ea5622e6015 Mon Sep 17 00:00:00 2001 From: dvlin Date: Tue, 10 Feb 2026 02:58:20 +0800 Subject: [PATCH 01/16] feat(anyhunt/video-transcript): implement dual-mode transcript pipeline add LOCAL+CLOUD_FALLBACK workers, queue orchestration, runtime switch, and budget guard add console/admin pages, prisma migration, local deployment script, and deployment checklist docs --- CLAUDE.md | 15 +- apps/anyhunt/admin/www/CLAUDE.md | 29 +- apps/anyhunt/admin/www/src/App.tsx | 4 + .../www/src/components/layout/main-layout.tsx | 2 + apps/anyhunt/admin/www/src/features/CLAUDE.md | 29 +- .../admin/www/src/features/queues/types.ts | 2 +- .../www/src/features/video-transcripts/api.ts | 53 ++ .../src/features/video-transcripts/hooks.ts | 69 ++ .../src/features/video-transcripts/index.ts | 3 + .../src/features/video-transcripts/types.ts | 135 ++++ apps/anyhunt/admin/www/src/lib/api-paths.ts | 1 + .../admin/www/src/pages/QueuesPage.tsx | 6 +- .../www/src/pages/VideoTranscriptsPage.tsx | 511 ++++++++++++ apps/anyhunt/console/CLAUDE.md | 30 +- apps/anyhunt/console/src/App.tsx | 2 + .../src/components/layout/app-sidebar.tsx | 1 + apps/anyhunt/console/src/features/CLAUDE.md | 32 +- .../video-transcript-playground/api.ts | 43 + .../video-transcript-playground/hooks.ts | 60 ++ .../video-transcript-playground/index.ts | 4 + .../video-transcript-playground/schemas.ts | 15 + .../video-transcript-playground/types.ts | 49 ++ apps/anyhunt/console/src/lib/api-paths.ts | 1 + .../src/pages/VideoTranscriptPage.test.tsx | 29 + .../console/src/pages/VideoTranscriptPage.tsx | 368 +++++++++ apps/anyhunt/server/CLAUDE.md | 76 +- apps/anyhunt/server/prisma/main/CLAUDE.md | 1 + .../migration.sql | 53 ++ apps/anyhunt/server/prisma/main/schema.prisma | 42 + .../video-transcript/setup-local-worker.sh | 380 +++++++++ apps/anyhunt/server/src/admin/CLAUDE.md | 2 + .../server/src/admin/admin-queue.service.ts | 13 + apps/anyhunt/server/src/app.module.ts | 2 + .../server/src/queue/queue.constants.ts | 17 + apps/anyhunt/server/src/queue/queue.module.ts | 9 +- .../server/src/video-transcript/AGENTS.md | 1 + .../server/src/video-transcript/CLAUDE.md | 67 ++ ...ranscript-cloud-fallback.processor.spec.ts | 164 ++++ ...ranscript-fallback-scanner.service.spec.ts | 59 ++ .../video-transcript-local.processor.spec.ts | 109 +++ .../video-transcript.service.spec.ts | 255 ++++++ .../server/src/video-transcript/dto/index.ts | 1 + .../dto/video-transcript.schema.ts | 35 + .../server/src/video-transcript/index.ts | 4 + .../video-transcript-admin.controller.ts | 85 ++ .../video-transcript-admin.service.ts | 422 ++++++++++ .../video-transcript-artifact.service.ts | 140 ++++ .../video-transcript-budget.service.ts | 166 ++++ ...deo-transcript-cloud-fallback.processor.ts | 334 ++++++++ .../video-transcript-command.service.ts | 101 +++ .../video-transcript-executor.service.ts | 489 ++++++++++++ ...deo-transcript-fallback-scanner.service.ts | 60 ++ .../video-transcript-heartbeat.service.ts | 145 ++++ .../video-transcript-local.processor.ts | 198 +++++ ...video-transcript-runtime-config.service.ts | 79 ++ .../video-transcript.constants.ts | 103 +++ .../video-transcript.controller.ts | 86 ++ .../video-transcript.errors.ts | 63 ++ .../video-transcript.module.ts | 70 ++ .../video-transcript.service.ts | 348 +++++++++ .../video-transcript.types.ts | 86 ++ docs/CLAUDE.md | 23 + docs/architecture/CLAUDE.md | 14 + .../anyhunt-video-transcript-pipeline.md | 733 ++++++++++++++++++ docs/index.md | 3 +- 65 files changed, 6435 insertions(+), 96 deletions(-) create mode 100644 apps/anyhunt/admin/www/src/features/video-transcripts/api.ts create mode 100644 apps/anyhunt/admin/www/src/features/video-transcripts/hooks.ts create mode 100644 apps/anyhunt/admin/www/src/features/video-transcripts/index.ts create mode 100644 apps/anyhunt/admin/www/src/features/video-transcripts/types.ts create mode 100644 apps/anyhunt/admin/www/src/pages/VideoTranscriptsPage.tsx create mode 100644 apps/anyhunt/console/src/features/video-transcript-playground/api.ts create mode 100644 apps/anyhunt/console/src/features/video-transcript-playground/hooks.ts create mode 100644 apps/anyhunt/console/src/features/video-transcript-playground/index.ts create mode 100644 apps/anyhunt/console/src/features/video-transcript-playground/schemas.ts create mode 100644 apps/anyhunt/console/src/features/video-transcript-playground/types.ts create mode 100644 apps/anyhunt/console/src/pages/VideoTranscriptPage.test.tsx create mode 100644 apps/anyhunt/console/src/pages/VideoTranscriptPage.tsx create mode 100644 apps/anyhunt/server/prisma/main/migrations/20260209001000_add_video_transcript_task/migration.sql create mode 100755 apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh create mode 120000 apps/anyhunt/server/src/video-transcript/AGENTS.md create mode 100644 apps/anyhunt/server/src/video-transcript/CLAUDE.md create mode 100644 apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts create mode 100644 apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-fallback-scanner.service.spec.ts create mode 100644 apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-local.processor.spec.ts create mode 100644 apps/anyhunt/server/src/video-transcript/__tests__/video-transcript.service.spec.ts create mode 100644 apps/anyhunt/server/src/video-transcript/dto/index.ts create mode 100644 apps/anyhunt/server/src/video-transcript/dto/video-transcript.schema.ts create mode 100644 apps/anyhunt/server/src/video-transcript/index.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript-admin.controller.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript-admin.service.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript-artifact.service.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript-budget.service.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript-command.service.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript-executor.service.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript-fallback-scanner.service.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript-heartbeat.service.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript-local.processor.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript-runtime-config.service.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript.constants.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript.controller.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript.errors.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript.module.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript.service.ts create mode 100644 apps/anyhunt/server/src/video-transcript/video-transcript.types.ts create mode 100644 docs/architecture/anyhunt-video-transcript-pipeline.md diff --git a/CLAUDE.md b/CLAUDE.md index 00337d10b..0fda9f17f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,7 +1,20 @@ # Anyhunt 统一平台 > 本文档是 AI Agent 的核心指南。遵循 [agents.md 规范](https://agents.md/)。 +> 最近更新:2026-02-09(Anyhunt 视频转写补充本地一键部署脚本:`apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh`) +> 最近更新:2026-02-09(Anyhunt 视频转写补充三节点部署定案(公网简化版):VPS1 API + VPS2 cloud fallback worker + Mac mini local worker) +> 最近更新:2026-02-09(Anyhunt 视频转写四轮可靠性修复:cloud 接管后 workspace 失败兜底、local 启动顺序严格对齐 10 分钟窗口起点、duration probe 解析增强、补充对应回归测试) +> 最近更新:2026-02-09(Anyhunt 视频转写三轮可靠性修复:timeout pre-check 不误杀 local、local fallback-check 调度失败降级、scanner 单角色启用、cloud duration probe 提前 preempt) +> 最近更新:2026-02-09(Anyhunt 视频转写二轮可靠性修复:fallback 补偿扫描(30s)+ DB 时间裁决 + Admin today 指标与 runtime switch 审计) +> 最近更新:2026-02-09(Anyhunt 视频转写方案执行进度同步:Step 1~6 已完成代码落地(server + console + admin + 单测),Step 7 待压测/上线演练) > 最近更新:2026-02-08(消息列表自动滚动:Following 模式定稿;runStart 一次 smooth + `160ms` 入场动效;AI 流式追随使用 `auto`;禁用 `overflow-anchor`;移除 `packages/ui/src/ai/assistant-ui` 目录) +> 最近更新:2026-02-08(Anyhunt 视频转写事件一致性定案:QueueEvents 仅观测,DB 字段裁决超时/接管;fallback 到点查库;新增执行时序图) +> 最近更新:2026-02-08(Anyhunt 视频转写部署交互定案:Queue Pull;VPS Dokploy 双服务;Mac mini `launchd` 常驻;Tailscale 内网边界;`VIDEO_TRANSCRIPT_LOCAL_ENABLED` 应急切换) +> 最近更新:2026-02-08(Anyhunt 视频转写方案升级:仅 local 开始后计时 10 分钟;超时先 preempt local 再 cloud fallback;预算 20 USD/日(Asia/Shanghai,按音频时长估算);cloud 重试 2 次 + 告警阈值) +> 最近更新:2026-02-08(Anyhunt 视频转写方案定案:固定技术路线 + 分步执行计划 + 强制进度同步准则) +> 最近更新:2026-02-08(Anyhunt 视频转写方案文档收敛:字段最小化 + 复用现有 server 模块 + Console 测试页设计) +> 最近更新:2026-02-08(Anyhunt 视频链接下载 + Mac mini Whisper 转写方案按现有 server 模块复用重写,收敛为 `artifacts Json` 建模) +> 最近更新:2026-02-08(新增 Anyhunt 视频链接下载 + Mac mini Whisper 转写架构方案,目标落地到 apps/anyhunt/server) > 最近更新:2026-02-07(协作总则:补充“最佳实践优先/允许破坏性重构”行为准则) > 最近更新:2026-01-27(CI:Build 限制 Turbo 并发与 Node heap,降低 8C8G 机器 OOM 概率) > 最近更新:2026-02-01(图标库回退 Lucide,移除 Hugeicons 依赖并统一调用方式) @@ -236,6 +249,7 @@ Anyhunt/ | [`docs/architecture/auth/moryflow-pc-mobile-access-token-upgrade.md`](./docs/architecture/auth/moryflow-pc-mobile-access-token-upgrade.md) | Moryflow PC/Mobile Access Token 持久化升级方案 | | [`docs/architecture/api-client-unification.md`](./docs/architecture/api-client-unification.md) | API Client 统一封装方案(Anyhunt + Moryflow) | | [`docs/architecture/anyhunt-console-public-api-key-plan.md`](./docs/architecture/anyhunt-console-public-api-key-plan.md) | Anyhunt Console 公共 API 化与 API Key 明文存储方案 | +| [`docs/architecture/anyhunt-video-transcript-pipeline.md`](./docs/architecture/anyhunt-video-transcript-pipeline.md) | 视频链接下载与双模式高可用转写方案(Anyhunt) | | [`docs/architecture/domains-and-deployment.md`](./docs/architecture/domains-and-deployment.md) | 域名与三机部署架构(megaboxpro/4c6g/8c16g + OAuth 登录) | | [`docs/architecture/ui-message-list-unification.md`](./docs/architecture/ui-message-list-unification.md) | 消息列表与输入框 UI 组件抽离方案(Moryflow/Anyhunt 统一) | | [`docs/architecture/ui-message-list-turn-anchor-adoption.md`](./docs/architecture/ui-message-list-turn-anchor-adoption.md) | Moryflow PC 消息列表交互复用改造方案(Following 模式) | @@ -802,4 +816,3 @@ pnpm typecheck --- _版本: 1.2 | 更新日期: 2026-02-07_ - diff --git a/apps/anyhunt/admin/www/CLAUDE.md b/apps/anyhunt/admin/www/CLAUDE.md index 4acd51eeb..3c01ba284 100644 --- a/apps/anyhunt/admin/www/CLAUDE.md +++ b/apps/anyhunt/admin/www/CLAUDE.md @@ -8,6 +8,8 @@ Anyhunt Dev 管理后台,用于系统监控与运营管理,需管理员权 ## 最近更新 +- 新增 Video Transcripts 页面(`/video-transcripts`):展示执行概览、today 指标、local 节点资源、预算闸门与任务列表 +- Video Transcripts 新增运行时开关(`VIDEO_TRANSCRIPT_LOCAL_ENABLED`)与审计记录展示 - LLM Model 弹窗修复 Raw config 标签使用 Label,避免 useFormField 上下文报错 - 管理后台下拉/折叠箭头改为 ChevronDown(无中轴) - 管理后台图标回退 Lucide,移除 Hugeicons 依赖并统一调用方式 @@ -74,19 +76,20 @@ Anyhunt Dev 管理后台,用于系统监控与运营管理,需管理员权 ## 功能列表 -| 功能 | 路径 | 说明 | -| ----------------- | ----------------- | -------------------------- | -| `dashboard/` | `/` | 系统概览与统计 | -| `users/` | `/users` | 用户管理 | -| `subscriptions/` | `/subscriptions` | Subscription list | -| `orders/` | `/orders` | Order history | -| `jobs/` | `/jobs` | Crawl/batch job monitoring | -| `queues/` | `/queues` | BullMQ queue status | -| `browser/` | `/browser` | Browser pool instances | -| `digest-topics/` | `/digest/topics` | Digest Topics 精选管理 | -| `digest-reports/` | `/digest/reports` | Digest 举报管理 | -| `digest-welcome/` | `/digest/welcome` | Digest Welcome 配置与页面 | -| `llm/` | `/llm` | LLM Providers/Models 配置 | +| 功能 | 路径 | 说明 | +| -------------------- | -------------------- | -------------------------- | +| `dashboard/` | `/` | 系统概览与统计 | +| `users/` | `/users` | 用户管理 | +| `subscriptions/` | `/subscriptions` | Subscription list | +| `orders/` | `/orders` | Order history | +| `jobs/` | `/jobs` | Crawl/batch job monitoring | +| `queues/` | `/queues` | BullMQ queue status | +| `video-transcripts/` | `/video-transcripts` | 视频转写执行/配置/资源看板 | +| `browser/` | `/browser` | Browser pool instances | +| `digest-topics/` | `/digest/topics` | Digest Topics 精选管理 | +| `digest-reports/` | `/digest/reports` | Digest 举报管理 | +| `digest-welcome/` | `/digest/welcome` | Digest Welcome 配置与页面 | +| `llm/` | `/llm` | LLM Providers/Models 配置 | ## Feature Module Structure diff --git a/apps/anyhunt/admin/www/src/App.tsx b/apps/anyhunt/admin/www/src/App.tsx index ab3249d03..1f9590694 100644 --- a/apps/anyhunt/admin/www/src/App.tsx +++ b/apps/anyhunt/admin/www/src/App.tsx @@ -19,6 +19,7 @@ import OrdersPage from './pages/OrdersPage'; import JobsPage from './pages/JobsPage'; import JobDetailPage from './pages/JobDetailPage'; import QueuesPage from './pages/QueuesPage'; +import VideoTranscriptsPage from './pages/VideoTranscriptsPage'; import ErrorsPage from './pages/ErrorsPage'; import BrowserPage from './pages/BrowserPage'; import DigestReportsPage from './pages/DigestReportsPage'; @@ -102,6 +103,9 @@ function App() { {/* Queues - 队列监控 */} } /> + {/* Video Transcripts - 双模式转写可观测 */} + } /> + {/* Errors - 错误分析 */} } /> diff --git a/apps/anyhunt/admin/www/src/components/layout/main-layout.tsx b/apps/anyhunt/admin/www/src/components/layout/main-layout.tsx index 95645cb20..5a4d6fec2 100644 --- a/apps/anyhunt/admin/www/src/components/layout/main-layout.tsx +++ b/apps/anyhunt/admin/www/src/components/layout/main-layout.tsx @@ -24,6 +24,7 @@ import { Flag, Newspaper, Pencil, + Clapperboard, type LucideIcon, } from 'lucide-react'; import { cn } from '@anyhunt/ui/lib'; @@ -57,6 +58,7 @@ const navGroups: NavGroup[] = [ items: [ { path: '/jobs', label: 'Jobs', icon: ListTodo }, { path: '/queues', label: 'Queues', icon: Layers }, + { path: '/video-transcripts', label: 'Video Transcripts', icon: Clapperboard }, { path: '/browser', label: 'Browser Pool', icon: Globe }, { path: '/errors', label: 'Errors', icon: TriangleAlert }, ], diff --git a/apps/anyhunt/admin/www/src/features/CLAUDE.md b/apps/anyhunt/admin/www/src/features/CLAUDE.md index 0b72d70f0..b00e90063 100644 --- a/apps/anyhunt/admin/www/src/features/CLAUDE.md +++ b/apps/anyhunt/admin/www/src/features/CLAUDE.md @@ -8,6 +8,8 @@ ## 最近更新 +- 新增 `video-transcripts/` 模块:对接 `/api/v1/admin/video-transcripts/*`(overview/resources/tasks/config) +- `video-transcripts/` 模块补齐 today 指标类型与运行时开关变更 mutation - Admin Features 图标回退 Lucide,移除 Hugeicons 依赖并统一调用方式 - LLM Feature:新增 presets API + model capabilities/reasoning/tiers 支持 - Feature types 与 API 返回结构改为 raw JSON + RFC7807(移除 success/data 包装) @@ -26,19 +28,20 @@ feature-name/ ## 功能清单 -| 功能 | 说明 | API 入口 | -| ----------------- | --------------------------- | ------------------------------ | -| `dashboard/` | 系统概览 | `/api/v1/admin/dashboard` | -| `users/` | 用户管理(含 Credits 充值) | `/api/v1/admin/users` | -| `subscriptions/` | 订阅管理 | `/api/v1/admin/subscriptions` | -| `orders/` | 订单管理 | `/api/v1/admin/orders` | -| `jobs/` | 任务监控 | `/api/v1/admin/jobs` | -| `queues/` | 队列监控 | `/api/v1/admin/queues` | -| `browser/` | 浏览器池状态 | `/api/v1/admin/browser` | -| `llm/` | LLM Providers/Models 配置 | `/api/v1/admin/llm/*` | -| `digest-topics/` | Digest 话题管理 | `/api/v1/admin/digest/topics` | -| `digest-reports/` | Digest 举报管理 | `/api/v1/admin/digest/reports` | -| `digest-welcome/` | Welcome 配置 | `/api/v1/admin/digest/welcome` | +| 功能 | 说明 | API 入口 | +| -------------------- | --------------------------- | ----------------------------------- | +| `dashboard/` | 系统概览 | `/api/v1/admin/dashboard` | +| `users/` | 用户管理(含 Credits 充值) | `/api/v1/admin/users` | +| `subscriptions/` | 订阅管理 | `/api/v1/admin/subscriptions` | +| `orders/` | 订单管理 | `/api/v1/admin/orders` | +| `jobs/` | 任务监控 | `/api/v1/admin/jobs` | +| `queues/` | 队列监控 | `/api/v1/admin/queues` | +| `video-transcripts/` | 视频转写可观测 | `/api/v1/admin/video-transcripts/*` | +| `browser/` | 浏览器池状态 | `/api/v1/admin/browser` | +| `llm/` | LLM Providers/Models 配置 | `/api/v1/admin/llm/*` | +| `digest-topics/` | Digest 话题管理 | `/api/v1/admin/digest/topics` | +| `digest-reports/` | Digest 举报管理 | `/api/v1/admin/digest/reports` | +| `digest-welcome/` | Welcome 配置 | `/api/v1/admin/digest/welcome` | ## 轮询刷新示例 diff --git a/apps/anyhunt/admin/www/src/features/queues/types.ts b/apps/anyhunt/admin/www/src/features/queues/types.ts index d5d1eb648..abef66fa5 100644 --- a/apps/anyhunt/admin/www/src/features/queues/types.ts +++ b/apps/anyhunt/admin/www/src/features/queues/types.ts @@ -5,7 +5,7 @@ export type { Pagination } from '@/lib/types'; import type { Pagination } from '@/lib/types'; /** 队列名称 */ -export type QueueName = 'screenshot' | 'scrape' | 'crawl' | 'batch-scrape'; +export type QueueName = string; /** 队列任务状态 */ export type QueueJobStatus = 'waiting' | 'active' | 'completed' | 'failed' | 'delayed'; diff --git a/apps/anyhunt/admin/www/src/features/video-transcripts/api.ts b/apps/anyhunt/admin/www/src/features/video-transcripts/api.ts new file mode 100644 index 000000000..e65fa11e1 --- /dev/null +++ b/apps/anyhunt/admin/www/src/features/video-transcripts/api.ts @@ -0,0 +1,53 @@ +/** + * [PROVIDES]: Admin Video Transcript API 方法 + * [DEPENDS]: apiClient, ADMIN_API + * [POS]: Admin Video Transcript API 访问层 + * + * [PROTOCOL]: 本文件变更时,需同步更新所属目录 CLAUDE.md + */ + +import { apiClient } from '@/lib/api-client'; +import { ADMIN_API } from '@/lib/api-paths'; +import type { + UpdateVideoTranscriptRuntimeConfigInput, + UpdateVideoTranscriptRuntimeConfigResponse, + VideoTranscriptRuntimeConfig, + VideoTranscriptOverview, + VideoTranscriptResources, + VideoTranscriptTaskListResponse, +} from './types'; + +export async function getVideoTranscriptOverview(): Promise { + return apiClient.get(`${ADMIN_API.VIDEO_TRANSCRIPTS}/overview`); +} + +export async function getVideoTranscriptResources(): Promise { + return apiClient.get(`${ADMIN_API.VIDEO_TRANSCRIPTS}/resources`); +} + +export async function getVideoTranscriptRuntimeConfig(): Promise { + return apiClient.get(`${ADMIN_API.VIDEO_TRANSCRIPTS}/config`); +} + +export async function updateVideoTranscriptRuntimeConfig( + input: UpdateVideoTranscriptRuntimeConfigInput +): Promise { + return apiClient.put( + `${ADMIN_API.VIDEO_TRANSCRIPTS}/config/local-enabled`, + input + ); +} + +export async function getVideoTranscriptTasks( + page = 1, + limit = 20 +): Promise { + const query = new URLSearchParams({ + page: String(page), + limit: String(limit), + }).toString(); + + return apiClient.get( + `${ADMIN_API.VIDEO_TRANSCRIPTS}/tasks?${query}` + ); +} diff --git a/apps/anyhunt/admin/www/src/features/video-transcripts/hooks.ts b/apps/anyhunt/admin/www/src/features/video-transcripts/hooks.ts new file mode 100644 index 000000000..734083fe5 --- /dev/null +++ b/apps/anyhunt/admin/www/src/features/video-transcripts/hooks.ts @@ -0,0 +1,69 @@ +/** + * [PROVIDES]: Admin Video Transcript React Query hooks + * [DEPENDS]: react-query, ./api + * [POS]: Admin Video Transcript 数据拉取 hooks + * + * [PROTOCOL]: 本文件变更时,需同步更新所属目录 CLAUDE.md + */ + +import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'; +import { + getVideoTranscriptOverview, + getVideoTranscriptRuntimeConfig, + getVideoTranscriptResources, + getVideoTranscriptTasks, + updateVideoTranscriptRuntimeConfig, +} from './api'; + +export const videoTranscriptAdminKeys = { + all: ['admin', 'video-transcripts'] as const, + overview: () => [...videoTranscriptAdminKeys.all, 'overview'] as const, + resources: () => [...videoTranscriptAdminKeys.all, 'resources'] as const, + config: () => [...videoTranscriptAdminKeys.all, 'config'] as const, + tasks: (page: number, limit: number) => + [...videoTranscriptAdminKeys.all, 'tasks', page, limit] as const, +}; + +export function useVideoTranscriptOverview() { + return useQuery({ + queryKey: videoTranscriptAdminKeys.overview(), + queryFn: getVideoTranscriptOverview, + refetchInterval: 5000, + }); +} + +export function useVideoTranscriptResources() { + return useQuery({ + queryKey: videoTranscriptAdminKeys.resources(), + queryFn: getVideoTranscriptResources, + refetchInterval: 5000, + }); +} + +export function useVideoTranscriptRuntimeConfig() { + return useQuery({ + queryKey: videoTranscriptAdminKeys.config(), + queryFn: getVideoTranscriptRuntimeConfig, + refetchInterval: 5000, + }); +} + +export function useVideoTranscriptTasks(page = 1, limit = 20) { + return useQuery({ + queryKey: videoTranscriptAdminKeys.tasks(page, limit), + queryFn: () => getVideoTranscriptTasks(page, limit), + refetchInterval: 5000, + }); +} + +export function useUpdateVideoTranscriptRuntimeConfig() { + const queryClient = useQueryClient(); + + return useMutation({ + mutationFn: updateVideoTranscriptRuntimeConfig, + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: videoTranscriptAdminKeys.config() }); + queryClient.invalidateQueries({ queryKey: videoTranscriptAdminKeys.all }); + }, + }); +} diff --git a/apps/anyhunt/admin/www/src/features/video-transcripts/index.ts b/apps/anyhunt/admin/www/src/features/video-transcripts/index.ts new file mode 100644 index 000000000..917bef02c --- /dev/null +++ b/apps/anyhunt/admin/www/src/features/video-transcripts/index.ts @@ -0,0 +1,3 @@ +export * from './types'; +export * from './api'; +export * from './hooks'; diff --git a/apps/anyhunt/admin/www/src/features/video-transcripts/types.ts b/apps/anyhunt/admin/www/src/features/video-transcripts/types.ts new file mode 100644 index 000000000..07708a86a --- /dev/null +++ b/apps/anyhunt/admin/www/src/features/video-transcripts/types.ts @@ -0,0 +1,135 @@ +/** + * [DEFINES]: Admin Video Transcript 类型定义 + * [USED_BY]: video-transcripts api/hooks/page + * [POS]: Admin Video Transcript 可观测类型入口 + * + * [PROTOCOL]: 本文件变更时,需同步更新所属目录 CLAUDE.md + */ + +import type { Pagination } from '@/lib/types'; + +export interface VideoTranscriptBudget { + dayKey: string; + timezone: string; + usedUsd: number; + dailyBudgetUsd: number; + remainingUsd: number; +} + +export interface VideoTranscriptOverview { + total: number; + status: { + pending: number; + downloading: number; + extractingAudio: number; + transcribing: number; + uploading: number; + completed: number; + failed: number; + cancelled: number; + }; + executor: { + localCompleted: number; + cloudCompleted: number; + }; + budget: VideoTranscriptBudget; + today: { + timezone: string; + startAt: string; + endAt: string; + total: number; + completed: number; + failed: number; + cancelled: number; + successRate: number; + failureRate: number; + cloudFallbackTriggered: number; + cloudFallbackTriggerRate: number; + localCompletedWithinTimeout: number; + localWithinTimeoutRate: number; + averageDurationSec: number; + budgetGateTriggered: number; + }; +} + +export interface VideoTranscriptNode { + nodeId: string; + hostname: string; + pid: number; + cpuLoad1: number; + memoryTotal: number; + memoryFree: number; + processRss: number; + activeTasks: number; + updatedAt: string; +} + +export interface VideoTranscriptQueueMetrics { + name: string; + waiting: number; + active: number; + completed: number; + failed: number; + delayed: number; +} + +export interface VideoTranscriptResources { + queues: { + local: VideoTranscriptQueueMetrics; + cloudFallback: VideoTranscriptQueueMetrics; + }; + nodes: VideoTranscriptNode[]; + budget: VideoTranscriptBudget; + alerts: { + budgetOver80Percent: boolean; + localNodeOffline: boolean; + staleNodeIds: string[]; + }; +} + +export interface VideoTranscriptTaskItem { + id: string; + userId: string; + platform: string; + sourceUrl: string; + status: string; + executor: 'LOCAL' | 'CLOUD_FALLBACK' | null; + localStartedAt: string | null; + error: string | null; + createdAt: string; + updatedAt: string; + completedAt: string | null; +} + +export interface VideoTranscriptTaskListResponse { + items: VideoTranscriptTaskItem[]; + pagination: Pagination; +} + +export interface VideoTranscriptConfigAudit { + id: string; + actorUserId: string; + reason: string; + metadata: unknown; + createdAt: string; +} + +export interface VideoTranscriptRuntimeConfig { + localEnabled: boolean; + source: 'env' | 'override'; + overrideRaw: string | null; + audits: VideoTranscriptConfigAudit[]; +} + +export interface UpdateVideoTranscriptRuntimeConfigInput { + enabled: boolean; + reason?: string; +} + +export interface UpdateVideoTranscriptRuntimeConfigResponse { + localEnabled: boolean; + source: 'env' | 'override'; + overrideRaw: string | null; + auditLogId: string; + updatedAt: string; +} diff --git a/apps/anyhunt/admin/www/src/lib/api-paths.ts b/apps/anyhunt/admin/www/src/lib/api-paths.ts index ec21c5d8e..a40fb95d5 100644 --- a/apps/anyhunt/admin/www/src/lib/api-paths.ts +++ b/apps/anyhunt/admin/www/src/lib/api-paths.ts @@ -13,6 +13,7 @@ export const ADMIN_API = { DASHBOARD: '/api/v1/admin/dashboard', JOBS: '/api/v1/admin/jobs', QUEUES: '/api/v1/admin/queues', + VIDEO_TRANSCRIPTS: '/api/v1/admin/video-transcripts', BROWSER: '/api/v1/admin/browser', // LLM LLM_SETTINGS: '/api/v1/admin/llm/settings', diff --git a/apps/anyhunt/admin/www/src/pages/QueuesPage.tsx b/apps/anyhunt/admin/www/src/pages/QueuesPage.tsx index 3f3d38895..90e7d8341 100644 --- a/apps/anyhunt/admin/www/src/pages/QueuesPage.tsx +++ b/apps/anyhunt/admin/www/src/pages/QueuesPage.tsx @@ -55,11 +55,13 @@ import { import { useCleanupStaleJobs } from '@/features/jobs'; import type { QueueName, QueueJobStatus, QueueStats } from '@/features/queues'; -const QUEUE_LABELS: Record = { +const QUEUE_LABELS: Record = { screenshot: 'Screenshot', scrape: 'Scrape', crawl: 'Crawl', 'batch-scrape': 'Batch Scrape', + VIDEO_TRANSCRIPT_LOCAL_QUEUE: 'Video Transcript (Local)', + VIDEO_TRANSCRIPT_CLOUD_FALLBACK_QUEUE: 'Video Transcript (Cloud Fallback)', }; const STATUS_TABS: { value: QueueJobStatus; label: string; icon: React.ReactNode }[] = [ @@ -93,7 +95,7 @@ function QueueCard({ > - {QUEUE_LABELS[stats.name as QueueName] || stats.name} + {QUEUE_LABELS[stats.name] || stats.name} {isPaused && ( 已暂停 diff --git a/apps/anyhunt/admin/www/src/pages/VideoTranscriptsPage.tsx b/apps/anyhunt/admin/www/src/pages/VideoTranscriptsPage.tsx new file mode 100644 index 000000000..bf92420b8 --- /dev/null +++ b/apps/anyhunt/admin/www/src/pages/VideoTranscriptsPage.tsx @@ -0,0 +1,511 @@ +/** + * [PROPS]: none + * [EMITS]: pagination/refresh actions + * [POS]: Admin 视频转写执行与资源看板 + * + * [PROTOCOL]: 本文件变更时,需同步更新 apps/anyhunt/admin/www/CLAUDE.md + */ + +import { useMemo, useState } from 'react'; +import { Cloud, Cpu, RefreshCw, Server } from 'lucide-react'; +import { toast } from 'sonner'; +import { + Badge, + Button, + Switch, + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, + PageHeader, + Progress, + Skeleton, + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@anyhunt/ui'; +import { + useVideoTranscriptOverview, + useVideoTranscriptRuntimeConfig, + useVideoTranscriptResources, + useVideoTranscriptTasks, + useUpdateVideoTranscriptRuntimeConfig, +} from '@/features/video-transcripts'; + +const PAGE_SIZE = 20; + +function formatBytes(bytes: number): string { + if (!Number.isFinite(bytes) || bytes <= 0) { + return '0 B'; + } + + const units = ['B', 'KB', 'MB', 'GB', 'TB']; + let value = bytes; + let unitIndex = 0; + + while (value >= 1024 && unitIndex < units.length - 1) { + value /= 1024; + unitIndex += 1; + } + + return `${value.toFixed(unitIndex === 0 ? 0 : 2)} ${units[unitIndex]}`; +} + +function getStatusBadgeVariant(status: string) { + if (status === 'COMPLETED') { + return 'default'; + } + if (status === 'FAILED' || status === 'CANCELLED') { + return 'destructive'; + } + return 'secondary'; +} + +export default function VideoTranscriptsPage() { + const [page, setPage] = useState(1); + + const overviewQuery = useVideoTranscriptOverview(); + const resourcesQuery = useVideoTranscriptResources(); + const configQuery = useVideoTranscriptRuntimeConfig(); + const tasksQuery = useVideoTranscriptTasks(page, PAGE_SIZE); + const updateRuntimeConfig = useUpdateVideoTranscriptRuntimeConfig(); + + const runningCount = useMemo(() => { + if (!overviewQuery.data) { + return 0; + } + + const status = overviewQuery.data.status; + return ( + status.pending + + status.downloading + + status.extractingAudio + + status.transcribing + + status.uploading + ); + }, [overviewQuery.data]); + + const cloudFallbackRate = useMemo(() => { + if (!overviewQuery.data) { + return 0; + } + + const totalCompleted = + overviewQuery.data.executor.localCompleted + overviewQuery.data.executor.cloudCompleted; + + if (totalCompleted <= 0) { + return 0; + } + + return (overviewQuery.data.executor.cloudCompleted / totalCompleted) * 100; + }, [overviewQuery.data]); + + const budgetUsagePercent = useMemo(() => { + const budget = resourcesQuery.data?.budget; + if (!budget || budget.dailyBudgetUsd <= 0) { + return 0; + } + return Math.min((budget.usedUsd / budget.dailyBudgetUsd) * 100, 100); + }, [resourcesQuery.data?.budget]); + + const isLoading = + overviewQuery.isLoading || + resourcesQuery.isLoading || + tasksQuery.isLoading || + configQuery.isLoading; + + const handleToggleLocalEnabled = async (enabled: boolean) => { + try { + await updateRuntimeConfig.mutateAsync({ + enabled, + reason: `Admin set local enabled to ${enabled}`, + }); + toast.success(`Local routing ${enabled ? 'enabled' : 'disabled'}`); + } catch (error) { + toast.error(error instanceof Error ? error.message : 'Failed to update local routing'); + } + }; + + return ( +
+
+ + +
+ + {isLoading ? ( +
+ {[1, 2, 3, 4].map((index) => ( + + ))} +
+ ) : overviewQuery.data ? ( +
+ + + Total Tasks + {overviewQuery.data.total} + + + + + Running + {runningCount} + + + + + Completed + {overviewQuery.data.status.completed} + + + + + Cloud Fallback Rate + {cloudFallbackRate.toFixed(1)}% + + +
+ ) : null} + + {overviewQuery.data?.today ? ( + + + Today Metrics + + Success/failure, fallback trigger and SLA metrics ({overviewQuery.data.today.timezone} + ). + + + +
+

Success Rate

+

{overviewQuery.data.today.successRate}%

+
+
+

Failure Rate

+

{overviewQuery.data.today.failureRate}%

+
+
+

Fallback Trigger Rate

+

+ {overviewQuery.data.today.cloudFallbackTriggerRate}% +

+
+
+

Local Within 10m

+

+ {overviewQuery.data.today.localWithinTimeoutRate}% +

+
+
+

Avg Duration

+

+ {overviewQuery.data.today.averageDurationSec.toFixed(1)}s +

+
+
+

Budget Gate Triggered

+

+ {overviewQuery.data.today.budgetGateTriggered} +

+
+
+
+ ) : null} + + + + Local Routing Switch + + Controls whether new tasks go to local queue first. Changes are audited. + + + + {configQuery.data ? ( + <> +
+
+

VIDEO_TRANSCRIPT_LOCAL_ENABLED

+

+ Source: {configQuery.data.source} + {configQuery.data.overrideRaw ? ` (${configQuery.data.overrideRaw})` : ''} +

+
+ { + void handleToggleLocalEnabled(checked); + }} + /> +
+ + {configQuery.data.audits.length ? ( + + + + Time + Actor + Reason + + + + {configQuery.data.audits.map((audit) => ( + + {new Date(audit.createdAt).toLocaleString('zh-CN')} + {audit.actorUserId} + {audit.reason} + + ))} + +
+ ) : ( +

No audit records yet.

+ )} + + ) : ( +

No runtime config data.

+ )} +
+
+ +
+ + + + + Cloud Fallback Budget + + Daily cap and usage in configured timezone. + + + {resourcesQuery.data ? ( + <> +
+
+

Used

+

+ ${resourcesQuery.data.budget.usedUsd.toFixed(4)} / $ + {resourcesQuery.data.budget.dailyBudgetUsd.toFixed(2)} +

+
+
+

Remaining

+

+ ${resourcesQuery.data.budget.remainingUsd.toFixed(4)} +

+
+
+ +
+ {resourcesQuery.data.budget.dayKey} + {resourcesQuery.data.budget.timezone} + {resourcesQuery.data.alerts.budgetOver80Percent ? ( + Budget {'>'} 80% + ) : null} +
+ + ) : ( +

No budget data.

+ )} +
+
+ + + + + + Queue Snapshot + + Local and cloud fallback queue status. + + + {resourcesQuery.data ? ( +
+
+

Local Queue

+

+ {resourcesQuery.data.queues.local.name} +

+

+ waiting: {resourcesQuery.data.queues.local.waiting} +

+

active: {resourcesQuery.data.queues.local.active}

+

failed: {resourcesQuery.data.queues.local.failed}

+
+
+

Cloud Fallback Queue

+

+ {resourcesQuery.data.queues.cloudFallback.name} +

+

+ waiting: {resourcesQuery.data.queues.cloudFallback.waiting} +

+

+ active: {resourcesQuery.data.queues.cloudFallback.active} +

+

+ failed: {resourcesQuery.data.queues.cloudFallback.failed} +

+
+
+ ) : ( +

No queue data.

+ )} +
+
+
+ + + + + + Local Nodes + + + Heartbeat-based node status from Redis TTL keys. + {resourcesQuery.data?.alerts.localNodeOffline ? ( + Detected stale local nodes. + ) : null} + + + + {resourcesQuery.data?.nodes.length ? ( + + + + Node + Active Tasks + CPU (load1) + Memory + Process RSS + Heartbeat + + + + {resourcesQuery.data.nodes.map((node) => ( + + +
{node.nodeId}
+
{node.hostname}
+
+ {node.activeTasks} + {node.cpuLoad1.toFixed(2)} + + {formatBytes(node.memoryFree)} / {formatBytes(node.memoryTotal)} + + {formatBytes(node.processRss)} + {new Date(node.updatedAt).toLocaleString('zh-CN')} +
+ ))} +
+
+ ) : ( +

No live local nodes.

+ )} +
+
+ + + + Latest Tasks + Task status, executor and source URL. + + + {tasksQuery.data?.items.length ? ( + <> + + + + Status + Executor + Platform + Source URL + Created At + Error + + + + {tasksQuery.data.items.map((task) => ( + + + {task.status} + + {task.executor ?? '-'} + {task.platform} + + {task.sourceUrl} + + {new Date(task.createdAt).toLocaleString('zh-CN')} + + {task.error ?? '-'} + + + ))} + +
+
+ + + Page {tasksQuery.data.pagination.page} /{' '} + {tasksQuery.data.pagination.totalPages || 1} + + +
+ + ) : ( +

No tasks yet.

+ )} +
+
+ + {overviewQuery.error || resourcesQuery.error || configQuery.error || tasksQuery.error ? ( + + + {(overviewQuery.error as Error | null)?.message || + (resourcesQuery.error as Error | null)?.message || + (configQuery.error as Error | null)?.message || + (tasksQuery.error as Error | null)?.message || + 'Failed to load data.'} + + + ) : null} +
+ ); +} diff --git a/apps/anyhunt/console/CLAUDE.md b/apps/anyhunt/console/CLAUDE.md index d0fed2edc..6dabdd00e 100644 --- a/apps/anyhunt/console/CLAUDE.md +++ b/apps/anyhunt/console/CLAUDE.md @@ -8,6 +8,7 @@ Anyhunt Dev 用户控制台,用于管理 API Key、查看用量、测试抓取 ## 最近更新 +- 新增 Video Transcript Playground(`/fetchx/video-transcript`):支持提交 URL、轮询状态、取消任务、查看产物链接与转写预览 - Console 移除 assistant-ui 直连依赖与 adapter,滚动交互继续在 `@anyhunt/ui` 内复刻 - Console 统一将 ArrowLeft/ArrowRight 替换为 ChevronLeft/ChevronRight(无中轴) - Agent Browser Playground 下拉箭头改为 ChevronDown(无中轴) @@ -88,20 +89,21 @@ Anyhunt Dev 用户控制台,用于管理 API Key、查看用量、测试抓取 ## 功能列表 -| 功能 | 路径 | 说明 | -| --------------------------- | ------------------ | -------------------- | -| `api-keys/` | `/api-keys` | API Key 管理 | -| `scrape-playground/` | `/fetchx/scrape` | 单页抓取测试 | -| `crawl-playground/` | `/fetchx/crawl` | 多页爬取测试 | -| `map-playground/` | `/fetchx/map` | URL 发现测试 | -| `extract-playground/` | `/fetchx/extract` | AI 数据提取测试 | -| `search-playground/` | `/fetchx/search` | 网页搜索测试 | -| `embed-playground/` | `/fetchx/embed` | Embed 脚本测试 | -| `agent-browser-playground/` | `/agent-browser/*` | Agent + Browser 测试 | -| `memox/` | `/memox/*` | Memox 记忆管理 | -| `webhooks/` | `/webhooks` | Webhook 配置 | -| `settings/` | `/settings` | 账户设置 | -| `auth/` | `/login` | 登录表单 | +| 功能 | 路径 | 说明 | +| ------------------------------ | -------------------------- | ----------------------- | +| `api-keys/` | `/api-keys` | API Key 管理 | +| `scrape-playground/` | `/fetchx/scrape` | 单页抓取测试 | +| `crawl-playground/` | `/fetchx/crawl` | 多页爬取测试 | +| `map-playground/` | `/fetchx/map` | URL 发现测试 | +| `extract-playground/` | `/fetchx/extract` | AI 数据提取测试 | +| `search-playground/` | `/fetchx/search` | 网页搜索测试 | +| `embed-playground/` | `/fetchx/embed` | Embed 脚本测试 | +| `video-transcript-playground/` | `/fetchx/video-transcript` | 视频转写测试(Session) | +| `agent-browser-playground/` | `/agent-browser/*` | Agent + Browser 测试 | +| `memox/` | `/memox/*` | Memox 记忆管理 | +| `webhooks/` | `/webhooks` | Webhook 配置 | +| `settings/` | `/settings` | 账户设置 | +| `auth/` | `/login` | 登录表单 | ## 近期变更 diff --git a/apps/anyhunt/console/src/App.tsx b/apps/anyhunt/console/src/App.tsx index 0e4a729fe..adbbe3b0d 100644 --- a/apps/anyhunt/console/src/App.tsx +++ b/apps/anyhunt/console/src/App.tsx @@ -20,6 +20,7 @@ import MapPlaygroundPage from './pages/MapPlaygroundPage'; import ExtractPlaygroundPage from './pages/ExtractPlaygroundPage'; import SearchPlaygroundPage from './pages/SearchPlaygroundPage'; import EmbedPlaygroundPage from './pages/EmbedPlaygroundPage'; +import VideoTranscriptPage from './pages/VideoTranscriptPage'; import AgentBrowserLayoutPage from './pages/agent-browser/AgentBrowserLayoutPage'; import AgentBrowserOverviewPage from './pages/agent-browser/AgentBrowserOverviewPage'; import AgentBrowserBrowserPage from './pages/agent-browser/AgentBrowserBrowserPage'; @@ -106,6 +107,7 @@ function App() { } /> } /> } /> + } /> {/* Agent Browser - Agent + Browser 测试 */} diff --git a/apps/anyhunt/console/src/components/layout/app-sidebar.tsx b/apps/anyhunt/console/src/components/layout/app-sidebar.tsx index 7c371dc76..0f3cbd06e 100644 --- a/apps/anyhunt/console/src/components/layout/app-sidebar.tsx +++ b/apps/anyhunt/console/src/components/layout/app-sidebar.tsx @@ -46,6 +46,7 @@ const navGroups: NavGroup[] = [ { title: 'Extract', url: '/fetchx/extract' }, { title: 'Search', url: '/fetchx/search' }, { title: 'Embed', url: '/fetchx/embed' }, + { title: 'Video Transcript', url: '/fetchx/video-transcript' }, ], }, { diff --git a/apps/anyhunt/console/src/features/CLAUDE.md b/apps/anyhunt/console/src/features/CLAUDE.md index 9d138c187..4ef9a1ad9 100644 --- a/apps/anyhunt/console/src/features/CLAUDE.md +++ b/apps/anyhunt/console/src/features/CLAUDE.md @@ -19,21 +19,22 @@ feature-name/ ## 功能清单 -| 功能 | 说明 | API 入口 | -| --------------------------- | -------------------- | --------------------------------------------- | -| `api-keys/` | API Key 管理 | `/api/v1/app/api-keys` | -| `auth/` | 登录表单 | `/api/auth/*`(Better Auth) | -| `playground-shared/` | Playground 共享组件 | — | -| `scrape-playground/` | 单页抓取测试 | `/api/v1/scrape` | -| `crawl-playground/` | 多页爬取测试 | `/api/v1/crawl` | -| `map-playground/` | URL 发现测试 | `/api/v1/map` | -| `extract-playground/` | AI 数据提取测试 | `/api/v1/extract` | -| `search-playground/` | 网页搜索测试 | `/api/v1/search` | -| `embed-playground/` | Embed 测试 | Demo-only | -| `agent-browser-playground/` | Agent + Browser 测试 | `/api/v1/agent` + `/api/v1/browser/session/*` | -| `memox/` | Memox 记忆管理 | `/api/v1/memories`(API Key) | -| `settings/` | 账户设置 | `/api/v1/app/*` | -| `webhooks/` | Webhook 管理 | `/api/v1/webhooks` | +| 功能 | 说明 | API 入口 | +| ------------------------------ | -------------------- | --------------------------------------------- | +| `api-keys/` | API Key 管理 | `/api/v1/app/api-keys` | +| `auth/` | 登录表单 | `/api/auth/*`(Better Auth) | +| `playground-shared/` | Playground 共享组件 | — | +| `scrape-playground/` | 单页抓取测试 | `/api/v1/scrape` | +| `crawl-playground/` | 多页爬取测试 | `/api/v1/crawl` | +| `map-playground/` | URL 发现测试 | `/api/v1/map` | +| `extract-playground/` | AI 数据提取测试 | `/api/v1/extract` | +| `search-playground/` | 网页搜索测试 | `/api/v1/search` | +| `embed-playground/` | Embed 测试 | Demo-only | +| `video-transcript-playground/` | 视频转写测试 | `/api/v1/app/video-transcripts` | +| `agent-browser-playground/` | Agent + Browser 测试 | `/api/v1/agent` + `/api/v1/browser/session/*` | +| `memox/` | Memox 记忆管理 | `/api/v1/memories`(API Key) | +| `settings/` | 账户设置 | `/api/v1/app/*` | +| `webhooks/` | Webhook 管理 | `/api/v1/webhooks` | ## 常用模式 @@ -60,6 +61,7 @@ export function useApiKeys() { ## 近期变更 +- 新增 `video-transcript-playground/`:支持 Session 模式视频转写任务创建/轮询/取消 - Agent Browser Playground:MessageRow parts 解析复用 `@anyhunt/ui/ai/message`(split/clean),避免多端重复实现导致语义漂移 - Playground Shared/Memox 分页箭头统一改为 ChevronRight/ChevronLeft(无中轴) - Console Features 图标回退 Lucide,移除 Hugeicons 依赖并统一调用方式 diff --git a/apps/anyhunt/console/src/features/video-transcript-playground/api.ts b/apps/anyhunt/console/src/features/video-transcript-playground/api.ts new file mode 100644 index 000000000..29ead53ee --- /dev/null +++ b/apps/anyhunt/console/src/features/video-transcript-playground/api.ts @@ -0,0 +1,43 @@ +/** + * [PROVIDES]: Video Transcript playground API 请求方法 + * [DEPENDS]: apiClient, CONSOLE_API + * [POS]: Console Video Transcript API 封装 + * + * [PROTOCOL]: 本文件变更时,需同步更新所属目录 CLAUDE.md + */ + +import { apiClient } from '@/lib/api-client'; +import { CONSOLE_API } from '@/lib/api-paths'; +import type { + CreateVideoTranscriptTaskInput, + CreateVideoTranscriptTaskResponse, + ListVideoTranscriptTasksResponse, + VideoTranscriptTask, +} from './types'; + +export async function createVideoTranscriptTask( + input: CreateVideoTranscriptTaskInput +): Promise { + return apiClient.post(CONSOLE_API.VIDEO_TRANSCRIPTS, input); +} + +export async function getVideoTranscriptTask(taskId: string): Promise { + return apiClient.get(`${CONSOLE_API.VIDEO_TRANSCRIPTS}/${taskId}`); +} + +export async function listVideoTranscriptTasks( + page = 1, + limit = 20 +): Promise { + const query = new URLSearchParams({ + page: String(page), + limit: String(limit), + }).toString(); + return apiClient.get( + `${CONSOLE_API.VIDEO_TRANSCRIPTS}?${query}` + ); +} + +export async function cancelVideoTranscriptTask(taskId: string): Promise<{ ok: true }> { + return apiClient.post<{ ok: true }>(`${CONSOLE_API.VIDEO_TRANSCRIPTS}/${taskId}/cancel`); +} diff --git a/apps/anyhunt/console/src/features/video-transcript-playground/hooks.ts b/apps/anyhunt/console/src/features/video-transcript-playground/hooks.ts new file mode 100644 index 000000000..31e8509c5 --- /dev/null +++ b/apps/anyhunt/console/src/features/video-transcript-playground/hooks.ts @@ -0,0 +1,60 @@ +/** + * [PROVIDES]: Video Transcript React Query hooks + * [DEPENDS]: react-query, ./api + * [POS]: Console Video Transcript 数据访问 hooks + * + * [PROTOCOL]: 本文件变更时,需同步更新所属目录 CLAUDE.md + */ + +import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'; +import { + cancelVideoTranscriptTask, + createVideoTranscriptTask, + getVideoTranscriptTask, + listVideoTranscriptTasks, +} from './api'; + +export const videoTranscriptKeys = { + all: ['app', 'video-transcripts'] as const, + list: (page: number, limit: number) => [...videoTranscriptKeys.all, 'list', page, limit] as const, + detail: (taskId: string) => [...videoTranscriptKeys.all, 'detail', taskId] as const, +}; + +export function useVideoTranscriptTasks(page = 1, limit = 20) { + return useQuery({ + queryKey: videoTranscriptKeys.list(page, limit), + queryFn: () => listVideoTranscriptTasks(page, limit), + refetchInterval: 5000, + }); +} + +export function useVideoTranscriptTask(taskId: string | null) { + return useQuery({ + queryKey: videoTranscriptKeys.detail(taskId ?? ''), + queryFn: () => getVideoTranscriptTask(taskId ?? ''), + enabled: Boolean(taskId), + refetchInterval: 3000, + }); +} + +export function useCreateVideoTranscriptTask() { + const queryClient = useQueryClient(); + + return useMutation({ + mutationFn: createVideoTranscriptTask, + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: videoTranscriptKeys.all }); + }, + }); +} + +export function useCancelVideoTranscriptTask() { + const queryClient = useQueryClient(); + + return useMutation({ + mutationFn: cancelVideoTranscriptTask, + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: videoTranscriptKeys.all }); + }, + }); +} diff --git a/apps/anyhunt/console/src/features/video-transcript-playground/index.ts b/apps/anyhunt/console/src/features/video-transcript-playground/index.ts new file mode 100644 index 000000000..a09ed2d08 --- /dev/null +++ b/apps/anyhunt/console/src/features/video-transcript-playground/index.ts @@ -0,0 +1,4 @@ +export * from './types'; +export * from './schemas'; +export * from './api'; +export * from './hooks'; diff --git a/apps/anyhunt/console/src/features/video-transcript-playground/schemas.ts b/apps/anyhunt/console/src/features/video-transcript-playground/schemas.ts new file mode 100644 index 000000000..37dd684eb --- /dev/null +++ b/apps/anyhunt/console/src/features/video-transcript-playground/schemas.ts @@ -0,0 +1,15 @@ +/** + * [DEFINES]: Video Transcript playground 表单 Schema + * [USED_BY]: VideoTranscriptPage + * [POS]: Console Video Transcript 表单校验定义 + * + * [PROTOCOL]: 本文件变更时,需同步更新所属目录 CLAUDE.md + */ + +import { z } from 'zod/v3'; + +export const videoTranscriptFormSchema = z.object({ + url: z.string().url('Please enter a valid URL').max(2048), +}); + +export type VideoTranscriptFormValues = z.infer; diff --git a/apps/anyhunt/console/src/features/video-transcript-playground/types.ts b/apps/anyhunt/console/src/features/video-transcript-playground/types.ts new file mode 100644 index 000000000..6b1dbe956 --- /dev/null +++ b/apps/anyhunt/console/src/features/video-transcript-playground/types.ts @@ -0,0 +1,49 @@ +/** + * [DEFINES]: Video Transcript playground 类型 + * [USED_BY]: video-transcript-playground api/hooks/page + * [POS]: Console Video Transcript 类型入口 + * + * [PROTOCOL]: 本文件变更时,需同步更新所属目录 CLAUDE.md + */ + +import type { PaginatedResponse } from '@/lib/types'; + +export type VideoTranscriptTaskStatus = + | 'PENDING' + | 'DOWNLOADING' + | 'EXTRACTING_AUDIO' + | 'TRANSCRIBING' + | 'UPLOADING' + | 'COMPLETED' + | 'FAILED' + | 'CANCELLED'; + +export type VideoTranscriptExecutor = 'LOCAL' | 'CLOUD_FALLBACK' | null; + +export interface VideoTranscriptTask { + id: string; + userId: string; + platform: string; + sourceUrl: string; + status: VideoTranscriptTaskStatus; + executor: VideoTranscriptExecutor; + localStartedAt: string | null; + artifacts: unknown; + result: unknown; + error: string | null; + startedAt: string | null; + completedAt: string | null; + createdAt: string; + updatedAt: string; +} + +export interface CreateVideoTranscriptTaskInput { + url: string; +} + +export interface CreateVideoTranscriptTaskResponse { + taskId: string; + status: VideoTranscriptTaskStatus; +} + +export type ListVideoTranscriptTasksResponse = PaginatedResponse; diff --git a/apps/anyhunt/console/src/lib/api-paths.ts b/apps/anyhunt/console/src/lib/api-paths.ts index 344ad58df..3c30d12db 100644 --- a/apps/anyhunt/console/src/lib/api-paths.ts +++ b/apps/anyhunt/console/src/lib/api-paths.ts @@ -20,6 +20,7 @@ export const PAYMENT_API = { // App 管理 API(Session 认证) export const CONSOLE_API = { API_KEYS: '/api/v1/app/api-keys', + VIDEO_TRANSCRIPTS: '/api/v1/app/video-transcripts', } as const; // Webhook API(API Key 认证) diff --git a/apps/anyhunt/console/src/pages/VideoTranscriptPage.test.tsx b/apps/anyhunt/console/src/pages/VideoTranscriptPage.test.tsx new file mode 100644 index 000000000..586510655 --- /dev/null +++ b/apps/anyhunt/console/src/pages/VideoTranscriptPage.test.tsx @@ -0,0 +1,29 @@ +import { render, screen } from '@testing-library/react'; +import { describe, expect, it, vi } from 'vitest'; +import { z } from 'zod/v3'; +import VideoTranscriptPage from './VideoTranscriptPage'; + +vi.mock('@/features/video-transcript-playground', () => ({ + videoTranscriptFormSchema: z.object({ + url: z.string().url(), + }), + useVideoTranscriptTasks: () => ({ + data: { + items: [], + pagination: { page: 1, limit: 20, total: 0, totalPages: 0 }, + }, + isLoading: false, + refetch: vi.fn(), + }), + useVideoTranscriptTask: () => ({ data: null, isLoading: false }), + useCreateVideoTranscriptTask: () => ({ mutateAsync: vi.fn(), isPending: false }), + useCancelVideoTranscriptTask: () => ({ mutateAsync: vi.fn(), isPending: false }), +})); + +describe('VideoTranscriptPage', () => { + it('renders without crashing', () => { + expect(() => render()).not.toThrow(); + expect(screen.getByText('Video Transcript')).toBeInTheDocument(); + expect(screen.getByText('Create Task')).toBeInTheDocument(); + }); +}); diff --git a/apps/anyhunt/console/src/pages/VideoTranscriptPage.tsx b/apps/anyhunt/console/src/pages/VideoTranscriptPage.tsx new file mode 100644 index 000000000..eb309818a --- /dev/null +++ b/apps/anyhunt/console/src/pages/VideoTranscriptPage.tsx @@ -0,0 +1,368 @@ +/** + * [PROPS]: none + * [EMITS]: create/cancel transcript task actions + * [POS]: Console 视频转写测试页(Session API) + * + * [PROTOCOL]: 本文件变更时,需同步更新 apps/anyhunt/console/CLAUDE.md + */ + +import { useMemo, useState } from 'react'; +import { useForm } from 'react-hook-form'; +import { zodResolver } from '@hookform/resolvers/zod'; +import { Loader, RefreshCw, SquareX } from 'lucide-react'; +import { toast } from 'sonner'; +import { + Badge, + Button, + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, + Form, + FormControl, + FormField, + FormItem, + FormLabel, + FormMessage, + Input, + PageHeader, + Skeleton, +} from '@anyhunt/ui'; +import { + videoTranscriptFormSchema, + useCancelVideoTranscriptTask, + useCreateVideoTranscriptTask, + useVideoTranscriptTask, + useVideoTranscriptTasks, + type VideoTranscriptFormValues, + type VideoTranscriptTask, +} from '@/features/video-transcript-playground'; + +const PAGE_SIZE = 20; +const TERMINAL_STATUSES = new Set(['COMPLETED', 'FAILED', 'CANCELLED']); + +function getStatusBadgeVariant(status: VideoTranscriptTask['status']) { + if (status === 'COMPLETED') { + return 'default'; + } + if (status === 'FAILED' || status === 'CANCELLED') { + return 'destructive'; + } + return 'secondary'; +} + +function parseRecord(value: unknown): Record { + if (typeof value !== 'object' || value === null || Array.isArray(value)) { + return {}; + } + return value as Record; +} + +function buildArtifactLinks(artifacts: unknown) { + const record = parseRecord(artifacts); + const candidates: Array<{ label: string; key: string }> = [ + { label: 'Video', key: 'videoUrl' }, + { label: 'Audio', key: 'audioUrl' }, + { label: 'Transcript TXT', key: 'textUrl' }, + { label: 'Transcript SRT', key: 'srtUrl' }, + { label: 'Transcript JSON', key: 'jsonUrl' }, + ]; + + return candidates + .map((candidate) => { + const value = record[candidate.key]; + if (typeof value !== 'string' || !value.trim()) { + return null; + } + return { + label: candidate.label, + url: value, + }; + }) + .filter((item): item is { label: string; url: string } => item !== null); +} + +function extractTranscriptText(result: unknown): string { + const record = parseRecord(result); + const text = record.text; + if (typeof text === 'string') { + return text; + } + return ''; +} + +export default function VideoTranscriptPage() { + const [page, setPage] = useState(1); + const [selectedTaskId, setSelectedTaskId] = useState(null); + + const form = useForm({ + resolver: zodResolver(videoTranscriptFormSchema), + defaultValues: { + url: '', + }, + }); + + const { + data: taskList, + isLoading: isTaskListLoading, + refetch: refetchTaskList, + } = useVideoTranscriptTasks(page, PAGE_SIZE); + + const createTaskMutation = useCreateVideoTranscriptTask(); + const cancelTaskMutation = useCancelVideoTranscriptTask(); + + const effectiveSelectedTaskId = useMemo(() => { + if (selectedTaskId) { + return selectedTaskId; + } + return taskList?.items[0]?.id ?? null; + }, [selectedTaskId, taskList?.items]); + + const { data: taskDetail, isLoading: isTaskDetailLoading } = + useVideoTranscriptTask(effectiveSelectedTaskId); + + const handleSubmit = form.handleSubmit(async (values) => { + try { + const created = await createTaskMutation.mutateAsync({ url: values.url.trim() }); + toast.success('Task created'); + setSelectedTaskId(created.taskId); + form.reset({ url: values.url.trim() }); + } catch (error) { + toast.error(error instanceof Error ? error.message : 'Failed to create task'); + } + }); + + const handleCancel = async (taskId: string) => { + try { + await cancelTaskMutation.mutateAsync(taskId); + toast.success('Task cancelled'); + if (effectiveSelectedTaskId === taskId) { + await refetchTaskList(); + } + } catch (error) { + toast.error(error instanceof Error ? error.message : 'Failed to cancel task'); + } + }; + + const artifactLinks = buildArtifactLinks(taskDetail?.artifacts); + const transcriptText = extractTranscriptText(taskDetail?.result); + + return ( +
+ + + + + Create Task + + Supported platforms: Douyin, Bilibili, Xiaohongshu, YouTube. + + + +
+ + ( + + Video URL + + + + + + )} + /> + + + +
+
+ +
+ + +
+
+ Tasks + Latest submitted tasks (auto refresh every 5s). +
+ +
+
+ + {isTaskListLoading ? ( +
+ {[1, 2, 3].map((index) => ( + + ))} +
+ ) : taskList?.items.length ? ( + <> + {taskList.items.map((task) => { + const isSelected = task.id === effectiveSelectedTaskId; + const canCancel = !TERMINAL_STATUSES.has(task.status); + + return ( + + ) : null} +
+ + ); + })} +
+ + + Page {taskList.pagination.page} / {taskList.pagination.totalPages || 1} + + +
+ + ) : ( +

No tasks yet.

+ )} + + + + + + Task Detail + Status, executor, artifacts and transcript preview. + + + {isTaskDetailLoading ? ( +
+ {[1, 2, 3].map((index) => ( + + ))} +
+ ) : taskDetail ? ( + <> +
+
+

Status

+

{taskDetail.status}

+
+
+

Executor

+

{taskDetail.executor ?? '-'}

+
+
+

Platform

+

{taskDetail.platform}

+
+
+

Created At

+

+ {new Date(taskDetail.createdAt).toLocaleString('zh-CN')} +

+
+
+ + {taskDetail.error ? ( +
+ {taskDetail.error} +
+ ) : null} + +
+

Artifacts

+ {artifactLinks.length ? ( +
+ {artifactLinks.map((item) => ( + + {item.label} + + ))} +
+ ) : ( +

No artifact links yet.

+ )} +
+ +
+

Transcript Preview

+
+ {transcriptText || 'No transcript text yet.'} +
+
+ + ) : ( +

Select a task to view details.

+ )} +
+
+
+ + ); +} diff --git a/apps/anyhunt/server/CLAUDE.md b/apps/anyhunt/server/CLAUDE.md index f39513663..81ea8167b 100644 --- a/apps/anyhunt/server/CLAUDE.md +++ b/apps/anyhunt/server/CLAUDE.md @@ -8,6 +8,11 @@ Backend API + Web Data Engine built with NestJS. Core service for web scraping, ## 最近更新 +- Video Transcript:新增 Mac mini local-worker 一键部署脚本(`scripts/video-transcript/setup-local-worker.sh`),统一依赖安装、环境文件写入、`launchd` 注册与启动流程 +- Video Transcript:四轮可靠性修复(cloud 接管后 workspace 初始化失败纳入失败兜底;local 启动顺序调整为先写 `localStartedAt` 再调度 fallback;`duration probe` 解析增强;补充接管后 workspace 失败回归测试) +- Video Transcript:三轮可靠性修复(timeout pre-check 失败不误写任务失败、local fallback-check 调度失败降级、scanner 单角色启用、cloud duration probe 提前 preempt) +- Video Transcript:二轮可靠性修复(fallback 补偿扫描 30s、localStartedAt/timeout 判定改用 DB 时间、Admin runtime switch + 审计 + today 指标补齐、移除队列全局 5 分钟 timeout) +- Video Transcript:新增双模式转写模块(LOCAL + CLOUD_FALLBACK)、任务模型与 `/api/v1/app/video-transcripts` + `/api/v1/admin/video-transcripts` 接口,并接入 Admin 队列监控 - API Key:更新接口补齐 no-store,避免明文 key 被缓存 - LLM:ModelProviderFactory 单测在 isolate=false 下 resetModules 确保 mock 生效 - Agent:请求支持多轮消息(messages),计费估算基于 message 总量 @@ -114,41 +119,42 @@ pnpm --filter @anyhunt/anyhunt-server prisma:studio:vector ## Module Structure -| Module | Files | Description | CLAUDE.md | -| ---------------- | ----- | -------------------------------------------- | ------------------------- | -| `scraper/` | 24 | Core scraping engine | `src/scraper/CLAUDE.md` | -| `common/` | 22 | Shared guards, decorators, pipes, validators | `src/common/CLAUDE.md` | -| `llm/` | - | Admin LLM Providers/Models + runtime routing | `src/llm/CLAUDE.md` | -| `agent/` | - | L3 Agent API + Browser Tools | `src/agent/CLAUDE.md` | -| `digest/` | - | Intelligent Digest (subscriptions/inbox) | `src/digest/CLAUDE.md` | -| `admin/` | 16 | Admin dashboard APIs | `src/admin/CLAUDE.md` | -| `oembed/` | 18 | oEmbed provider support | `src/oembed/CLAUDE.md` | -| `billing/` | 5 | Billing rules + deduct/refund | - | -| `quota/` | 14 | Quota management | `src/quota/CLAUDE.md` | -| `api-key/` | 13 | API key management | `src/api-key/CLAUDE.md` | -| `memory/` | 10 | Semantic memory API (Memox) | `src/memory/CLAUDE.md` | -| `entity/` | 10 | Mem0 entities (user/agent/app/run) | `src/entity/CLAUDE.md` | -| `embedding/` | 4 | Embeddings generation (Memox) | `src/embedding/CLAUDE.md` | -| `crawler/` | 11 | Multi-page crawling | `src/crawler/CLAUDE.md` | -| `auth/` | 10 | Authentication (Better Auth) | `src/auth/CLAUDE.md` | -| `payment/` | 10 | Payment processing (Creem) | - | -| `webhook/` | 10 | Webhook notifications | `src/webhook/CLAUDE.md` | -| `extract/` | 9 | AI-powered data extraction | - | -| `batch-scrape/` | 9 | Bulk URL processing | - | -| `user/` | 9 | User management | - | -| `map/` | 8 | URL discovery | - | -| `storage/` | 7 | Cloudflare R2 storage | - | -| `search/` | 6 | Web search API | - | -| `browser/` | 6 | Browser pool management | `src/browser/CLAUDE.md` | -| `demo/` | 5 | Playground demo API | - | -| `redis/` | 4 | Redis caching | - | -| `health/` | 3 | Health check endpoints | - | -| `email/` | 3 | Email service | - | -| `queue/` | 3 | BullMQ queue config | - | -| `prisma/` | 3 | 主库连接(PrismaService) | - | -| `vector-prisma/` | 3 | 向量库连接(VectorPrismaService) | - | -| `config/` | 2 | Pricing configuration | - | -| `types/` | 6 | Shared type definitions | - | +| Module | Files | Description | CLAUDE.md | +| ------------------- | ----- | -------------------------------------------- | -------------------------------- | +| `scraper/` | 24 | Core scraping engine | `src/scraper/CLAUDE.md` | +| `common/` | 22 | Shared guards, decorators, pipes, validators | `src/common/CLAUDE.md` | +| `llm/` | - | Admin LLM Providers/Models + runtime routing | `src/llm/CLAUDE.md` | +| `agent/` | - | L3 Agent API + Browser Tools | `src/agent/CLAUDE.md` | +| `digest/` | - | Intelligent Digest (subscriptions/inbox) | `src/digest/CLAUDE.md` | +| `video-transcript/` | - | Video transcript pipeline (local + fallback) | `src/video-transcript/CLAUDE.md` | +| `admin/` | 16 | Admin dashboard APIs | `src/admin/CLAUDE.md` | +| `oembed/` | 18 | oEmbed provider support | `src/oembed/CLAUDE.md` | +| `billing/` | 5 | Billing rules + deduct/refund | - | +| `quota/` | 14 | Quota management | `src/quota/CLAUDE.md` | +| `api-key/` | 13 | API key management | `src/api-key/CLAUDE.md` | +| `memory/` | 10 | Semantic memory API (Memox) | `src/memory/CLAUDE.md` | +| `entity/` | 10 | Mem0 entities (user/agent/app/run) | `src/entity/CLAUDE.md` | +| `embedding/` | 4 | Embeddings generation (Memox) | `src/embedding/CLAUDE.md` | +| `crawler/` | 11 | Multi-page crawling | `src/crawler/CLAUDE.md` | +| `auth/` | 10 | Authentication (Better Auth) | `src/auth/CLAUDE.md` | +| `payment/` | 10 | Payment processing (Creem) | - | +| `webhook/` | 10 | Webhook notifications | `src/webhook/CLAUDE.md` | +| `extract/` | 9 | AI-powered data extraction | - | +| `batch-scrape/` | 9 | Bulk URL processing | - | +| `user/` | 9 | User management | - | +| `map/` | 8 | URL discovery | - | +| `storage/` | 7 | Cloudflare R2 storage | - | +| `search/` | 6 | Web search API | - | +| `browser/` | 6 | Browser pool management | `src/browser/CLAUDE.md` | +| `demo/` | 5 | Playground demo API | - | +| `redis/` | 4 | Redis caching | - | +| `health/` | 3 | Health check endpoints | - | +| `email/` | 3 | Email service | - | +| `queue/` | 3 | BullMQ queue config | - | +| `prisma/` | 3 | 主库连接(PrismaService) | - | +| `vector-prisma/` | 3 | 向量库连接(VectorPrismaService) | - | +| `config/` | 2 | Pricing configuration | - | +| `types/` | 6 | Shared type definitions | - | ## Common Patterns diff --git a/apps/anyhunt/server/prisma/main/CLAUDE.md b/apps/anyhunt/server/prisma/main/CLAUDE.md index 11d258c0f..f52810b3b 100644 --- a/apps/anyhunt/server/prisma/main/CLAUDE.md +++ b/apps/anyhunt/server/prisma/main/CLAUDE.md @@ -33,6 +33,7 @@ ## 近期变更记录 +- 2026-02-09:新增 `VideoTranscriptTask` + `VideoTranscriptExecutor`(本地主执行 + 云端兜底)迁移:`20260209001000_add_video_transcript_task`。 - 2026-01-25:重置数据库并生成 init 迁移作为新基线。 - 2026-01-25:新增 PaymentWebhookEvent 表,用于 Creem webhook 幂等去重。 - 2026-01-26:迁移脚本统一使用 `prisma.*.config.ts`,测试使用 migrate deploy 校验迁移。 diff --git a/apps/anyhunt/server/prisma/main/migrations/20260209001000_add_video_transcript_task/migration.sql b/apps/anyhunt/server/prisma/main/migrations/20260209001000_add_video_transcript_task/migration.sql new file mode 100644 index 000000000..f35174b90 --- /dev/null +++ b/apps/anyhunt/server/prisma/main/migrations/20260209001000_add_video_transcript_task/migration.sql @@ -0,0 +1,53 @@ +-- CreateEnum +CREATE TYPE "VideoTranscriptTaskStatus" AS ENUM ( + 'PENDING', + 'DOWNLOADING', + 'EXTRACTING_AUDIO', + 'TRANSCRIBING', + 'UPLOADING', + 'COMPLETED', + 'FAILED', + 'CANCELLED' +); + +-- CreateEnum +CREATE TYPE "VideoTranscriptExecutor" AS ENUM ('LOCAL', 'CLOUD_FALLBACK'); + +-- CreateTable +CREATE TABLE "VideoTranscriptTask" ( + "id" TEXT NOT NULL, + "userId" TEXT NOT NULL, + "platform" TEXT NOT NULL, + "sourceUrl" TEXT NOT NULL, + "status" "VideoTranscriptTaskStatus" NOT NULL DEFAULT 'PENDING', + "executor" "VideoTranscriptExecutor", + "localStartedAt" TIMESTAMP(3), + "artifacts" JSONB, + "result" JSONB, + "error" TEXT, + "startedAt" TIMESTAMP(3), + "completedAt" TIMESTAMP(3), + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "VideoTranscriptTask_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE INDEX "VideoTranscriptTask_userId_createdAt_idx" ON "VideoTranscriptTask"("userId", "createdAt"); + +-- CreateIndex +CREATE INDEX "VideoTranscriptTask_status_createdAt_idx" ON "VideoTranscriptTask"("status", "createdAt"); + +-- CreateIndex +CREATE INDEX "VideoTranscriptTask_executor_createdAt_idx" ON "VideoTranscriptTask"("executor", "createdAt"); + +-- CreateIndex +CREATE INDEX "VideoTranscriptTask_localStartedAt_idx" ON "VideoTranscriptTask"("localStartedAt"); + +-- CreateIndex +CREATE INDEX "VideoTranscriptTask_sourceUrl_idx" ON "VideoTranscriptTask"("sourceUrl"); + +-- AddForeignKey +ALTER TABLE "VideoTranscriptTask" ADD CONSTRAINT "VideoTranscriptTask_userId_fkey" +FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/apps/anyhunt/server/prisma/main/schema.prisma b/apps/anyhunt/server/prisma/main/schema.prisma index 2888896f4..a98976b3e 100644 --- a/apps/anyhunt/server/prisma/main/schema.prisma +++ b/apps/anyhunt/server/prisma/main/schema.prisma @@ -36,6 +36,7 @@ model User { webhooks Webhook[] accountDeletionRecord AccountDeletionRecord? scrapeJobs ScrapeJob[] + videoTranscriptTasks VideoTranscriptTask[] batchScrapeJobs BatchScrapeJob[] crawlJobs CrawlJob[] agentTasks AgentTask[] @@ -482,6 +483,22 @@ enum ScrapeStatus { FAILED } +enum VideoTranscriptTaskStatus { + PENDING + DOWNLOADING + EXTRACTING_AUDIO + TRANSCRIBING + UPLOADING + COMPLETED + FAILED + CANCELLED +} + +enum VideoTranscriptExecutor { + LOCAL + CLOUD_FALLBACK +} + enum AgentTaskStatus { PENDING PROCESSING @@ -574,6 +591,31 @@ model ScrapeJob { @@index([createdAt]) } +model VideoTranscriptTask { + id String @id @default(cuid()) + userId String + platform String + sourceUrl String + status VideoTranscriptTaskStatus @default(PENDING) + executor VideoTranscriptExecutor? + localStartedAt DateTime? + artifacts Json? + result Json? + error String? + startedAt DateTime? + completedAt DateTime? + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + user User @relation(fields: [userId], references: [id], onDelete: Cascade) + + @@index([userId, createdAt]) + @@index([status, createdAt]) + @@index([executor, createdAt]) + @@index([localStartedAt]) + @@index([sourceUrl]) +} + // ============================================= // Batch Scrape - 批量抓取 // ============================================= diff --git a/apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh b/apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh new file mode 100755 index 000000000..97a497d2b --- /dev/null +++ b/apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh @@ -0,0 +1,380 @@ +#!/usr/bin/env bash +# +# [INPUT]: Mac mini 本机仓库路径 + Whisper 模型路径 + 可选节点标识 +# [OUTPUT]: .env.local-worker + launchd plist + 常驻 local-worker 服务 +# [POS]: Video Transcript local-worker 一键部署脚本(Mac mini) +# +# [PROTOCOL]: 本文件变更时,必须同步更新 apps/anyhunt/server/CLAUDE.md + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT_DEFAULT="$(cd "${SCRIPT_DIR}/../../../../.." && pwd)" + +REPO_ROOT="${REPO_ROOT_DEFAULT}" +SERVER_DIR="" +ENV_FILE="" +MODEL_PATH="" +NODE_ID="$(hostname)" +SERVICE_LABEL="com.anyhunt.video-transcript-local-worker" +LOG_DIR="${HOME}/Library/Logs/anyhunt" +SKIP_DEP_INSTALL="false" +SKIP_BUILD="false" +NO_START="false" + +print_usage() { + cat <<'EOF' +用途: + 在 Mac mini 上为 Video Transcript local-worker 生成并启动 launchd 常驻服务。 + +用法: + setup-local-worker.sh --model-path /ABSOLUTE/PATH/TO/model.bin [options] + +参数: + --model-path Whisper 模型绝对路径(必填,除非 .env.local-worker 已有) + --repo-root 仓库根目录(默认:脚本自动推断) + --env-file local worker 环境文件路径(默认:apps/anyhunt/server/.env.local-worker) + --node-id 本地节点标识(默认:hostname) + --service-label
@@ -107,15 +110,15 @@ function QueueCard({

{stats.waiting}

-

等待

+

Waiting

{stats.active}

-

处理中

+

Active

{stats.failed}

-

失败

+

Failed

@@ -140,7 +143,7 @@ function QueueJobList({ queueName, status }: { queueName: QueueName; status: Que if (!data?.items.length) { return (
-

暂无任务

+

No jobs yet.

); } @@ -150,10 +153,10 @@ function QueueJobList({ queueName, status }: { queueName: QueueName; status: Que ID - 任务名 - 尝试次数 - 时间 - {status === 'failed' && 错误原因} + Name + Attempts + Time + {status === 'failed' && Failed Reason} @@ -163,7 +166,7 @@ function QueueJobList({ queueName, status }: { queueName: QueueName; status: Que {job.name} {job.attemptsMade} - {new Date(job.timestamp).toLocaleTimeString('zh-CN')} + {formatRelativeTime(job.timestamp)} {status === 'failed' && ( @@ -215,7 +218,7 @@ export default function QueuesPage() { return (
- +
@@ -238,19 +241,19 @@ export default function QueuesPage() {
{data.summary.totalWaiting}
-

总等待任务

+

Total waiting jobs

{data.summary.totalActive}
-

正在处理

+

Active jobs

{data.summary.totalFailed}
-

失败任务

+

Failed jobs

@@ -281,10 +284,10 @@ export default function QueuesPage() {
- {QUEUE_LABELS[selectedQueue]} 队列 + {QUEUE_LABELS[selectedQueue] || selectedQueue} Queue {isPaused && ( - 已暂停 + Paused )} @@ -297,12 +300,12 @@ export default function QueuesPage() { {isPaused ? ( <> - 恢复 + Resume ) : ( <> - 暂停 + Pause )} @@ -313,7 +316,7 @@ export default function QueuesPage() { disabled={isRetrying || (selectedStats?.failed ?? 0) === 0} > - 重试全部失败 + Retry all failed
@@ -361,21 +364,21 @@ export default function QueuesPage() { > - 确认操作 + Confirm action {confirmDialog.action === 'retry' && - `确定要重试 ${QUEUE_LABELS[selectedQueue]} 队列中所有失败的任务吗?`} + `Retry all failed jobs in "${QUEUE_LABELS[selectedQueue] || selectedQueue}"?`} {confirmDialog.action === 'clean-completed' && - `确定要清理 ${QUEUE_LABELS[selectedQueue]} 队列中所有已完成的任务吗?`} + `Clean all completed jobs in "${QUEUE_LABELS[selectedQueue] || selectedQueue}"?`} {confirmDialog.action === 'clean-failed' && - `确定要清理 ${QUEUE_LABELS[selectedQueue]} 队列中所有失败的任务吗?`} + `Clean all failed jobs in "${QUEUE_LABELS[selectedQueue] || selectedQueue}"?`} {confirmDialog.action === 'cleanup-stale' && - '确定要清理所有卡住超过 30 分钟的任务吗?这些任务将被标记为失败。'} + 'Cleanup all jobs stuck for more than 30 minutes? Those jobs will be marked as failed.'} - 取消 - 确认 + Cancel + Confirm diff --git a/apps/anyhunt/admin/www/src/pages/VideoTranscriptsPage.tsx b/apps/anyhunt/admin/www/src/pages/VideoTranscriptsPage.tsx index bf92420b8..fbe1375bd 100644 --- a/apps/anyhunt/admin/www/src/pages/VideoTranscriptsPage.tsx +++ b/apps/anyhunt/admin/www/src/pages/VideoTranscriptsPage.tsx @@ -28,6 +28,7 @@ import { TableHeader, TableRow, } from '@anyhunt/ui'; +import { formatRelativeTime } from '@anyhunt/ui/lib'; import { useVideoTranscriptOverview, useVideoTranscriptRuntimeConfig, @@ -271,7 +272,7 @@ export default function VideoTranscriptsPage() { {configQuery.data.audits.map((audit) => ( - {new Date(audit.createdAt).toLocaleString('zh-CN')} + {formatRelativeTime(audit.createdAt)} {audit.actorUserId} {audit.reason} @@ -414,7 +415,7 @@ export default function VideoTranscriptsPage() { {formatBytes(node.memoryFree)} / {formatBytes(node.memoryTotal)}
{formatBytes(node.processRss)} - {new Date(node.updatedAt).toLocaleString('zh-CN')} + {formatRelativeTime(node.updatedAt)} ))}
@@ -455,7 +456,7 @@ export default function VideoTranscriptsPage() { {task.sourceUrl} - {new Date(task.createdAt).toLocaleString('zh-CN')} + {formatRelativeTime(task.createdAt)} {task.sourceUrl}

- {new Date(task.createdAt).toLocaleString('zh-CN')} + {formatRelativeTime(task.createdAt)} {canCancel ? (
From 5b7dd5045ba8d082d9d92aa899103c85aef49281 Mon Sep 17 00:00:00 2001 From: dvlin Date: Tue, 10 Feb 2026 12:59:27 +0800 Subject: [PATCH 09/16] docs(video-transcript): update deployment docs --- CLAUDE.md | 1 + docs/CLAUDE.md | 2 ++ docs/architecture/CLAUDE.md | 1 + .../anyhunt-video-transcript-pipeline.md | 16 +++++++++++++++- docs/index.md | 4 ++-- 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 986b1227d..c41369dae 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,6 +1,7 @@ # Anyhunt 统一平台 > 本文档是 AI Agent 的核心指南。遵循 [agents.md 规范](https://agents.md/)。 +> 最近更新:2026-02-10(Anyhunt 视频转写:补充 worker 独立启动入口与 Docker 角色开关(`ANYHUNT_RUN_MODE`/`ANYHUNT_RUN_MIGRATIONS`),并同步 terminal/executor guard 防止终态覆盖) > 最近更新:2026-02-09(Anyhunt 视频转写补充本地一键部署脚本:`apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh`) > 最近更新:2026-02-09(Anyhunt 视频转写补充三节点部署定案(公网简化版):VPS1 API + VPS2 cloud fallback worker + Mac mini local worker) > 最近更新:2026-02-09(Anyhunt 视频转写四轮可靠性修复:cloud 接管后 workspace 失败兜底、local 启动顺序严格对齐 10 分钟窗口起点、duration probe 解析增强、补充对应回归测试) diff --git a/docs/CLAUDE.md b/docs/CLAUDE.md index b46a01d4d..4f97a80b3 100644 --- a/docs/CLAUDE.md +++ b/docs/CLAUDE.md @@ -8,6 +8,7 @@ # docs/ 目录指南 +> 最近更新:2026-02-10(Anyhunt 视频转写补充 worker 独立启动入口 + Docker 角色开关:`ANYHUNT_RUN_MODE`/`ANYHUNT_RUN_MIGRATIONS`,并同步 terminal/executor guard) > 最近更新:2026-02-09(Anyhunt 视频转写方案补充上线前执行清单(Checklist):T-1/T-0、联调验收、回滚预案、24h 观察) > 最近更新:2026-02-09(Anyhunt 视频转写补充本地一键部署脚本:`apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh`) > 最近更新:2026-02-09(Anyhunt 视频转写新增“三节点部署详细流程(公网简化版)”:VPS1 API + VPS2 cloud fallback worker + Mac mini local worker) @@ -54,6 +55,7 @@ ## 最近更新 +- Anyhunt 视频链接下载 + 双模式转写架构方案:补充 worker 独立启动入口与 Docker 角色开关(`ANYHUNT_RUN_MODE`/`ANYHUNT_RUN_MIGRATIONS`),并同步 local/cloud 状态推进增加 terminal/executor guard(`docs/architecture/anyhunt-video-transcript-pipeline.md`,2026-02-10:active) - Anyhunt 视频链接下载 + 双模式转写架构方案:新增上线前执行清单(Checklist),用于生产上线前固定打勾执行(T-1/T-0、联调验收、回滚预案、24h 观察)(`docs/architecture/anyhunt-video-transcript-pipeline.md`,2026-02-09:active) - Anyhunt 视频链接下载 + 双模式转写架构方案:补充本地一键部署脚本(`apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh`),用于 Mac mini local-worker 环境初始化、`launchd` 注册与启动(`docs/architecture/anyhunt-video-transcript-pipeline.md`,2026-02-09:active) - Anyhunt 视频链接下载 + 双模式转写架构方案:新增“三节点部署详细流程(公网简化版)”,明确 `VPS1(API)+VPS2(cloud fallback)+Mac mini(local)` 的部署步骤、角色开关矩阵与联调验收顺序(`docs/architecture/anyhunt-video-transcript-pipeline.md`,2026-02-09:active) diff --git a/docs/architecture/CLAUDE.md b/docs/architecture/CLAUDE.md index d609826c9..51f4d2c37 100644 --- a/docs/architecture/CLAUDE.md +++ b/docs/architecture/CLAUDE.md @@ -41,6 +41,7 @@ ## 近期更新 +- `anyhunt-video-transcript-pipeline.md`:补充“worker 独立启动入口 + Docker 角色开关”(`ANYHUNT_RUN_MODE` / `ANYHUNT_RUN_MIGRATIONS`),并明确 VPS2/Mac mini 采用最小 worker App 启动以避免误消费其他队列;同步 local/cloud 状态推进增加 terminal/executor guard(2026-02-10)。 - `anyhunt-video-transcript-pipeline.md`:新增“上线前执行清单(Checklist)”章节,固化 T-1/T-0、联调验收、回滚预案与 24h 观察项(2026-02-09)。 - `anyhunt-video-transcript-pipeline.md`:补充本地一键部署脚本(`apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh`)说明,统一 Mac mini local-worker 的依赖安装、环境文件写入、`launchd` 注册与启动(2026-02-09)。 - `anyhunt-video-transcript-pipeline.md`:新增“三节点部署详细流程(公网简化版)”,明确 `VPS1(API)+VPS2(cloud fallback)+Mac mini(local)` 的角色开关矩阵、部署步骤与联调验收顺序(2026-02-09)。 diff --git a/docs/architecture/anyhunt-video-transcript-pipeline.md b/docs/architecture/anyhunt-video-transcript-pipeline.md index a3ecb4933..f8bf1498f 100644 --- a/docs/architecture/anyhunt-video-transcript-pipeline.md +++ b/docs/architecture/anyhunt-video-transcript-pipeline.md @@ -1,6 +1,6 @@ --- title: Anyhunt 视频链接下载与双模式转写方案(定案) -date: 2026-02-09 +date: 2026-02-10 scope: anyhunt/server, anyhunt/console, anyhunt/admin, local-host, cloud-fallback, video-transcript status: active --- @@ -18,6 +18,8 @@ status: active ## 0. 最近执行同步 - 2026-02-10:PR Review 修复:local workspace 创建失败不再导致 activeTasks 泄漏/任务卡死;平台域名白名单改为 domain-boundary 校验防止 suffix bypass;cloud fallback 完成态写入前增加终态保护(避免覆盖 CANCELLED/FAILED);队列策略调整:保留默认队列全局 5 分钟 timeout(历史行为),并将 video transcript 队列切到独立 Bull configKey(不继承 5 分钟);长视频上限由命令级 timeout 控制(LOCAL=4h,CLOUD=2h)。 +- 2026-02-10:部署架构补强(避免误消费其他队列):新增 Video Transcript worker 独立启动入口 `apps/anyhunt/server/src/video-transcript/worker.ts` + 最小启动模块 `apps/anyhunt/server/src/video-transcript/video-transcript-worker-app.module.ts`,确保 `VPS2 cloud worker` / `Mac mini local worker` 不加载全量 `AppModule`,不会误消费 `scrape/crawl/digest` 等队列或执行全局定时任务;同时将 local/cloud 的状态推进改为 `updateMany + terminal guard + executor guard`,避免 CANCELLED/接管竞态下被覆盖;Docker 入口新增 `ANYHUNT_RUN_MODE` 与 `ANYHUNT_RUN_MIGRATIONS`(worker 跳过迁移),Mac mini 一键脚本改为启动 `start:video-transcript-worker`。 +- 2026-02-10:最佳实践补齐:URL 入参强制 http(s) 协议校验(DTO + normalize 双层兜底);`VIDEO_TRANSCRIPT_ENABLE_LOCAL_WORKER` / `VIDEO_TRANSCRIPT_ENABLE_CLOUD_FALLBACK_WORKER` 默认值改为 `false`(避免误启 worker);Admin `Queues` 页面用户可见文案统一英文,时间展示改为 `formatRelativeTime`。 - 2026-02-09:补充“17. 上线前执行清单(Checklist)”,覆盖 T-1 准备、T-0 部署、联调验收、回滚预案与 24h 观察项,作为生产上线前固定打勾清单。 - 2026-02-09:新增 Mac mini local-worker 一键部署脚本 `apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh`,统一依赖检查、`.env.local-worker` 写入、`launchd` 注册与启动;local 模式仅强制校验 DB/Redis/R2 变量,cloud fallback 变量改为告警提示。 - 2026-02-09:补充部署定案附录(公网简化版):新增 `VPS1 API + VPS2 cloud fallback worker + Mac mini local worker` 三节点详细部署流程、角色环境变量矩阵、`launchd` 常驻步骤与联调验收清单。 @@ -374,6 +376,10 @@ model VideoTranscriptTask { - 两个服务共享同一套数据库与 Redis。 - 两个服务都必须加载相同的基础环境变量(DB/Redis/R2/Workers AI/预算)。 - API 服务必须可水平扩容;cloud fallback worker 默认单副本,按成本策略再扩容。 +- 两个服务使用同一镜像(同 Dockerfile),通过 `ANYHUNT_RUN_MODE` 控制启动方式: + - `api`:启动全量 HTTP API(默认) + - `video-transcript-worker`:仅启动 Video Transcript worker(不加载全量 AppModule) +- 仅允许 `anyhunt-server-api` 执行数据库迁移(默认行为);worker 服务必须设置 `ANYHUNT_RUN_MIGRATIONS=false` 跳过迁移,避免多实例并发迁移与额外 DB 权限要求。 ### 11.3 本地主机(Mac mini) @@ -404,6 +410,9 @@ model VideoTranscriptTask { ### 11.6 环境变量 - 基础:`DATABASE_URL`、`REDIS_URL` +- 进程角色(Docker): + - `ANYHUNT_RUN_MODE=api|video-transcript-worker` + - `ANYHUNT_RUN_MIGRATIONS=true|false`(worker 必须 `false`) - R2:`R2_ACCOUNT_ID`、`R2_ACCESS_KEY_ID`、`R2_SECRET_ACCESS_KEY`、`R2_BUCKET_NAME`、`R2_PUBLIC_URL` - Workers AI:`CF_ACCOUNT_ID`、`CF_WORKERS_AI_API_TOKEN` - 预算: @@ -598,6 +607,8 @@ model VideoTranscriptTask { 2. 配置环境变量: ```env +ANYHUNT_RUN_MODE=api +ANYHUNT_RUN_MIGRATIONS=true VIDEO_TRANSCRIPT_ENABLE_LOCAL_WORKER=false VIDEO_TRANSCRIPT_ENABLE_CLOUD_FALLBACK_WORKER=false VIDEO_TRANSCRIPT_ENABLE_FALLBACK_SCANNER=true @@ -618,6 +629,8 @@ VIDEO_TRANSCRIPT_LOCAL_ENABLED=true 2. 配置环境变量: ```env +ANYHUNT_RUN_MODE=video-transcript-worker +ANYHUNT_RUN_MIGRATIONS=false VIDEO_TRANSCRIPT_ENABLE_LOCAL_WORKER=false VIDEO_TRANSCRIPT_ENABLE_CLOUD_FALLBACK_WORKER=true VIDEO_TRANSCRIPT_ENABLE_FALLBACK_SCANNER=false @@ -646,6 +659,7 @@ bash apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh \ - 写入 local 角色变量(LOCAL 开关、命令路径、模型路径、nodeId) - 强制校验 local 必需变量(DB/Redis/R2);cloud fallback 变量缺失仅告警不阻塞 - 生成启动脚本与 `launchd plist` + - 启动命令固定为 `pnpm start:video-transcript-worker`(最小 worker App,不加载全量 `AppModule`) - 加载并启动 `com.anyhunt.video-transcript-local-worker` 3. 脚本默认日志路径: - `~/Library/Logs/anyhunt/video-transcript-local-worker.log` diff --git a/docs/index.md b/docs/index.md index 6b2dc3935..e5230638a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ --- title: 文档索引(内部协作) -date: 2026-02-09 +date: 2026-02-10 scope: monorepo status: active --- @@ -31,7 +31,7 @@ status: active - API Client 统一封装方案(Anyhunt + Moryflow):`docs/architecture/api-client-unification.md` - Anyhunt Console 公共 API 化与 API Key 明文存储方案:`docs/architecture/anyhunt-console-public-api-key-plan.md` - Anyhunt app/public/apikey 通道路由规范(implemented):`docs/architecture/anyhunt-api-channel-routing.md` -- Anyhunt 视频链接下载 + 双模式转写方案(active,2026-02-09:Step 1~6 已完成代码落地并同步进度看板;四轮可靠性修复已完成;新增“三节点部署详细流程(公网简化版)”:VPS1 API + VPS2 cloud fallback worker + Mac mini local worker;补充本地一键部署脚本 `apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh`;新增“上线前执行清单(Checklist)”;Step 7 待压测/上线演练):`docs/architecture/anyhunt-video-transcript-pipeline.md` +- Anyhunt 视频链接下载 + 双模式转写方案(active,2026-02-10:补充 worker 独立启动入口(避免误消费其他队列)与 Docker 角色开关 `ANYHUNT_RUN_MODE`/`ANYHUNT_RUN_MIGRATIONS`;并同步 terminal/executor guard;Step 1~6 已完成代码落地并同步进度看板;Step 7 待压测/上线演练):`docs/architecture/anyhunt-video-transcript-pipeline.md` - Admin 动态配置 LLM Providers/Models(参考 Moryflow):`docs/architecture/admin-llm-provider-config.md` - Anyhunt LLM Provider 对齐进度(AI SDK / Anthropic / Google):`docs/architecture/anyhunt-llm-provider-alignment.md` - 消息列表与输入框 UI 组件抽离方案(Moryflow/Anyhunt 统一):`docs/architecture/ui-message-list-unification.md` From 796afe23149e41ee948ed8ef8951c2fbd6eb7e29 Mon Sep 17 00:00:00 2001 From: dvlin Date: Tue, 10 Feb 2026 14:26:39 +0800 Subject: [PATCH 10/16] fix(anyhunt/video-transcript): harden cancel race and budget eval args --- .../server/src/video-transcript/CLAUDE.md | 1 + .../video-transcript.service.spec.ts | 46 +++++++++++++++---- .../video-transcript-budget.service.ts | 6 +-- .../video-transcript.service.ts | 27 +++++++---- 4 files changed, 60 insertions(+), 20 deletions(-) diff --git a/apps/anyhunt/server/src/video-transcript/CLAUDE.md b/apps/anyhunt/server/src/video-transcript/CLAUDE.md index f81e8a4c9..cf2d8d30c 100644 --- a/apps/anyhunt/server/src/video-transcript/CLAUDE.md +++ b/apps/anyhunt/server/src/video-transcript/CLAUDE.md @@ -11,6 +11,7 @@ Video Transcript 模块提供四平台视频链接(抖音/Bilibili/小红书/Y ## 最近更新 +- cancelTask 竞态修复:改为 `updateMany + terminal guard` 并仅在取消写入成功后再设置 preempt signal,避免并发完成时被错误标记为 `CANCELLED`;预算闸门 Lua `EVAL` 入参显式 `String()` 化,避免浮点参数隐式转换边界 - worker 进程启动方式收敛:`worker.ts` 改为 `createApplicationContext`(不提供 HTTP),避免 worker 暴露无关 controllers;同时将 `VIDEO_TRANSCRIPT_ENABLE_LOCAL_WORKER` / `VIDEO_TRANSCRIPT_ENABLE_CLOUD_FALLBACK_WORKER` 默认值调整为 `false`(必须显式启用),并补齐 URL http(s) 协议校验 - 新增 Video Transcript worker 独立启动入口 `worker.ts` + 最小启动模块 `video-transcript-worker-app.module.ts`,用于 VPS2/Mac mini worker 进程避免加载全量 `AppModule`(防止误消费其他队列与全局定时任务) - LOCAL/CLOUD_FALLBACK 状态推进改为 `updateMany + terminal guard + executor guard`,避免 CANCELLED/接管竞态下被覆盖 diff --git a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript.service.spec.ts b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript.service.spec.ts index 5b1b0d9b6..dc9226536 100644 --- a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript.service.spec.ts +++ b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript.service.spec.ts @@ -32,7 +32,7 @@ describe('VideoTranscriptService', () => { findFirst: Mock; findMany: Mock; count: Mock; - update: Mock; + updateMany: Mock; }; }; @@ -66,7 +66,7 @@ describe('VideoTranscriptService', () => { findFirst: vi.fn(), findMany: vi.fn(), count: vi.fn(), - update: vi.fn(), + updateMany: vi.fn(), }, }; @@ -196,21 +196,28 @@ describe('VideoTranscriptService', () => { userId: 'user_1', status: 'TRANSCRIBING', }); - mockPrisma.videoTranscriptTask.update.mockResolvedValue({ - id: 'task_3', + mockPrisma.videoTranscriptTask.updateMany.mockResolvedValue({ + count: 1, }); const result = await service.cancelTask('user_1', 'task_3'); expect(result).toEqual({ ok: true }); expect(mockRedisService.set).toHaveBeenCalledTimes(1); - expect(mockPrisma.videoTranscriptTask.update).toHaveBeenCalledWith( + expect(mockPrisma.videoTranscriptTask.updateMany).toHaveBeenCalledWith( expect.objectContaining({ - where: { id: 'task_3' }, - data: expect.objectContaining({ + where: { + id: 'task_3', + userId: 'user_1', + status: { + notIn: ['COMPLETED', 'FAILED', 'CANCELLED'], + }, + }, + data: { status: 'CANCELLED', error: 'Cancelled by user', - }), + completedAt: expect.any(Date), + }, }), ); expect(mockLocalQueue.remove).toHaveBeenCalledWith('task_3'); @@ -222,6 +229,29 @@ describe('VideoTranscriptService', () => { ); }); + it('should not overwrite terminal state when cancellation loses the race', async () => { + mockPrisma.videoTranscriptTask.findFirst.mockResolvedValue({ + id: 'task_4', + userId: 'user_1', + status: 'TRANSCRIBING', + }); + mockPrisma.videoTranscriptTask.updateMany.mockResolvedValue({ + count: 0, + }); + + const result = await service.cancelTask('user_1', 'task_4'); + + expect(result).toEqual({ ok: true }); + expect(mockRedisService.set).not.toHaveBeenCalled(); + expect(mockLocalQueue.remove).toHaveBeenCalledWith('task_4'); + expect(mockCloudQueue.remove).toHaveBeenCalledWith( + buildVideoTranscriptFallbackCheckJobId('task_4'), + ); + expect(mockCloudQueue.remove).toHaveBeenCalledWith( + buildVideoTranscriptCloudRunJobId('task_4'), + ); + }); + it('should throw when task does not exist', async () => { mockPrisma.videoTranscriptTask.findFirst.mockResolvedValue(null); diff --git a/apps/anyhunt/server/src/video-transcript/video-transcript-budget.service.ts b/apps/anyhunt/server/src/video-transcript/video-transcript-budget.service.ts index f5b547bdb..8a0934a60 100644 --- a/apps/anyhunt/server/src/video-transcript/video-transcript-budget.service.ts +++ b/apps/anyhunt/server/src/video-transcript/video-transcript-budget.service.ts @@ -72,9 +72,9 @@ export class VideoTranscriptBudgetService { BUDGET_RESERVE_LUA, 1, key, - estimatedCostUsd, - dailyBudgetUsd, - ttlSeconds, + String(estimatedCostUsd), + String(dailyBudgetUsd), + String(ttlSeconds), ); const result = Array.isArray(raw) ? raw : []; diff --git a/apps/anyhunt/server/src/video-transcript/video-transcript.service.ts b/apps/anyhunt/server/src/video-transcript/video-transcript.service.ts index d43967dff..0d08dbf21 100644 --- a/apps/anyhunt/server/src/video-transcript/video-transcript.service.ts +++ b/apps/anyhunt/server/src/video-transcript/video-transcript.service.ts @@ -165,20 +165,29 @@ export class VideoTranscriptService { } if (!this.isTerminalStatus(task.status)) { - await this.redisService.set( - buildVideoTranscriptPreemptKey(taskId), - '1', - VIDEO_TRANSCRIPT_PREEMPT_TTL_SECONDS, - ); - - await this.prisma.videoTranscriptTask.update({ - where: { id: taskId }, + const now = new Date(); + const cancelled = await this.prisma.videoTranscriptTask.updateMany({ + where: { + id: taskId, + userId, + status: { + notIn: ['COMPLETED', 'FAILED', 'CANCELLED'], + }, + }, data: { status: 'CANCELLED', error: 'Cancelled by user', - completedAt: new Date(), + completedAt: now, }, }); + + if (cancelled.count > 0) { + await this.redisService.set( + buildVideoTranscriptPreemptKey(taskId), + '1', + VIDEO_TRANSCRIPT_PREEMPT_TTL_SECONDS, + ); + } } await Promise.allSettled([ From dc8bff02d3ecea05bf2dc328c14eb4c3149348d7 Mon Sep 17 00:00:00 2001 From: dvlin Date: Tue, 10 Feb 2026 14:26:56 +0800 Subject: [PATCH 11/16] docs(architecture): sync video transcript pipeline progress --- docs/architecture/CLAUDE.md | 1 + docs/architecture/anyhunt-video-transcript-pipeline.md | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/architecture/CLAUDE.md b/docs/architecture/CLAUDE.md index 51f4d2c37..586f4ab13 100644 --- a/docs/architecture/CLAUDE.md +++ b/docs/architecture/CLAUDE.md @@ -41,6 +41,7 @@ ## 近期更新 +- `anyhunt-video-transcript-pipeline.md`:补齐两处回归修复:`cancelTask` 改为 `updateMany + terminal guard` 避免并发完成被覆盖为 `CANCELLED`;预算闸门 Lua `EVAL` 参数显式 `String()` 化(2026-02-10)。 - `anyhunt-video-transcript-pipeline.md`:补充“worker 独立启动入口 + Docker 角色开关”(`ANYHUNT_RUN_MODE` / `ANYHUNT_RUN_MIGRATIONS`),并明确 VPS2/Mac mini 采用最小 worker App 启动以避免误消费其他队列;同步 local/cloud 状态推进增加 terminal/executor guard(2026-02-10)。 - `anyhunt-video-transcript-pipeline.md`:新增“上线前执行清单(Checklist)”章节,固化 T-1/T-0、联调验收、回滚预案与 24h 观察项(2026-02-09)。 - `anyhunt-video-transcript-pipeline.md`:补充本地一键部署脚本(`apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh`)说明,统一 Mac mini local-worker 的依赖安装、环境文件写入、`launchd` 注册与启动(2026-02-09)。 diff --git a/docs/architecture/anyhunt-video-transcript-pipeline.md b/docs/architecture/anyhunt-video-transcript-pipeline.md index f8bf1498f..a2b270067 100644 --- a/docs/architecture/anyhunt-video-transcript-pipeline.md +++ b/docs/architecture/anyhunt-video-transcript-pipeline.md @@ -20,6 +20,7 @@ status: active - 2026-02-10:PR Review 修复:local workspace 创建失败不再导致 activeTasks 泄漏/任务卡死;平台域名白名单改为 domain-boundary 校验防止 suffix bypass;cloud fallback 完成态写入前增加终态保护(避免覆盖 CANCELLED/FAILED);队列策略调整:保留默认队列全局 5 分钟 timeout(历史行为),并将 video transcript 队列切到独立 Bull configKey(不继承 5 分钟);长视频上限由命令级 timeout 控制(LOCAL=4h,CLOUD=2h)。 - 2026-02-10:部署架构补强(避免误消费其他队列):新增 Video Transcript worker 独立启动入口 `apps/anyhunt/server/src/video-transcript/worker.ts` + 最小启动模块 `apps/anyhunt/server/src/video-transcript/video-transcript-worker-app.module.ts`,确保 `VPS2 cloud worker` / `Mac mini local worker` 不加载全量 `AppModule`,不会误消费 `scrape/crawl/digest` 等队列或执行全局定时任务;同时将 local/cloud 的状态推进改为 `updateMany + terminal guard + executor guard`,避免 CANCELLED/接管竞态下被覆盖;Docker 入口新增 `ANYHUNT_RUN_MODE` 与 `ANYHUNT_RUN_MIGRATIONS`(worker 跳过迁移),Mac mini 一键脚本改为启动 `start:video-transcript-worker`。 - 2026-02-10:最佳实践补齐:URL 入参强制 http(s) 协议校验(DTO + normalize 双层兜底);`VIDEO_TRANSCRIPT_ENABLE_LOCAL_WORKER` / `VIDEO_TRANSCRIPT_ENABLE_CLOUD_FALLBACK_WORKER` 默认值改为 `false`(避免误启 worker);Admin `Queues` 页面用户可见文案统一英文,时间展示改为 `formatRelativeTime`。 +- 2026-02-10:回归修复:`cancelTask` 改为 `updateMany + terminal guard` 并仅在取消成功后写 preempt signal,避免并发完成任务被错误覆盖为 `CANCELLED`;预算闸门 Lua `EVAL` 参数显式 `String()` 化,避免浮点隐式转换边界。 - 2026-02-09:补充“17. 上线前执行清单(Checklist)”,覆盖 T-1 准备、T-0 部署、联调验收、回滚预案与 24h 观察项,作为生产上线前固定打勾清单。 - 2026-02-09:新增 Mac mini local-worker 一键部署脚本 `apps/anyhunt/server/scripts/video-transcript/setup-local-worker.sh`,统一依赖检查、`.env.local-worker` 写入、`launchd` 注册与启动;local 模式仅强制校验 DB/Redis/R2 变量,cloud fallback 变量改为告警提示。 - 2026-02-09:补充部署定案附录(公网简化版):新增 `VPS1 API + VPS2 cloud fallback worker + Mac mini local worker` 三节点详细部署流程、角色环境变量矩阵、`launchd` 常驻步骤与联调验收清单。 From 065541db3a09be1e6600233e21b35fd8203af737 Mon Sep 17 00:00:00 2001 From: dvlin-dev <59828992+dvlin-dev@users.noreply.github.com> Date: Fri, 6 Mar 2026 16:37:30 +0800 Subject: [PATCH 12/16] fix(anyhunt/video-transcript): finalize cloud preflight failures --- .../server/src/video-transcript/CLAUDE.md | 1 + ...ranscript-cloud-fallback.processor.spec.ts | 52 ++++++++++++ ...deo-transcript-cloud-fallback.processor.ts | 83 ++++++++++++------- 3 files changed, 107 insertions(+), 29 deletions(-) diff --git a/apps/anyhunt/server/src/video-transcript/CLAUDE.md b/apps/anyhunt/server/src/video-transcript/CLAUDE.md index f3201fb7c..556aa94c5 100644 --- a/apps/anyhunt/server/src/video-transcript/CLAUDE.md +++ b/apps/anyhunt/server/src/video-transcript/CLAUDE.md @@ -11,6 +11,7 @@ Video Transcript 模块提供四平台视频链接(抖音/Bilibili/小红书/Y ## 最近更新 +- cloud takeover preflight 收口:`handleCloudRun` 将 `probe/budget/preempt` 纳入统一 `try/catch`;`local-disabled` 模式在接管后 preflight 失败时会写入 `FAILED` 终态,避免任务卡在 `DOWNLOADING/CLOUD_FALLBACK` - 预算闸门精度修复:Lua `EVAL` 返回值改为字符串承载 `current/next/limit`,避免 Redis 数值回复把小数预算截断;新增 `video-transcript-budget.service` 回归测试覆盖该路径 - cancelTask 竞态修复:改为 `updateMany + terminal guard` 并仅在取消写入成功后再设置 preempt signal,避免并发完成时被错误标记为 `CANCELLED`;预算闸门 Lua `EVAL` 入参显式 `String()` 化,避免浮点参数隐式转换边界 - worker 进程启动方式收敛:`worker.ts` 改为 `createApplicationContext`(不提供 HTTP),避免 worker 暴露无关 controllers;同时将 `VIDEO_TRANSCRIPT_ENABLE_LOCAL_WORKER` / `VIDEO_TRANSCRIPT_ENABLE_CLOUD_FALLBACK_WORKER` 默认值调整为 `false`(必须显式启用),并补齐 URL http(s) 协议校验 diff --git a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts index 6d1bece98..b49ac7bbd 100644 --- a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts +++ b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts @@ -200,6 +200,58 @@ describe('VideoTranscriptCloudFallbackProcessor', () => { ); }); + it('should mark FAILED when local-disabled preflight fails after cloud takeover', async () => { + mockExecutorService.probeVideoDurationSeconds.mockRejectedValue( + new Error('probe failed'), + ); + mockPrisma.videoTranscriptTask.findUnique = vi + .fn() + .mockResolvedValueOnce({ + id: 'task_1', + userId: 'user_1', + sourceUrl: 'https://youtube.com/watch?v=abc123', + status: 'PENDING', + executor: 'LOCAL', + localStartedAt: null, + startedAt: null, + }) + .mockResolvedValueOnce({ + id: 'task_1', + userId: 'user_1', + sourceUrl: 'https://youtube.com/watch?v=abc123', + status: 'DOWNLOADING', + executor: 'CLOUD_FALLBACK', + localStartedAt: null, + startedAt: null, + }); + + await expect( + processor.process({ + data: { + kind: 'cloud-run', + taskId: 'task_1', + reason: 'local-disabled', + }, + } as any), + ).rejects.toThrow('probe failed'); + + expect(mockPrisma.videoTranscriptTask.updateMany).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + where: expect.objectContaining({ + id: 'task_1', + status: { + notIn: ['COMPLETED', 'FAILED', 'CANCELLED'], + }, + }), + data: expect.objectContaining({ + status: 'FAILED', + error: 'probe failed', + }), + }), + ); + }); + it('should not overwrite CANCELLED when cloud run completes', async () => { const initialTask = { id: 'task_1', diff --git a/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts b/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts index ce7692067..e86505a18 100644 --- a/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts +++ b/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts @@ -123,31 +123,10 @@ export class VideoTranscriptCloudFallbackProcessor extends WorkerHost { let budgetReservedByProbe = false; const startedAt = task.startedAt ?? new Date(); - if (reason === 'local-disabled') { - cloudOwnershipAcquired = await this.acquireCloudOwnership( - taskId, - 'DOWNLOADING', - startedAt, - ); - if (!cloudOwnershipAcquired) { - return; - } - } - - const probedDurationSec = - await this.executorService.probeVideoDurationSeconds(task.sourceUrl); - - if (probedDurationSec > 0) { - budgetReservation = - await this.budgetService.tryReserveCloudBudget(probedDurationSec); - budgetReservedByProbe = true; - - if (!budgetReservation.allowed) { - await this.handleBudgetExceeded(taskId, reason, cloudOwnershipAcquired); - return; - } + let workspaceDir: string | null = null; - if (reason === 'timeout') { + try { + if (reason === 'local-disabled') { cloudOwnershipAcquired = await this.acquireCloudOwnership( taskId, 'DOWNLOADING', @@ -156,14 +135,39 @@ export class VideoTranscriptCloudFallbackProcessor extends WorkerHost { if (!cloudOwnershipAcquired) { return; } - await this.transcriptService.setPreemptSignal(taskId); - preemptSignaled = true; } - } - let workspaceDir: string | null = null; + const probedDurationSec = + await this.executorService.probeVideoDurationSeconds(task.sourceUrl); + + if (probedDurationSec > 0) { + budgetReservation = + await this.budgetService.tryReserveCloudBudget(probedDurationSec); + budgetReservedByProbe = true; + + if (!budgetReservation.allowed) { + await this.handleBudgetExceeded( + taskId, + reason, + cloudOwnershipAcquired, + ); + return; + } + + if (reason === 'timeout') { + cloudOwnershipAcquired = await this.acquireCloudOwnership( + taskId, + 'DOWNLOADING', + startedAt, + ); + if (!cloudOwnershipAcquired) { + return; + } + await this.transcriptService.setPreemptSignal(taskId); + preemptSignaled = true; + } + } - try { workspaceDir = await this.executorService.createWorkspace(taskId); const videoPath = await this.executorService.downloadVideo( task.sourceUrl, @@ -306,6 +310,27 @@ export class VideoTranscriptCloudFallbackProcessor extends WorkerHost { return; } + if (reason === 'local-disabled') { + const failed = await this.prisma.videoTranscriptTask.updateMany({ + where: { + id: taskId, + status: { + notIn: ['COMPLETED', 'FAILED', 'CANCELLED'], + }, + }, + data: { + status: 'FAILED', + error: error instanceof Error ? error.message : String(error), + completedAt: new Date(), + }, + }); + if (failed.count === 0) { + return; + } + + throw error; + } + if (latest.executor !== 'CLOUD_FALLBACK') { // 执行权已不在 cloud(可能被终态/其他执行器抢占),避免覆盖状态 return; From 6ee2dd841bd6307458cb17e6aa2494cd3949fc2e Mon Sep 17 00:00:00 2001 From: dvlin-dev <59828992+dvlin-dev@users.noreply.github.com> Date: Fri, 6 Mar 2026 16:37:49 +0800 Subject: [PATCH 13/16] fix(anyhunt/console): align video transcript page ui imports --- apps/anyhunt/console/CLAUDE.md | 1 + apps/anyhunt/console/src/pages/VideoTranscriptPage.tsx | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/anyhunt/console/CLAUDE.md b/apps/anyhunt/console/CLAUDE.md index e3b839800..0b4bc8f5b 100644 --- a/apps/anyhunt/console/CLAUDE.md +++ b/apps/anyhunt/console/CLAUDE.md @@ -8,6 +8,7 @@ Anyhunt Dev 用户控制台,用于管理 API Key、查看用量、测试抓取 ## 最近更新 +- Video Transcript Page 回归修复(2026-03-06):`src/pages/VideoTranscriptPage.tsx` 的 UI 导入统一改回 `@moryflow/ui` / `@moryflow/ui/lib`,与 workspace 依赖一致;现有页面 smoke test 继续覆盖该导入链路 - Video Transcript Playground(2026-03-06):新增 `/fetchx/video-transcript`,支持 Session 模式提交 URL、轮询状态、取消任务与产物预览;时间展示统一复用 `formatRelativeTime` - Build/Thinking 类型链路收敛(2026-03-02):`agent-run-panel.tsx` 的 thinking fallback 显式对齐 `AgentThinkingLevelOption`,修复 `visibleParams` 类型收窄丢失;Dockerfile 改为复制完整 workspace 并统一执行 `pnpm build:packages`,避免容器内共享包依赖白名单漂移。 - 类型解析路径对齐(2026-03-02):`tsconfig.app.json` 补齐 `@moryflow/agents-runtime/*` alias,确保 Console 构建与 IDE 类型解析可直接复用共享可见性策略源码。 diff --git a/apps/anyhunt/console/src/pages/VideoTranscriptPage.tsx b/apps/anyhunt/console/src/pages/VideoTranscriptPage.tsx index f58bc688b..91860ffda 100644 --- a/apps/anyhunt/console/src/pages/VideoTranscriptPage.tsx +++ b/apps/anyhunt/console/src/pages/VideoTranscriptPage.tsx @@ -28,8 +28,8 @@ import { Input, PageHeader, Skeleton, -} from '@anyhunt/ui'; -import { formatRelativeTime } from '@anyhunt/ui/lib'; +} from '@moryflow/ui'; +import { formatRelativeTime } from '@moryflow/ui/lib'; import { videoTranscriptFormSchema, useCancelVideoTranscriptTask, From 2dc665334c8a099f4bc4b4a9907922ad5889e901 Mon Sep 17 00:00:00 2001 From: dvlin-dev <59828992+dvlin-dev@users.noreply.github.com> Date: Fri, 6 Mar 2026 16:55:32 +0800 Subject: [PATCH 14/16] fix(anyhunt/video-transcript): close queue and ownership races --- .../server/src/video-transcript/CLAUDE.md | 4 + .../video-transcript-local.processor.spec.ts | 31 +++++++ .../video-transcript.service.spec.ts | 53 ++++++++++++ .../video-transcript-local.processor.ts | 25 ++++++ .../video-transcript.service.ts | 81 ++++++++++++------- 5 files changed, 165 insertions(+), 29 deletions(-) diff --git a/apps/anyhunt/server/src/video-transcript/CLAUDE.md b/apps/anyhunt/server/src/video-transcript/CLAUDE.md index 556aa94c5..b5df024e4 100644 --- a/apps/anyhunt/server/src/video-transcript/CLAUDE.md +++ b/apps/anyhunt/server/src/video-transcript/CLAUDE.md @@ -11,6 +11,8 @@ Video Transcript 模块提供四平台视频链接(抖音/Bilibili/小红书/Y ## 最近更新 +- createTask 入队原子性收口:任务先创建、后入队的路径在 queue add 失败时会同步删除仍未启动的 `PENDING` 记录,避免 Redis/队列异常留下无 job 对应的孤儿任务 +- local ownership guard 收紧:LOCAL worker 在创建 workspace 前先检查 preempt/当前执行权,`markLocalStarted` 仅允许从 `executor IS NULL | LOCAL` 进入,避免 redelivery/stalled job 把已接管的 `CLOUD_FALLBACK` 再写回 `LOCAL` - cloud takeover preflight 收口:`handleCloudRun` 将 `probe/budget/preempt` 纳入统一 `try/catch`;`local-disabled` 模式在接管后 preflight 失败时会写入 `FAILED` 终态,避免任务卡在 `DOWNLOADING/CLOUD_FALLBACK` - 预算闸门精度修复:Lua `EVAL` 返回值改为字符串承载 `current/next/limit`,避免 Redis 数值回复把小数预算截断;新增 `video-transcript-budget.service` 回归测试覆盖该路径 - cancelTask 竞态修复:改为 `updateMany + terminal guard` 并仅在取消写入成功后再设置 preempt signal,避免并发完成时被错误标记为 `CANCELLED`;预算闸门 Lua `EVAL` 入参显式 `String()` 化,避免浮点参数隐式转换边界 @@ -42,6 +44,8 @@ Video Transcript 模块提供四平台视频链接(抖音/Bilibili/小红书/Y ## 关键约束 +- `createTask` 在队列写入失败时必须清理未启动的 `PENDING` 记录,不能向用户/Admin 暴露无 job 的孤儿任务。 +- LOCAL worker 只能在执行权仍为空或已属于 LOCAL 时写入 `executor=LOCAL`;一旦 cloud takeover 成功,任何 redelivery/stalled local job 都只能 preempt 退出,不能回写 ownership。 - 10 分钟窗口从 `localStartedAt` 起算,排队时间不计入。 - fallback 检查到点后仅做条件判断;最终以 DB 状态裁决。 - fallback 补偿扫描器每 30 秒兜底超时任务(仅补偿入队,不改状态裁决逻辑)。 diff --git a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-local.processor.spec.ts b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-local.processor.spec.ts index d3e046494..0badb29a9 100644 --- a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-local.processor.spec.ts +++ b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-local.processor.spec.ts @@ -131,4 +131,35 @@ describe('VideoTranscriptLocalProcessor', () => { }), ); }); + + it('should not reclaim LOCAL executor after cloud takeover', async () => { + mockPrisma.videoTranscriptTask.findUnique = vi.fn( + (args?: { select?: { status: true; executor: true } }) => { + if (args?.select) { + return Promise.resolve({ + status: 'DOWNLOADING', + executor: 'CLOUD_FALLBACK', + }); + } + + return Promise.resolve({ + id: 'task_1', + userId: 'user_1', + sourceUrl: 'https://youtube.com/watch?v=abc123', + status: 'DOWNLOADING', + executor: 'CLOUD_FALLBACK', + }); + }, + ); + + await expect( + processor.process({ data: { taskId: 'task_1' } } as any), + ).resolves.toBeUndefined(); + + expect(mockExecutorService.createWorkspace).not.toHaveBeenCalled(); + expect(mockPrisma.$executeRaw).not.toHaveBeenCalled(); + expect(mockPrisma.videoTranscriptTask.updateMany).not.toHaveBeenCalled(); + expect(mockHeartbeatService.incrementActiveTasks).toHaveBeenCalledTimes(1); + expect(mockHeartbeatService.decrementActiveTasks).toHaveBeenCalledTimes(1); + }); }); diff --git a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript.service.spec.ts b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript.service.spec.ts index dc9226536..1a1a5a628 100644 --- a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript.service.spec.ts +++ b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript.service.spec.ts @@ -29,6 +29,7 @@ describe('VideoTranscriptService', () => { let mockPrisma: { videoTranscriptTask: { create: Mock; + deleteMany: Mock; findFirst: Mock; findMany: Mock; count: Mock; @@ -63,6 +64,7 @@ describe('VideoTranscriptService', () => { mockPrisma = { videoTranscriptTask: { create: vi.fn(), + deleteMany: vi.fn().mockResolvedValue({ count: 1 }), findFirst: vi.fn(), findMany: vi.fn(), count: vi.fn(), @@ -170,6 +172,57 @@ describe('VideoTranscriptService', () => { expect(mockLocalQueue.add).not.toHaveBeenCalled(); }); + it('should delete created task when local queue enqueue fails', async () => { + mockPrisma.videoTranscriptTask.create.mockResolvedValue({ + id: 'task_local_fail', + status: 'PENDING', + }); + mockLocalQueue.add.mockRejectedValue(new Error('redis unavailable')); + + await expect( + service.createTask('user_1', 'https://youtube.com/watch?v=abc123'), + ).rejects.toThrow('redis unavailable'); + + expect(mockPrisma.videoTranscriptTask.deleteMany).toHaveBeenCalledWith({ + where: { + id: 'task_local_fail', + status: 'PENDING', + executor: null, + startedAt: null, + localStartedAt: null, + }, + }); + }); + + it('should delete created task when cloud queue enqueue fails', async () => { + mockRuntimeConfigService.getSnapshot.mockResolvedValue({ + localEnabled: false, + source: 'override', + overrideRaw: 'false', + }); + mockPrisma.videoTranscriptTask.create.mockResolvedValue({ + id: 'task_cloud_fail', + status: 'PENDING', + }); + mockCloudQueue.add.mockRejectedValue( + new Error('cloud queue unavailable'), + ); + + await expect( + service.createTask('user_1', 'https://youtube.com/watch?v=abc123'), + ).rejects.toThrow('cloud queue unavailable'); + + expect(mockPrisma.videoTranscriptTask.deleteMany).toHaveBeenCalledWith({ + where: { + id: 'task_cloud_fail', + status: 'PENDING', + executor: null, + startedAt: null, + localStartedAt: null, + }, + }); + }); + it('should reject invalid URL', async () => { await expect( service.createTask('user_1', 'not-a-valid-url'), diff --git a/apps/anyhunt/server/src/video-transcript/video-transcript-local.processor.ts b/apps/anyhunt/server/src/video-transcript/video-transcript-local.processor.ts index e9429eaba..4572f27e3 100644 --- a/apps/anyhunt/server/src/video-transcript/video-transcript-local.processor.ts +++ b/apps/anyhunt/server/src/video-transcript/video-transcript-local.processor.ts @@ -50,6 +50,7 @@ export class VideoTranscriptLocalProcessor extends WorkerHost { let localStarted = false; try { + await this.ensureCanStartLocally(taskId); workspaceDir = await this.executorService.createWorkspace(taskId); await this.markLocalStarted(taskId); @@ -185,6 +186,26 @@ export class VideoTranscriptLocalProcessor extends WorkerHost { } } + private async ensureCanStartLocally(taskId: string): Promise { + const preempted = await this.transcriptService.isPreempted(taskId); + if (preempted) { + throw new VideoTranscriptPreemptedError(taskId); + } + + const task = await this.prisma.videoTranscriptTask.findUnique({ + where: { id: taskId }, + select: { status: true, executor: true }, + }); + + if ( + !task || + task.executor === 'CLOUD_FALLBACK' || + this.transcriptService.isTerminalStatus(task.status) + ) { + throw new VideoTranscriptPreemptedError(taskId); + } + } + private async ensureNotPreempted(taskId: string): Promise { const preempted = await this.transcriptService.isPreempted(taskId); if (preempted) { @@ -216,6 +237,10 @@ export class VideoTranscriptLocalProcessor extends WorkerHost { "error" = NULL, "updatedAt" = NOW() WHERE "id" = ${taskId} + AND ( + "executor" IS NULL + OR "executor" = 'LOCAL'::"VideoTranscriptExecutor" + ) AND "status" NOT IN ( 'COMPLETED'::"VideoTranscriptTaskStatus", 'FAILED'::"VideoTranscriptTaskStatus", diff --git a/apps/anyhunt/server/src/video-transcript/video-transcript.service.ts b/apps/anyhunt/server/src/video-transcript/video-transcript.service.ts index 0d08dbf21..a9f60ba09 100644 --- a/apps/anyhunt/server/src/video-transcript/video-transcript.service.ts +++ b/apps/anyhunt/server/src/video-transcript/video-transcript.service.ts @@ -73,36 +73,41 @@ export class VideoTranscriptService { }, }); - if (await this.isLocalPathEnabled()) { - await this.localQueue.add( - 'video-transcript-local', - { taskId: task.id }, - { - jobId: task.id, - attempts: 1, - removeOnComplete: 100, - removeOnFail: 500, - }, - ); - } else { - await this.cloudQueue.add( - 'video-transcript-cloud', - { - kind: 'cloud-run', - taskId: task.id, - reason: 'local-disabled', - }, - { - jobId: buildVideoTranscriptCloudRunJobId(task.id), - attempts: 2, - backoff: { - type: 'exponential', - delay: 5000, + try { + if (await this.isLocalPathEnabled()) { + await this.localQueue.add( + 'video-transcript-local', + { taskId: task.id }, + { + jobId: task.id, + attempts: 1, + removeOnComplete: 100, + removeOnFail: 500, }, - removeOnComplete: 100, - removeOnFail: 500, - }, - ); + ); + } else { + await this.cloudQueue.add( + 'video-transcript-cloud', + { + kind: 'cloud-run', + taskId: task.id, + reason: 'local-disabled', + }, + { + jobId: buildVideoTranscriptCloudRunJobId(task.id), + attempts: 2, + backoff: { + type: 'exponential', + delay: 5000, + }, + removeOnComplete: 100, + removeOnFail: 500, + }, + ); + } + } catch (error) { + await this.cleanupPendingTask(task.id); + throw error; } return task; @@ -291,6 +296,24 @@ export class VideoTranscriptService { return snapshot.localEnabled; } + private async cleanupPendingTask(taskId: string): Promise { + try { + await this.prisma.videoTranscriptTask.deleteMany({ + where: { + id: taskId, + status: 'PENDING', + executor: null, + startedAt: null, + localStartedAt: null, + }, + }); + } catch (cleanupError) { + this.logger.error( + `Failed to cleanup pending video transcript task ${taskId} after enqueue error: ${cleanupError instanceof Error ? cleanupError.message : String(cleanupError)}`, + ); + } + } + private normalizeSourceUrl(rawUrl: string): string { let parsed: URL; try { From aa6aa703e19ced80ac3136cd9ac5563a074f9a2b Mon Sep 17 00:00:00 2001 From: dvlin-dev <59828992+dvlin-dev@users.noreply.github.com> Date: Fri, 6 Mar 2026 17:18:25 +0800 Subject: [PATCH 15/16] fix(anyhunt/video-transcript): preserve retries and toggle audit state --- .../server/src/video-transcript/CLAUDE.md | 4 + .../video-transcript-admin.service.spec.ts | 130 ++++++++++++++++++ ...ranscript-cloud-fallback.processor.spec.ts | 4 +- .../video-transcript-admin.service.ts | 57 +++++--- ...deo-transcript-cloud-fallback.processor.ts | 2 +- ...video-transcript-runtime-config.service.ts | 14 ++ 6 files changed, 189 insertions(+), 22 deletions(-) create mode 100644 apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-admin.service.spec.ts diff --git a/apps/anyhunt/server/src/video-transcript/CLAUDE.md b/apps/anyhunt/server/src/video-transcript/CLAUDE.md index b5df024e4..e781131ee 100644 --- a/apps/anyhunt/server/src/video-transcript/CLAUDE.md +++ b/apps/anyhunt/server/src/video-transcript/CLAUDE.md @@ -11,6 +11,8 @@ Video Transcript 模块提供四平台视频链接(抖音/Bilibili/小红书/Y ## 最近更新 +- timeout pre-check 重试语义收口:`CLOUD_FALLBACK` 在 timeout 路径接管前如果 `probe/budget/preempt` 失败,不再吞掉异常,而是让 cloud-run job 失败并走 BullMQ 重试,避免稳定 `jobId` + 已完成 job 记录导致后续 scanner 无法再次接管 +- Admin runtime toggle 回滚补齐:`updateLocalEnabled` 写 Redis override 后若审计落库失败,会恢复到之前的 runtime snapshot,避免“路由已切换但 API 报错且无审计”的部分成功状态 - createTask 入队原子性收口:任务先创建、后入队的路径在 queue add 失败时会同步删除仍未启动的 `PENDING` 记录,避免 Redis/队列异常留下无 job 对应的孤儿任务 - local ownership guard 收紧:LOCAL worker 在创建 workspace 前先检查 preempt/当前执行权,`markLocalStarted` 仅允许从 `executor IS NULL | LOCAL` 进入,避免 redelivery/stalled job 把已接管的 `CLOUD_FALLBACK` 再写回 `LOCAL` - cloud takeover preflight 收口:`handleCloudRun` 将 `probe/budget/preempt` 纳入统一 `try/catch`;`local-disabled` 模式在接管后 preflight 失败时会写入 `FAILED` 终态,避免任务卡在 `DOWNLOADING/CLOUD_FALLBACK` @@ -44,6 +46,8 @@ Video Transcript 模块提供四平台视频链接(抖音/Bilibili/小红书/Y ## 关键约束 +- timeout cloud-run 在接管前遇到瞬时异常时必须抛错保留 retry 语义,不能把 job 记成成功返回;否则 stable `jobId` 的 completed 记录会阻断后续补偿入队。 +- Admin `localEnabled` 运行时开关与审计无法做分布式原子提交时,必须以“写审计失败则回滚 Redis override”为准则,避免系统实际状态与 API 结果/审计日志分叉。 - `createTask` 在队列写入失败时必须清理未启动的 `PENDING` 记录,不能向用户/Admin 暴露无 job 的孤儿任务。 - LOCAL worker 只能在执行权仍为空或已属于 LOCAL 时写入 `executor=LOCAL`;一旦 cloud takeover 成功,任何 redelivery/stalled local job 都只能 preempt 退出,不能回写 ownership。 - 10 分钟窗口从 `localStartedAt` 起算,排队时间不计入。 diff --git a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-admin.service.spec.ts b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-admin.service.spec.ts new file mode 100644 index 000000000..8c1bc0389 --- /dev/null +++ b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-admin.service.spec.ts @@ -0,0 +1,130 @@ +/** + * [INPUT]: Admin runtime toggle 更新请求与依赖 mock + * [OUTPUT]: 运行时开关更新/审计回滚行为断言 + * [POS]: Video Transcript Admin Service 回归测试 + * + * [PROTOCOL]: 本文件变更时,必须更新此 Header 及所属目录 CLAUDE.md + */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { VideoTranscriptAdminService } from '../video-transcript-admin.service'; + +describe('VideoTranscriptAdminService', () => { + let service: VideoTranscriptAdminService; + + let mockPrisma: any; + let mockBudgetService: any; + let mockHeartbeatService: any; + let mockRuntimeConfigService: any; + let mockTranscriptService: any; + let mockLocalQueue: any; + let mockCloudQueue: any; + + beforeEach(() => { + mockPrisma = { + adminAuditLog: { + create: vi.fn().mockResolvedValue({ + id: 'audit_1', + createdAt: new Date('2026-03-06T08:00:00.000Z'), + }), + }, + }; + + mockBudgetService = {}; + mockHeartbeatService = {}; + mockRuntimeConfigService = { + getSnapshot: vi.fn().mockResolvedValue({ + localEnabled: true, + source: 'env', + overrideRaw: null, + }), + setLocalEnabledOverride: vi.fn().mockResolvedValue(undefined), + restoreSnapshot: vi.fn().mockResolvedValue(undefined), + }; + mockTranscriptService = { + toPrismaJson: vi.fn((value: unknown) => value), + }; + mockLocalQueue = {}; + mockCloudQueue = {}; + + service = new VideoTranscriptAdminService( + mockPrisma, + mockBudgetService, + mockHeartbeatService, + mockRuntimeConfigService, + mockTranscriptService, + mockLocalQueue, + mockCloudQueue, + ); + }); + + it('should write audit after updating localEnabled override', async () => { + const result = await service.updateLocalEnabled({ + actorUserId: 'admin_1', + enabled: false, + reason: 'maintenance window', + }); + + expect( + mockRuntimeConfigService.setLocalEnabledOverride, + ).toHaveBeenCalledWith(false); + expect(mockPrisma.adminAuditLog.create).toHaveBeenCalledWith({ + data: { + actorUserId: 'admin_1', + targetUserId: null, + action: 'VIDEO_TRANSCRIPT_LOCAL_ENABLED_UPDATED', + reason: 'maintenance window', + metadata: { + previous: { + localEnabled: true, + source: 'env', + overrideRaw: null, + }, + current: { + localEnabled: false, + source: 'override', + overrideRaw: 'false', + }, + }, + }, + select: { + id: true, + createdAt: true, + }, + }); + expect(mockRuntimeConfigService.restoreSnapshot).not.toHaveBeenCalled(); + expect(result).toEqual({ + localEnabled: false, + source: 'override', + overrideRaw: 'false', + auditLogId: 'audit_1', + updatedAt: new Date('2026-03-06T08:00:00.000Z'), + }); + }); + + it('should restore previous runtime snapshot when audit write fails', async () => { + const previous = { + localEnabled: true, + source: 'env' as const, + overrideRaw: null, + }; + mockRuntimeConfigService.getSnapshot.mockResolvedValue(previous); + mockPrisma.adminAuditLog.create.mockRejectedValue( + new Error('db unavailable'), + ); + + await expect( + service.updateLocalEnabled({ + actorUserId: 'admin_1', + enabled: false, + }), + ).rejects.toThrow('db unavailable'); + + expect( + mockRuntimeConfigService.setLocalEnabledOverride, + ).toHaveBeenCalledWith(false); + expect(mockRuntimeConfigService.restoreSnapshot).toHaveBeenCalledWith( + previous, + ); + }); +}); diff --git a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts index b49ac7bbd..299799f76 100644 --- a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts +++ b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts @@ -86,7 +86,7 @@ describe('VideoTranscriptCloudFallbackProcessor', () => { ); }); - it('should not fail task when timeout pre-check fails before takeover', async () => { + it('should throw when timeout pre-check fails before takeover so cloud-run can retry', async () => { await expect( processor.process({ data: { @@ -95,7 +95,7 @@ describe('VideoTranscriptCloudFallbackProcessor', () => { reason: 'timeout', }, } as any), - ).resolves.toBeUndefined(); + ).rejects.toThrow('download failed'); expect(mockTranscriptService.setPreemptSignal).not.toHaveBeenCalled(); expect(mockPrisma.videoTranscriptTask.updateMany).not.toHaveBeenCalledWith( diff --git a/apps/anyhunt/server/src/video-transcript/video-transcript-admin.service.ts b/apps/anyhunt/server/src/video-transcript/video-transcript-admin.service.ts index e52379906..d1cafb3f8 100644 --- a/apps/anyhunt/server/src/video-transcript/video-transcript-admin.service.ts +++ b/apps/anyhunt/server/src/video-transcript/video-transcript-admin.service.ts @@ -6,7 +6,7 @@ * [PROTOCOL]: 本文件变更时,必须更新此 Header 及所属目录 CLAUDE.md */ -import { Injectable } from '@nestjs/common'; +import { Injectable, Logger } from '@nestjs/common'; import { InjectQueue } from '@nestjs/bullmq'; import { Queue } from 'bullmq'; import { PrismaService } from '../prisma/prisma.service'; @@ -32,6 +32,8 @@ const BUDGET_EXCEEDED_ERROR_MESSAGE = 'Cloud fallback daily budget exceeded'; @Injectable() export class VideoTranscriptAdminService { + private readonly logger = new Logger(VideoTranscriptAdminService.name); + constructor( private readonly prisma: PrismaService, private readonly budgetService: VideoTranscriptBudgetService, @@ -299,29 +301,46 @@ export class VideoTranscriptAdminService { reason?: string; }) { const previous = await this.runtimeConfigService.getSnapshot(); - await this.runtimeConfigService.setLocalEnabledOverride(params.enabled); - const current = await this.runtimeConfigService.getSnapshot(); + const current = { + localEnabled: params.enabled, + source: 'override' as const, + overrideRaw: params.enabled ? 'true' : 'false', + }; const normalizedReason = params.reason?.trim() ? params.reason.trim() : `Set local enabled to ${params.enabled}`; - const audit = await this.prisma.adminAuditLog.create({ - data: { - actorUserId: params.actorUserId, - targetUserId: null, - action: LOCAL_ENABLED_AUDIT_ACTION, - reason: normalizedReason, - metadata: this.transcriptService.toPrismaJson({ - previous, - current, - }), - }, - select: { - id: true, - createdAt: true, - }, - }); + await this.runtimeConfigService.setLocalEnabledOverride(params.enabled); + + let audit: { id: string; createdAt: Date }; + try { + audit = await this.prisma.adminAuditLog.create({ + data: { + actorUserId: params.actorUserId, + targetUserId: null, + action: LOCAL_ENABLED_AUDIT_ACTION, + reason: normalizedReason, + metadata: this.transcriptService.toPrismaJson({ + previous, + current, + }), + }, + select: { + id: true, + createdAt: true, + }, + }); + } catch (error) { + try { + await this.runtimeConfigService.restoreSnapshot(previous); + } catch (rollbackError) { + this.logger.error( + `Failed to rollback localEnabled override after audit failure: ${rollbackError instanceof Error ? rollbackError.message : String(rollbackError)}`, + ); + } + throw error; + } return { localEnabled: current.localEnabled, diff --git a/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts b/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts index e86505a18..9a1b04321 100644 --- a/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts +++ b/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts @@ -307,7 +307,7 @@ export class VideoTranscriptCloudFallbackProcessor extends WorkerHost { this.logger.warn( `Cloud fallback pre-check failed before takeover for task ${taskId}: ${error instanceof Error ? error.message : String(error)}`, ); - return; + throw error; } if (reason === 'local-disabled') { diff --git a/apps/anyhunt/server/src/video-transcript/video-transcript-runtime-config.service.ts b/apps/anyhunt/server/src/video-transcript/video-transcript-runtime-config.service.ts index 50b992b3c..7749e0eda 100644 --- a/apps/anyhunt/server/src/video-transcript/video-transcript-runtime-config.service.ts +++ b/apps/anyhunt/server/src/video-transcript/video-transcript-runtime-config.service.ts @@ -55,6 +55,20 @@ export class VideoTranscriptRuntimeConfigService { ); } + async restoreSnapshot( + snapshot: VideoTranscriptRuntimeConfigSnapshot, + ): Promise { + if (snapshot.source === 'override' && snapshot.overrideRaw !== null) { + await this.redisService.set( + VIDEO_TRANSCRIPT_LOCAL_ENABLED_OVERRIDE_KEY, + snapshot.overrideRaw, + ); + return; + } + + await this.redisService.del(VIDEO_TRANSCRIPT_LOCAL_ENABLED_OVERRIDE_KEY); + } + private getEnvDefault(): boolean { return parseBooleanEnv( this.configService.get('VIDEO_TRANSCRIPT_LOCAL_ENABLED'), From 28d91c109d17f1755d9f5ebbd6a3f783f696537a Mon Sep 17 00:00:00 2001 From: dvlin-dev <59828992+dvlin-dev@users.noreply.github.com> Date: Fri, 6 Mar 2026 17:57:59 +0800 Subject: [PATCH 16/16] fix(anyhunt/video-transcript): rollback timeout budget reservations --- .../server/src/video-transcript/CLAUDE.md | 2 ++ .../video-transcript-budget.service.spec.ts | 23 +++++++++++++ ...ranscript-cloud-fallback.processor.spec.ts | 34 +++++++++++++++++++ .../video-transcript-budget.service.ts | 33 ++++++++++++++++++ ...deo-transcript-cloud-fallback.processor.ts | 3 ++ .../anyhunt-video-transcript-pipeline.md | 1 + 6 files changed, 96 insertions(+) diff --git a/apps/anyhunt/server/src/video-transcript/CLAUDE.md b/apps/anyhunt/server/src/video-transcript/CLAUDE.md index e781131ee..4e64bd6bd 100644 --- a/apps/anyhunt/server/src/video-transcript/CLAUDE.md +++ b/apps/anyhunt/server/src/video-transcript/CLAUDE.md @@ -11,6 +11,7 @@ Video Transcript 模块提供四平台视频链接(抖音/Bilibili/小红书/Y ## 最近更新 +- timeout budget rollback 收口:timeout 路径在 `duration probe` 后预占 cloud budget,但若 `acquireCloudOwnership` 竞争失败会立即释放这笔预占,避免 local 已完成/取消的竞态场景泄漏当日预算 - timeout pre-check 重试语义收口:`CLOUD_FALLBACK` 在 timeout 路径接管前如果 `probe/budget/preempt` 失败,不再吞掉异常,而是让 cloud-run job 失败并走 BullMQ 重试,避免稳定 `jobId` + 已完成 job 记录导致后续 scanner 无法再次接管 - Admin runtime toggle 回滚补齐:`updateLocalEnabled` 写 Redis override 后若审计落库失败,会恢复到之前的 runtime snapshot,避免“路由已切换但 API 报错且无审计”的部分成功状态 - createTask 入队原子性收口:任务先创建、后入队的路径在 queue add 失败时会同步删除仍未启动的 `PENDING` 记录,避免 Redis/队列异常留下无 job 对应的孤儿任务 @@ -46,6 +47,7 @@ Video Transcript 模块提供四平台视频链接(抖音/Bilibili/小红书/Y ## 关键约束 +- timeout 路径若在 probe 预算后丢失 `CLOUD_FALLBACK` 执行权,必须回滚这次 budget reservation;只有真正进入 cloud takeover 的任务才允许占用当日预算。 - timeout cloud-run 在接管前遇到瞬时异常时必须抛错保留 retry 语义,不能把 job 记成成功返回;否则 stable `jobId` 的 completed 记录会阻断后续补偿入队。 - Admin `localEnabled` 运行时开关与审计无法做分布式原子提交时,必须以“写审计失败则回滚 Redis override”为准则,避免系统实际状态与 API 结果/审计日志分叉。 - `createTask` 在队列写入失败时必须清理未启动的 `PENDING` 记录,不能向用户/Admin 暴露无 job 的孤儿任务。 diff --git a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-budget.service.spec.ts b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-budget.service.spec.ts index b86b9e1d4..d11e7784f 100644 --- a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-budget.service.spec.ts +++ b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-budget.service.spec.ts @@ -8,6 +8,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { VideoTranscriptBudgetService } from '../video-transcript-budget.service'; +import { buildVideoTranscriptBudgetKey } from '../video-transcript.constants'; describe('VideoTranscriptBudgetService', () => { let service: VideoTranscriptBudgetService; @@ -61,4 +62,26 @@ describe('VideoTranscriptBudgetService', () => { expect(reservation.dailyBudgetUsd).toBe(20); expect(reservation.dayKey).toBe('2026-03-06'); }); + + it('releases reserved budget using the reservation dayKey', async () => { + const reservation = { + allowed: true, + estimatedCostUsd: 0.25, + usageAfterReserveUsd: 1.5, + dailyBudgetUsd: 20, + dayKey: '2026-03-05', + timezone: 'Asia/Shanghai', + }; + + await service.releaseCloudBudgetReservation(reservation); + + const [lua, keyCount, key, amount] = + mockRedisService.client.eval.mock.calls[0] ?? []; + + expect(lua).toContain("redis.call('DEL', key)"); + expect(lua).toContain("redis.call('SET', key, tostring(next), 'EX', ttl)"); + expect(keyCount).toBe(1); + expect(key).toBe(buildVideoTranscriptBudgetKey('2026-03-05')); + expect(amount).toBe('0.25'); + }); }); diff --git a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts index 299799f76..d2f0c27e6 100644 --- a/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts +++ b/apps/anyhunt/server/src/video-transcript/__tests__/video-transcript-cloud-fallback.processor.spec.ts @@ -70,6 +70,7 @@ describe('VideoTranscriptCloudFallbackProcessor', () => { dayKey: '2026-02-09', timezone: 'Asia/Shanghai', }), + releaseCloudBudgetReservation: vi.fn().mockResolvedValue(undefined), }; mockCloudQueue = { @@ -200,6 +201,39 @@ describe('VideoTranscriptCloudFallbackProcessor', () => { ); }); + it('should release probed budget when timeout takeover loses ownership race', async () => { + mockExecutorService.probeVideoDurationSeconds.mockResolvedValue(120); + mockPrisma.videoTranscriptTask.updateMany = vi.fn((args: any) => { + if (args?.data?.executor === 'CLOUD_FALLBACK') { + return Promise.resolve({ count: 0 }); + } + return Promise.resolve({ count: 1 }); + }); + + await expect( + processor.process({ + data: { + kind: 'cloud-run', + taskId: 'task_1', + reason: 'timeout', + }, + } as any), + ).resolves.toBeUndefined(); + + expect(mockBudgetService.tryReserveCloudBudget).toHaveBeenCalledWith(120); + expect( + mockBudgetService.releaseCloudBudgetReservation, + ).toHaveBeenCalledWith( + expect.objectContaining({ + allowed: true, + estimatedCostUsd: 0.01, + dayKey: '2026-02-09', + }), + ); + expect(mockTranscriptService.setPreemptSignal).not.toHaveBeenCalled(); + expect(mockExecutorService.createWorkspace).not.toHaveBeenCalled(); + }); + it('should mark FAILED when local-disabled preflight fails after cloud takeover', async () => { mockExecutorService.probeVideoDurationSeconds.mockRejectedValue( new Error('probe failed'), diff --git a/apps/anyhunt/server/src/video-transcript/video-transcript-budget.service.ts b/apps/anyhunt/server/src/video-transcript/video-transcript-budget.service.ts index d498f67f4..fe1a5180b 100644 --- a/apps/anyhunt/server/src/video-transcript/video-transcript-budget.service.ts +++ b/apps/anyhunt/server/src/video-transcript/video-transcript-budget.service.ts @@ -33,6 +33,22 @@ redis.call('SET', key, tostring(next), 'EX', ttl) return {1, tostring(next), tostring(limit)} `; +const BUDGET_RELEASE_LUA = ` +local key = KEYS[1] +local sub = tonumber(ARGV[1]) +local ttl = tonumber(ARGV[2]) +local current = tonumber(redis.call('GET', key) or '0') +local next = current - sub + +if next <= 0 then + redis.call('DEL', key) + return '0' +end + +redis.call('SET', key, tostring(next), 'EX', ttl) +return tostring(next) +`; + @Injectable() export class VideoTranscriptBudgetService { constructor( @@ -91,6 +107,23 @@ export class VideoTranscriptBudgetService { }; } + async releaseCloudBudgetReservation( + reservation: VideoTranscriptBudgetReservation, + ): Promise { + if (!reservation.allowed || reservation.estimatedCostUsd <= 0) { + return; + } + + const ttlSeconds = 3 * 24 * 60 * 60; + await this.redisService.client.eval( + BUDGET_RELEASE_LUA, + 1, + buildVideoTranscriptBudgetKey(reservation.dayKey), + String(reservation.estimatedCostUsd), + String(ttlSeconds), + ); + } + async getCurrentBudgetUsage(): Promise<{ dayKey: string; timezone: string; diff --git a/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts b/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts index 9a1b04321..a0e94751d 100644 --- a/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts +++ b/apps/anyhunt/server/src/video-transcript/video-transcript-cloud-fallback.processor.ts @@ -161,6 +161,9 @@ export class VideoTranscriptCloudFallbackProcessor extends WorkerHost { startedAt, ); if (!cloudOwnershipAcquired) { + await this.budgetService.releaseCloudBudgetReservation( + budgetReservation, + ); return; } await this.transcriptService.setPreemptSignal(taskId); diff --git a/docs/design/anyhunt/features/anyhunt-video-transcript-pipeline.md b/docs/design/anyhunt/features/anyhunt-video-transcript-pipeline.md index e559ee03a..d9e9475f9 100644 --- a/docs/design/anyhunt/features/anyhunt-video-transcript-pipeline.md +++ b/docs/design/anyhunt/features/anyhunt-video-transcript-pipeline.md @@ -17,6 +17,7 @@ status: active ## 0. 最近执行同步 +- 2026-03-06:补充 timeout budget rollback:timeout 路径在 `yt-dlp duration probe` 后若已预占 cloud budget,但 `acquireCloudOwnership` 因 local 完成/取消等竞态失败,会立即释放该预占,避免未真正接管 cloud 的任务泄漏每日预算。 - 2026-03-06:文档路径收口到 `docs/design/anyhunt/features/anyhunt-video-transcript-pipeline.md`,并同步清理 `docs/architecture/*` 旧引用;预算闸门 Lua 返回值改为字符串承载小数,避免 Redis `EVAL` 数值回复截断 `usageAfterReserveUsd`。 - 2026-02-10:PR Review 修复:local workspace 创建失败不再导致 activeTasks 泄漏/任务卡死;平台域名白名单改为 domain-boundary 校验防止 suffix bypass;cloud fallback 完成态写入前增加终态保护(避免覆盖 CANCELLED/FAILED);队列策略调整:保留默认队列全局 5 分钟 timeout(历史行为),并将 video transcript 队列切到独立 Bull configKey(不继承 5 分钟);长视频上限由命令级 timeout 控制(LOCAL=4h,CLOUD=2h)。 - 2026-02-10:部署架构补强(避免误消费其他队列):新增 Video Transcript worker 独立启动入口 `apps/anyhunt/server/src/video-transcript/worker.ts` + 最小启动模块 `apps/anyhunt/server/src/video-transcript/video-transcript-worker-app.module.ts`,确保 `VPS2 cloud worker` / `Mac mini local worker` 不加载全量 `AppModule`,不会误消费 `scrape/crawl/digest` 等队列或执行全局定时任务;同时将 local/cloud 的状态推进改为 `updateMany + terminal guard + executor guard`,避免 CANCELLED/接管竞态下被覆盖;Docker 入口新增 `ANYHUNT_RUN_MODE` 与 `ANYHUNT_RUN_MIGRATIONS`(worker 跳过迁移),Mac mini 一键脚本改为启动 `start:video-transcript-worker`。