diff --git a/.env.example b/.env.example index 342a7b8..a40f70f 100644 --- a/.env.example +++ b/.env.example @@ -62,19 +62,18 @@ OPENROUTER_API_KEY=your-openrouter-api-key-here # ============================================================================ # Each agent type has its own model and provider for optimal performance. -# Annotation Model (best quality/cost: Mistral-Small-3.2-24B) -# 100% faithful rate, $0.18/M output tokens -ANNOTATION_MODEL=mistralai/mistral-small-3.2-24b-instruct -ANNOTATION_PROVIDER=mistral +# Annotation Model (Claude Haiku 4.5 via Anthropic - high quality with prompt caching) +ANNOTATION_MODEL=anthropic/claude-haiku-4.5 +ANNOTATION_PROVIDER=anthropic -# Evaluation/Assessment Model (fast quality checks: GPT-OSS-120B via Groq) +# Evaluation/Assessment Model (Qwen3.5-122B via Alibaba - fast and cost-effective) # Used for evaluation, assessment, and feedback agents -EVALUATION_MODEL=openai/gpt-oss-120b -EVALUATION_PROVIDER=groq +EVALUATION_MODEL=qwen/qwen3.5-122b-a10b +EVALUATION_PROVIDER=alibaba -# Vision Model (image description: Qwen3-VL via deepinfra) -VISION_MODEL=qwen/qwen3-vl-30b-a3b-instruct -VISION_PROVIDER=deepinfra/fp8 +# Vision Model (Qwen3.5-122B via Alibaba - accepts vision, fast) +VISION_MODEL=qwen/qwen3.5-122b-a10b +VISION_PROVIDER=alibaba # ============================================================================ # Alternative Models diff --git a/deploy/README.md b/deploy/README.md index 7bed336..f94ce8d 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -269,17 +269,18 @@ AUDIT_LOG_FILE=/var/log/hed-bot/audit.log # CORS Configuration (optional extra origins) # EXTRA_CORS_ORIGINS=https://staging.hed-bot.pages.dev,https://dev.hed-bot.pages.dev -# LLM Configuration (Cerebras + OpenRouter for ultra-fast inference) +# LLM Configuration (OpenRouter with Alibaba for fast inference) LLM_PROVIDER=openrouter OPENROUTER_API_KEY=your_openrouter_key_here -LLM_PROVIDER_PREFERENCE=Cerebras LLM_TEMPERATURE=0.1 -# Model configuration (Cerebras-optimized defaults) -ANNOTATION_MODEL=openai/gpt-oss-120b -EVALUATION_MODEL=qwen/qwen3-235b-a22b-2507 -ASSESSMENT_MODEL=openai/gpt-oss-120b -FEEDBACK_MODEL=openai/gpt-oss-120b +# Model configuration +ANNOTATION_MODEL=anthropic/claude-haiku-4.5 +ANNOTATION_PROVIDER=anthropic +EVALUATION_MODEL=qwen/qwen3.5-122b-a10b +EVALUATION_PROVIDER=alibaba +VISION_MODEL=qwen/qwen3.5-122b-a10b +VISION_PROVIDER=alibaba # Optional: HED Schema and Validator paths (if not using defaults) # HED_SCHEMA_DIR=/path/to/hed-schemas diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index c2266f6..6e5fe77 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -22,10 +22,12 @@ services: # OpenRouter Configuration - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - LLM_PROVIDER_PREFERENCE=${LLM_PROVIDER_PREFERENCE:-} - - ANNOTATION_MODEL=${ANNOTATION_MODEL:-openai/gpt-oss-120b} - - EVALUATION_MODEL=${EVALUATION_MODEL:-qwen/qwen3-235b-a22b-2507} - - ASSESSMENT_MODEL=${ASSESSMENT_MODEL:-openai/gpt-oss-120b} - - FEEDBACK_MODEL=${FEEDBACK_MODEL:-openai/gpt-oss-120b} + - ANNOTATION_MODEL=${ANNOTATION_MODEL:-anthropic/claude-haiku-4.5} + - ANNOTATION_PROVIDER=${ANNOTATION_PROVIDER:-anthropic} + - EVALUATION_MODEL=${EVALUATION_MODEL:-qwen/qwen3.5-122b-a10b} + - EVALUATION_PROVIDER=${EVALUATION_PROVIDER:-alibaba} + - VISION_MODEL=${VISION_MODEL:-qwen/qwen3.5-122b-a10b} + - VISION_PROVIDER=${VISION_PROVIDER:-alibaba} # LLM Settings - LLM_TEMPERATURE=${LLM_TEMPERATURE:-0.1} diff --git a/docker-compose.yml b/docker-compose.yml index 3229dd2..567e026 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -42,10 +42,12 @@ services: # OpenRouter Configuration (used when LLM_PROVIDER=openrouter) - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - LLM_PROVIDER_PREFERENCE=${LLM_PROVIDER_PREFERENCE} - - ANNOTATION_MODEL=${ANNOTATION_MODEL:-openai/gpt-oss-120b} - - EVALUATION_MODEL=${EVALUATION_MODEL:-qwen/qwen3-235b-a22b-2507} - - ASSESSMENT_MODEL=${ASSESSMENT_MODEL:-openai/gpt-oss-120b} - - FEEDBACK_MODEL=${FEEDBACK_MODEL:-openai/gpt-oss-120b} + - ANNOTATION_MODEL=${ANNOTATION_MODEL:-anthropic/claude-haiku-4.5} + - ANNOTATION_PROVIDER=${ANNOTATION_PROVIDER:-anthropic} + - EVALUATION_MODEL=${EVALUATION_MODEL:-qwen/qwen3.5-122b-a10b} + - EVALUATION_PROVIDER=${EVALUATION_PROVIDER:-alibaba} + - VISION_MODEL=${VISION_MODEL:-qwen/qwen3.5-122b-a10b} + - VISION_PROVIDER=${VISION_PROVIDER:-alibaba} # Ollama Configuration (used when LLM_PROVIDER=ollama) - LLM_BASE_URL=${LLM_BASE_URL:-http://ollama:11434} diff --git a/frontend/index.html b/frontend/index.html index cf0a139..eb0a633 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -1235,7 +1235,7 @@

Status

Learn more
- Models: Claude Haiku 4.5 (annotation) · Qwen3-VL-30B (vision) · GPT-OSS-120B (evaluation) · To change models or use programmatically, install HEDit CLI + Models: Claude Haiku 4.5 (annotation) · Qwen3.5-122B (vision) · Qwen3.5-122B (evaluation) · To change models or use programmatically, install HEDit CLI
diff --git a/pyproject.toml b/pyproject.toml index 72dc906..a7cd80b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "hedit" -version = "0.7.8a1" +version = "0.7.9.dev0" description = "Multi-agent system for HED annotation generation and validation" readme = "PKG_README.md" requires-python = ">=3.12" diff --git a/src/api/main.py b/src/api/main.py index 812e3d1..ad78924 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -99,8 +99,8 @@ def create_openrouter_workflow( api_key: OpenRouter API key annotation_model: Model for annotation (default: ANNOTATION_MODEL env or Claude Haiku 4.5) annotation_provider: Provider for annotation model (default: ANNOTATION_PROVIDER env or "anthropic") - eval_model: Model for eval/assessment/feedback (default: EVALUATION_MODEL env or GPT-OSS-120B) - eval_provider: Provider for eval models (default: EVALUATION_PROVIDER env or "groq") + eval_model: Model for eval/assessment/feedback (default: EVALUATION_MODEL env or Qwen3.5-122B) + eval_provider: Provider for eval models (default: EVALUATION_PROVIDER env or "alibaba") temperature: LLM temperature (default: 0.1) user_id: User ID for cache optimization (derived from API key if not provided) schema_dir: Path to HED schemas (None = fetch from GitHub) @@ -113,8 +113,8 @@ def create_openrouter_workflow( # Apply defaults from environment default_annotation_model = os.getenv("ANNOTATION_MODEL", "anthropic/claude-haiku-4.5") default_annotation_provider = os.getenv("ANNOTATION_PROVIDER", "anthropic") - default_eval_model = os.getenv("EVALUATION_MODEL", "qwen/qwen3.5-397b-a17b") - default_eval_provider = os.getenv("EVALUATION_PROVIDER") + default_eval_model = os.getenv("EVALUATION_MODEL", "qwen/qwen3.5-122b-a10b") + default_eval_provider = os.getenv("EVALUATION_PROVIDER", "alibaba") # Resolve final values: parameter > env var > default actual_annotation_model = get_model_name(annotation_model or default_annotation_model) @@ -240,8 +240,8 @@ def create_byok_vision_agent( Configured VisionAgent using the user's key and model settings """ # Use user-provided settings or fall back to server defaults - default_vision_model = os.getenv("VISION_MODEL", "qwen/qwen3-vl-32b-instruct") - default_vision_provider = os.getenv("VISION_PROVIDER", "novita") + default_vision_model = os.getenv("VISION_MODEL", "qwen/qwen3.5-122b-a10b") + default_vision_provider = os.getenv("VISION_PROVIDER", "alibaba") actual_model = vision_model if vision_model else default_vision_model actual_temperature = temperature if temperature is not None else 0.3 @@ -354,9 +354,9 @@ def get_default_path(docker_path: str, local_path: str) -> str | None: # Log configuration (env vars are read by create_openrouter_workflow) print("Using OpenRouter with models:") print(f" Annotation: {os.getenv('ANNOTATION_MODEL', 'anthropic/claude-haiku-4.5')}") - print(f" Evaluation: {os.getenv('EVALUATION_MODEL', 'qwen/qwen3.5-397b-a17b')}") + print(f" Evaluation: {os.getenv('EVALUATION_MODEL', 'qwen/qwen3.5-122b-a10b')}") print(f" Provider (annotation): {os.getenv('ANNOTATION_PROVIDER', 'anthropic')}") - print(f" Provider (eval): {os.getenv('EVALUATION_PROVIDER', '') or '(auto-routed)'}") + print(f" Provider (eval): {os.getenv('EVALUATION_PROVIDER', 'alibaba')}") workflow = create_openrouter_workflow( api_key=openrouter_api_key, @@ -395,8 +395,8 @@ def get_default_path(docker_path: str, local_path: str) -> str | None: # Initialize vision agent (only for OpenRouter) if llm_provider == "openrouter": - vision_model = os.getenv("VISION_MODEL", "qwen/qwen3-vl-32b-instruct") - vision_provider = os.getenv("VISION_PROVIDER", "novita") + vision_model = os.getenv("VISION_MODEL", "qwen/qwen3.5-122b-a10b") + vision_provider = os.getenv("VISION_PROVIDER", "alibaba") print(f"Initializing vision model: {vision_model} (provider: {vision_provider})") @@ -486,6 +486,7 @@ def get_default_path(docker_path: str, local_path: str) -> str | None: "X-OpenRouter-Key", # BYOK mode "X-OpenRouter-Model", # BYOK model override "X-OpenRouter-Vision-Model", # BYOK vision model override + "X-OpenRouter-Vision-Provider", # BYOK vision provider override "X-OpenRouter-Provider", # BYOK provider preference "X-OpenRouter-Temperature", # BYOK temperature override "X-OpenRouter-Eval-Model", # BYOK eval model override @@ -754,6 +755,9 @@ async def annotate_from_image( # Check for model override headers (from frontend dropdown or CLI) model_override = request.model or req.headers.get("x-openrouter-model") vision_model_override = request.vision_model or req.headers.get("x-openrouter-vision-model") + vision_provider_override = request.vision_provider or req.headers.get( + "x-openrouter-vision-provider" + ) provider_override = request.provider or req.headers.get("x-openrouter-provider") eval_model_override = req.headers.get("x-openrouter-eval-model") eval_provider_override = req.headers.get("x-openrouter-eval-provider") @@ -782,14 +786,20 @@ async def annotate_from_image( temperature=temperature, user_id_override=user_id_override, ) + # Vision uses its own provider; fall back to annotation provider only if + # a custom vision_model was specified without an explicit vision_provider + vision_provider = vision_provider_override or ( + provider_override if vision_model_override else None + ) active_vision_agent = create_byok_vision_agent( openrouter_key, vision_model=vision_model_override, - provider=provider_override, + provider=vision_provider, temperature=temperature, user_id_override=user_id_override, ) except Exception as e: + logging.exception("Failed to initialize BYOK agents") raise HTTPException( status_code=500, detail=f"Failed to initialize BYOK agents: {str(e)}" ) from e @@ -815,9 +825,10 @@ async def annotate_from_image( validator_path=_byok_config.get("validator_path"), use_js_validator=_byok_config.get("use_js_validator", True), ) - # Note: Vision agent uses its own provider (novita for qwen-vl) - # Only pass provider_override to vision if a custom vision_model was specified - vision_provider = provider_override if vision_model_override else None + # Note: Vision agent uses the vision-specific provider, not the annotation provider + vision_provider = vision_provider_override or ( + provider_override if vision_model_override else None + ) active_vision_agent = create_byok_vision_agent( server_api_key, vision_model=vision_model_override, @@ -1329,6 +1340,9 @@ async def annotate_from_image_stream( # Determine which workflow and vision agent to use (same logic as /annotate-from-image) model_override = request.model or req.headers.get("x-openrouter-model") vision_model_override = request.vision_model or req.headers.get("x-openrouter-vision-model") + vision_provider_override = request.vision_provider or req.headers.get( + "x-openrouter-vision-provider" + ) provider_override = request.provider or req.headers.get("x-openrouter-provider") eval_model_override = req.headers.get("x-openrouter-eval-model") eval_provider_override = req.headers.get("x-openrouter-eval-provider") @@ -1355,14 +1369,20 @@ async def annotate_from_image_stream( temperature=temperature, user_id_override=user_id_override, ) + # Vision uses its own provider; fall back to annotation provider only if + # a custom vision_model was specified without an explicit vision_provider + vision_provider = vision_provider_override or ( + provider_override if vision_model_override else None + ) active_vision_agent = create_byok_vision_agent( openrouter_key, vision_model=vision_model_override, - provider=provider_override, + provider=vision_provider, temperature=temperature, user_id_override=user_id_override, ) except Exception as e: + logging.exception("Failed to initialize BYOK agents") raise HTTPException( status_code=500, detail=f"Failed to initialize BYOK agents: {str(e)}" ) from e @@ -1386,9 +1406,10 @@ async def annotate_from_image_stream( validator_path=_byok_config.get("validator_path"), use_js_validator=_byok_config.get("use_js_validator", True), ) - # Note: Vision agent uses its own provider (novita for qwen-vl) - # Only pass provider_override to vision if a custom vision_model was specified - vision_provider = provider_override if vision_model_override else None + # Note: Vision agent uses the vision-specific provider, not the annotation provider + vision_provider = vision_provider_override or ( + provider_override if vision_model_override else None + ) active_vision_agent = create_byok_vision_agent( server_api_key, vision_model=vision_model_override, diff --git a/src/api/models.py b/src/api/models.py index 5d78102..75a34bd 100644 --- a/src/api/models.py +++ b/src/api/models.py @@ -41,12 +41,12 @@ class AnnotationRequest(BaseModel): model: str | None = Field( default=None, description="Override model for annotation (BYOK mode only, e.g., 'openai/gpt-4o')", - examples=["anthropic/claude-haiku-4.5", "qwen/qwen3.5-122b-a10b", "qwen/qwen3.5-397b-a17b"], + examples=["anthropic/claude-haiku-4.5", "qwen/qwen3.5-122b-a10b"], ) provider: str | None = Field( default=None, - description="Override provider preference (BYOK mode only, e.g., 'Cerebras')", - examples=["Cerebras", "deepinfra/fp8", None], + description="Override provider preference (BYOK mode only, e.g., 'anthropic')", + examples=["anthropic", "alibaba", None], ) temperature: float | None = Field( default=None, @@ -177,12 +177,17 @@ class ImageAnnotationRequest(BaseModel): vision_model: str | None = Field( default=None, description="Override vision model for image description (BYOK mode only)", - examples=["qwen/qwen3-vl-32b-instruct", "qwen/qwen3-vl-235b-a22b-instruct"], + examples=["qwen/qwen3.5-122b-a10b", "qwen/qwen3-vl-235b-a22b-instruct"], + ) + vision_provider: str | None = Field( + default=None, + description="Override vision model provider (BYOK mode only, e.g., 'alibaba')", + examples=["alibaba", "novita", None], ) provider: str | None = Field( default=None, - description="Override provider preference (BYOK mode only, e.g., 'Cerebras')", - examples=["Cerebras", "deepinfra/fp8", None], + description="Override annotation provider preference (BYOK mode only, e.g., 'anthropic')", + examples=["anthropic", "alibaba", None], ) temperature: float | None = Field( default=None, diff --git a/src/cli/client.py b/src/cli/client.py index aeaf8b8..60b8808 100644 --- a/src/cli/client.py +++ b/src/cli/client.py @@ -139,7 +139,14 @@ def _handle_response(self, response: httpx.Response) -> dict[str, Any]: raise APIError( "Service unavailable", status_code=503, - detail="The API is temporarily unavailable. Please try again later.", + detail=detail or "The API is temporarily unavailable. Please try again later.", + ) + elif response.status_code == 504: + raise APIError( + "Gateway timeout", + status_code=504, + detail="The server took too long to respond. Try a faster model/provider " + "or use --standalone mode.", ) else: raise APIError( diff --git a/src/cli/config.py b/src/cli/config.py index c1e1fb5..7d3fef2 100644 --- a/src/cli/config.py +++ b/src/cli/config.py @@ -41,13 +41,13 @@ DEFAULT_MODEL = "anthropic/claude-haiku-4.5" DEFAULT_PROVIDER = "anthropic" -# Evaluation model: Qwen3.5-397B MoE (most capable Qwen, high throughput) -DEFAULT_EVAL_MODEL = "qwen/qwen3.5-397b-a17b" -DEFAULT_EVAL_PROVIDER = None +# Evaluation model: Qwen3.5-122B MoE (fast, capable, cost-effective via Alibaba) +DEFAULT_EVAL_MODEL = "qwen/qwen3.5-122b-a10b" +DEFAULT_EVAL_PROVIDER = "alibaba" -# Vision model: Qwen3-VL-32B for image descriptions -DEFAULT_VISION_MODEL = "qwen/qwen3-vl-32b-instruct" -DEFAULT_VISION_PROVIDER = None +# Vision model: Qwen3.5-122B MoE (accepts vision, fast via Alibaba) +DEFAULT_VISION_MODEL = "qwen/qwen3.5-122b-a10b" +DEFAULT_VISION_PROVIDER = "alibaba" class CredentialsConfig(BaseModel): @@ -69,12 +69,12 @@ class ModelsConfig(BaseModel): ) eval_provider: str | None = Field( default=DEFAULT_EVAL_PROVIDER, - description="Provider for evaluation model (None = OpenRouter auto-routes)", + description="Provider for evaluation model (default: alibaba)", ) vision: str = Field(default=DEFAULT_VISION_MODEL, description="Vision model for images") vision_provider: str | None = Field( default=DEFAULT_VISION_PROVIDER, - description="Provider for vision model (deepinfra/fp8 for qwen-vl)", + description="Provider for vision model (alibaba for qwen)", ) temperature: float = Field(default=0.1, ge=0.0, le=1.0, description="Model temperature") @@ -237,8 +237,8 @@ def get_effective_config( api_url: Override API URL model: Override model (if non-default, clears provider unless explicitly set) eval_model: Override evaluation model (for consistent benchmarking) - eval_provider: Override provider for evaluation model (e.g., "Cerebras") - provider: Override provider preference (e.g., "Cerebras") + eval_provider: Override provider for evaluation model (e.g., "alibaba") + provider: Override provider preference (e.g., "anthropic") temperature: Override temperature schema_version: Override schema version output_format: Override output format diff --git a/src/cli/local_executor.py b/src/cli/local_executor.py index 65e6853..2719c67 100644 --- a/src/cli/local_executor.py +++ b/src/cli/local_executor.py @@ -68,6 +68,7 @@ def __init__( eval_model: str | None = None, eval_provider: str | None = None, vision_model: str | None = None, + vision_provider: str | None = None, provider: str | None = None, temperature: float = 0.1, schema_dir: Path | str | None = None, @@ -78,9 +79,10 @@ def __init__( Args: api_key: OpenRouter API key (required for LLM operations, optional for health/validate) model: Model for text annotation (default: anthropic/claude-haiku-4.5) - eval_model: Model for evaluation/assessment agents (default: qwen/qwen3.5-397b-a17b) - eval_provider: Provider for evaluation model (None = OpenRouter auto-routes) - vision_model: Model for image annotation (default: qwen/qwen3-vl-32b-instruct) + eval_model: Model for evaluation/assessment agents (default: qwen/qwen3.5-122b-a10b) + eval_provider: Provider for evaluation model (default: alibaba) + vision_model: Model for image annotation (default: qwen/qwen3.5-122b-a10b) + vision_provider: Provider for vision model (default: alibaba) provider: Provider preference (cleared if custom model specified) temperature: LLM temperature (0.0-1.0) schema_dir: Optional directory with JSON schemas (None = fetch from GitHub) @@ -102,7 +104,7 @@ def __init__( self._eval_model = eval_model or DEFAULT_EVAL_MODEL self._eval_provider = eval_provider or DEFAULT_EVAL_PROVIDER self._vision_model = vision_model or DEFAULT_VISION_MODEL - self._vision_provider = DEFAULT_VISION_PROVIDER + self._vision_provider = vision_provider or DEFAULT_VISION_PROVIDER self._temperature = temperature self._schema_dir = Path(schema_dir) if schema_dir else None self._user_id = user_id # Custom user ID (None = use auto-generated machine ID) diff --git a/src/cli/main.py b/src/cli/main.py index ec04943..ea44d27 100644 --- a/src/cli/main.py +++ b/src/cli/main.py @@ -204,6 +204,7 @@ def get_executor( eval_model=config.models.evaluation, eval_provider=config.models.eval_provider, vision_model=config.models.vision, + vision_provider=config.models.vision_provider, provider=config.models.provider, temperature=config.models.temperature, user_id=user_id, diff --git a/src/utils/openrouter_llm.py b/src/utils/openrouter_llm.py index f0110fc..c38b9ac 100644 --- a/src/utils/openrouter_llm.py +++ b/src/utils/openrouter_llm.py @@ -28,11 +28,11 @@ def create_openrouter_llm( to include cache_control markers for 90% cost reduction on cache hits. Args: - model: Model identifier (e.g., "anthropic/claude-haiku-4.5", "qwen/qwen3.5-397b-a17b") + model: Model identifier (e.g., "anthropic/claude-haiku-4.5", "qwen/qwen3.5-122b-a10b") api_key: OpenRouter API key (defaults to OPENROUTER_API_KEY env var) temperature: Sampling temperature (0.0-1.0) max_tokens: Maximum tokens to generate - provider: Specific provider to use (e.g., "Cerebras", "Anthropic") + provider: Specific provider to use (e.g., "anthropic", "alibaba") user_id: User identifier for cache optimization (sticky routing) enable_caching: Enable Anthropic prompt caching. If None (default), auto-enables for Anthropic Claude models. diff --git a/src/version.py b/src/version.py index ec58a13..1f5e6e6 100644 --- a/src/version.py +++ b/src/version.py @@ -1,7 +1,7 @@ """Version information for HEDit.""" -__version__ = "0.7.8a1" -__version_info__ = (0, 7, 8, "alpha") +__version__ = "0.7.9.dev0" +__version_info__ = (0, 7, 9, "dev") def get_version() -> str: