Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,18 @@ OPENROUTER_API_KEY=your-openrouter-api-key-here
# ============================================================================
# Each agent type has its own model and provider for optimal performance.

# Annotation Model (best quality/cost: Mistral-Small-3.2-24B)
# 100% faithful rate, $0.18/M output tokens
ANNOTATION_MODEL=mistralai/mistral-small-3.2-24b-instruct
ANNOTATION_PROVIDER=mistral
# Annotation Model (Claude Haiku 4.5 via Anthropic - high quality with prompt caching)
ANNOTATION_MODEL=anthropic/claude-haiku-4.5
ANNOTATION_PROVIDER=anthropic

# Evaluation/Assessment Model (fast quality checks: GPT-OSS-120B via Groq)
# Evaluation/Assessment Model (Qwen3.5-122B via Alibaba - fast and cost-effective)
# Used for evaluation, assessment, and feedback agents
EVALUATION_MODEL=openai/gpt-oss-120b
EVALUATION_PROVIDER=groq
EVALUATION_MODEL=qwen/qwen3.5-122b-a10b
EVALUATION_PROVIDER=alibaba

# Vision Model (image description: Qwen3-VL via deepinfra)
VISION_MODEL=qwen/qwen3-vl-30b-a3b-instruct
VISION_PROVIDER=deepinfra/fp8
# Vision Model (Qwen3.5-122B via Alibaba - accepts image input, fast)
VISION_MODEL=qwen/qwen3.5-122b-a10b
VISION_PROVIDER=alibaba

# ============================================================================
# Alternative Models
Expand Down
15 changes: 8 additions & 7 deletions deploy/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -269,17 +269,18 @@ AUDIT_LOG_FILE=/var/log/hed-bot/audit.log
# CORS Configuration (optional extra origins)
# EXTRA_CORS_ORIGINS=https://staging.hed-bot.pages.dev,https://dev.hed-bot.pages.dev

# LLM Configuration (Cerebras + OpenRouter for ultra-fast inference)
# LLM Configuration (OpenRouter with Alibaba for fast inference)
LLM_PROVIDER=openrouter
OPENROUTER_API_KEY=your_openrouter_key_here
LLM_PROVIDER_PREFERENCE=Cerebras
LLM_TEMPERATURE=0.1

# Model configuration (Cerebras-optimized defaults)
ANNOTATION_MODEL=openai/gpt-oss-120b
EVALUATION_MODEL=qwen/qwen3-235b-a22b-2507
ASSESSMENT_MODEL=openai/gpt-oss-120b
FEEDBACK_MODEL=openai/gpt-oss-120b
# Model configuration
ANNOTATION_MODEL=anthropic/claude-haiku-4.5
ANNOTATION_PROVIDER=anthropic
EVALUATION_MODEL=qwen/qwen3.5-122b-a10b
EVALUATION_PROVIDER=alibaba
VISION_MODEL=qwen/qwen3.5-122b-a10b
VISION_PROVIDER=alibaba

# Optional: HED Schema and Validator paths (if not using defaults)
# HED_SCHEMA_DIR=/path/to/hed-schemas
Expand Down
10 changes: 6 additions & 4 deletions docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@ services:
# OpenRouter Configuration
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
- LLM_PROVIDER_PREFERENCE=${LLM_PROVIDER_PREFERENCE:-}
- ANNOTATION_MODEL=${ANNOTATION_MODEL:-openai/gpt-oss-120b}
- EVALUATION_MODEL=${EVALUATION_MODEL:-qwen/qwen3-235b-a22b-2507}
- ASSESSMENT_MODEL=${ASSESSMENT_MODEL:-openai/gpt-oss-120b}
- FEEDBACK_MODEL=${FEEDBACK_MODEL:-openai/gpt-oss-120b}
- ANNOTATION_MODEL=${ANNOTATION_MODEL:-anthropic/claude-haiku-4.5}
- ANNOTATION_PROVIDER=${ANNOTATION_PROVIDER:-anthropic}
- EVALUATION_MODEL=${EVALUATION_MODEL:-qwen/qwen3.5-122b-a10b}
- EVALUATION_PROVIDER=${EVALUATION_PROVIDER:-alibaba}
- VISION_MODEL=${VISION_MODEL:-qwen/qwen3.5-122b-a10b}
- VISION_PROVIDER=${VISION_PROVIDER:-alibaba}

# LLM Settings
- LLM_TEMPERATURE=${LLM_TEMPERATURE:-0.1}
Expand Down
10 changes: 6 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,12 @@ services:
# OpenRouter Configuration (used when LLM_PROVIDER=openrouter)
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
- LLM_PROVIDER_PREFERENCE=${LLM_PROVIDER_PREFERENCE}
- ANNOTATION_MODEL=${ANNOTATION_MODEL:-openai/gpt-oss-120b}
- EVALUATION_MODEL=${EVALUATION_MODEL:-qwen/qwen3-235b-a22b-2507}
- ASSESSMENT_MODEL=${ASSESSMENT_MODEL:-openai/gpt-oss-120b}
- FEEDBACK_MODEL=${FEEDBACK_MODEL:-openai/gpt-oss-120b}
- ANNOTATION_MODEL=${ANNOTATION_MODEL:-anthropic/claude-haiku-4.5}
- ANNOTATION_PROVIDER=${ANNOTATION_PROVIDER:-anthropic}
- EVALUATION_MODEL=${EVALUATION_MODEL:-qwen/qwen3.5-122b-a10b}
- EVALUATION_PROVIDER=${EVALUATION_PROVIDER:-alibaba}
- VISION_MODEL=${VISION_MODEL:-qwen/qwen3.5-122b-a10b}
- VISION_PROVIDER=${VISION_PROVIDER:-alibaba}

# Ollama Configuration (used when LLM_PROVIDER=ollama)
- LLM_BASE_URL=${LLM_BASE_URL:-http://ollama:11434}
Expand Down
2 changes: 1 addition & 1 deletion frontend/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -1235,7 +1235,7 @@ <h3>Status</h3>
<a href="https://docs.annotation.garden/projects/hedit/telemetry" target="_blank" rel="noopener noreferrer" title="Learn more about telemetry">Learn more</a>
</div>
<div class="model-info">
<span>Models: <a href="https://openrouter.ai/anthropic/claude-haiku-4.5" target="_blank" rel="noopener noreferrer">Claude Haiku 4.5</a> (annotation) · <a href="https://openrouter.ai/qwen/qwen3-vl-30b-a3b-instruct" target="_blank" rel="noopener noreferrer">Qwen3-VL-30B</a> (vision) · <a href="https://openrouter.ai/openai/gpt-oss-120b" target="_blank" rel="noopener noreferrer">GPT-OSS-120B</a> (evaluation) · To change models or use programmatically, install <a href="https://pypi.org/project/hedit/" target="_blank" rel="noopener noreferrer">HEDit CLI</a></span>
<span>Models: <a href="https://openrouter.ai/anthropic/claude-haiku-4.5" target="_blank" rel="noopener noreferrer">Claude Haiku 4.5</a> (annotation) · <a href="https://openrouter.ai/qwen/qwen3.5-122b-a10b" target="_blank" rel="noopener noreferrer">Qwen3.5-122B</a> (vision) · <a href="https://openrouter.ai/qwen/qwen3.5-122b-a10b" target="_blank" rel="noopener noreferrer">Qwen3.5-122B</a> (evaluation) · To change models or use programmatically, install <a href="https://pypi.org/project/hedit/" target="_blank" rel="noopener noreferrer">HEDit CLI</a></span>
</div>

<script src="config.js"></script>
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "hedit"
version = "0.7.8a1"
version = "0.7.9.dev0"
description = "Multi-agent system for HED annotation generation and validation"
readme = "PKG_README.md"
requires-python = ">=3.12"
Expand Down
57 changes: 39 additions & 18 deletions src/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ def create_openrouter_workflow(
api_key: OpenRouter API key
annotation_model: Model for annotation (default: ANNOTATION_MODEL env or Claude Haiku 4.5)
annotation_provider: Provider for annotation model (default: ANNOTATION_PROVIDER env or "anthropic")
eval_model: Model for eval/assessment/feedback (default: EVALUATION_MODEL env or GPT-OSS-120B)
eval_provider: Provider for eval models (default: EVALUATION_PROVIDER env or "groq")
eval_model: Model for eval/assessment/feedback (default: EVALUATION_MODEL env or Qwen3.5-122B)
eval_provider: Provider for eval models (default: EVALUATION_PROVIDER env or "alibaba")
temperature: LLM temperature (default: 0.1)
user_id: User ID for cache optimization (derived from API key if not provided)
schema_dir: Path to HED schemas (None = fetch from GitHub)
Expand All @@ -113,8 +113,8 @@ def create_openrouter_workflow(
# Apply defaults from environment
default_annotation_model = os.getenv("ANNOTATION_MODEL", "anthropic/claude-haiku-4.5")
default_annotation_provider = os.getenv("ANNOTATION_PROVIDER", "anthropic")
default_eval_model = os.getenv("EVALUATION_MODEL", "qwen/qwen3.5-397b-a17b")
default_eval_provider = os.getenv("EVALUATION_PROVIDER")
default_eval_model = os.getenv("EVALUATION_MODEL", "qwen/qwen3.5-122b-a10b")
default_eval_provider = os.getenv("EVALUATION_PROVIDER", "alibaba")

# Resolve final values: parameter > env var > default
actual_annotation_model = get_model_name(annotation_model or default_annotation_model)
Expand Down Expand Up @@ -240,8 +240,8 @@ def create_byok_vision_agent(
Configured VisionAgent using the user's key and model settings
"""
# Use user-provided settings or fall back to server defaults
default_vision_model = os.getenv("VISION_MODEL", "qwen/qwen3-vl-32b-instruct")
default_vision_provider = os.getenv("VISION_PROVIDER", "novita")
default_vision_model = os.getenv("VISION_MODEL", "qwen/qwen3.5-122b-a10b")
default_vision_provider = os.getenv("VISION_PROVIDER", "alibaba")

actual_model = vision_model if vision_model else default_vision_model
actual_temperature = temperature if temperature is not None else 0.3
Expand Down Expand Up @@ -354,9 +354,9 @@ def get_default_path(docker_path: str, local_path: str) -> str | None:
# Log configuration (env vars are read by create_openrouter_workflow)
print("Using OpenRouter with models:")
print(f" Annotation: {os.getenv('ANNOTATION_MODEL', 'anthropic/claude-haiku-4.5')}")
print(f" Evaluation: {os.getenv('EVALUATION_MODEL', 'qwen/qwen3.5-397b-a17b')}")
print(f" Evaluation: {os.getenv('EVALUATION_MODEL', 'qwen/qwen3.5-122b-a10b')}")
print(f" Provider (annotation): {os.getenv('ANNOTATION_PROVIDER', 'anthropic')}")
print(f" Provider (eval): {os.getenv('EVALUATION_PROVIDER', '') or '(auto-routed)'}")
print(f" Provider (eval): {os.getenv('EVALUATION_PROVIDER', 'alibaba')}")

workflow = create_openrouter_workflow(
api_key=openrouter_api_key,
Expand Down Expand Up @@ -395,8 +395,8 @@ def get_default_path(docker_path: str, local_path: str) -> str | None:

# Initialize vision agent (only for OpenRouter)
if llm_provider == "openrouter":
vision_model = os.getenv("VISION_MODEL", "qwen/qwen3-vl-32b-instruct")
vision_provider = os.getenv("VISION_PROVIDER", "novita")
vision_model = os.getenv("VISION_MODEL", "qwen/qwen3.5-122b-a10b")
vision_provider = os.getenv("VISION_PROVIDER", "alibaba")

print(f"Initializing vision model: {vision_model} (provider: {vision_provider})")

Expand Down Expand Up @@ -486,6 +486,7 @@ def get_default_path(docker_path: str, local_path: str) -> str | None:
"X-OpenRouter-Key", # BYOK mode
"X-OpenRouter-Model", # BYOK model override
"X-OpenRouter-Vision-Model", # BYOK vision model override
"X-OpenRouter-Vision-Provider", # BYOK vision provider override
"X-OpenRouter-Provider", # BYOK provider preference
"X-OpenRouter-Temperature", # BYOK temperature override
"X-OpenRouter-Eval-Model", # BYOK eval model override
Expand Down Expand Up @@ -754,6 +755,9 @@ async def annotate_from_image(
# Check for model override headers (from frontend dropdown or CLI)
model_override = request.model or req.headers.get("x-openrouter-model")
vision_model_override = request.vision_model or req.headers.get("x-openrouter-vision-model")
vision_provider_override = request.vision_provider or req.headers.get(
"x-openrouter-vision-provider"
)
provider_override = request.provider or req.headers.get("x-openrouter-provider")
eval_model_override = req.headers.get("x-openrouter-eval-model")
eval_provider_override = req.headers.get("x-openrouter-eval-provider")
Expand Down Expand Up @@ -782,14 +786,20 @@ async def annotate_from_image(
temperature=temperature,
user_id_override=user_id_override,
)
# Vision uses its own provider; fall back to annotation provider only if
# a custom vision_model was specified without an explicit vision_provider
vision_provider = vision_provider_override or (
provider_override if vision_model_override else None
)
active_vision_agent = create_byok_vision_agent(
openrouter_key,
vision_model=vision_model_override,
provider=provider_override,
provider=vision_provider,
temperature=temperature,
user_id_override=user_id_override,
)
except Exception as e:
logging.exception("Failed to initialize BYOK agents")
raise HTTPException(
status_code=500, detail=f"Failed to initialize BYOK agents: {str(e)}"
) from e
Expand All @@ -815,9 +825,10 @@ async def annotate_from_image(
validator_path=_byok_config.get("validator_path"),
use_js_validator=_byok_config.get("use_js_validator", True),
)
# Note: Vision agent uses its own provider (novita for qwen-vl)
# Only pass provider_override to vision if a custom vision_model was specified
vision_provider = provider_override if vision_model_override else None
# Note: Vision agent prefers the vision-specific provider; it falls back to the
# annotation provider only if a custom vision_model was given without a vision_provider
vision_provider = vision_provider_override or (
provider_override if vision_model_override else None
)
active_vision_agent = create_byok_vision_agent(
server_api_key,
vision_model=vision_model_override,
Expand Down Expand Up @@ -1329,6 +1340,9 @@ async def annotate_from_image_stream(
# Determine which workflow and vision agent to use (same logic as /annotate-from-image)
model_override = request.model or req.headers.get("x-openrouter-model")
vision_model_override = request.vision_model or req.headers.get("x-openrouter-vision-model")
vision_provider_override = request.vision_provider or req.headers.get(
"x-openrouter-vision-provider"
)
provider_override = request.provider or req.headers.get("x-openrouter-provider")
eval_model_override = req.headers.get("x-openrouter-eval-model")
eval_provider_override = req.headers.get("x-openrouter-eval-provider")
Expand All @@ -1355,14 +1369,20 @@ async def annotate_from_image_stream(
temperature=temperature,
user_id_override=user_id_override,
)
# Vision uses its own provider; fall back to annotation provider only if
# a custom vision_model was specified without an explicit vision_provider
vision_provider = vision_provider_override or (
provider_override if vision_model_override else None
)
active_vision_agent = create_byok_vision_agent(
openrouter_key,
vision_model=vision_model_override,
provider=provider_override,
provider=vision_provider,
temperature=temperature,
user_id_override=user_id_override,
)
except Exception as e:
logging.exception("Failed to initialize BYOK agents")
raise HTTPException(
status_code=500, detail=f"Failed to initialize BYOK agents: {str(e)}"
) from e
Expand All @@ -1386,9 +1406,10 @@ async def annotate_from_image_stream(
validator_path=_byok_config.get("validator_path"),
use_js_validator=_byok_config.get("use_js_validator", True),
)
# Note: Vision agent uses its own provider (novita for qwen-vl)
# Only pass provider_override to vision if a custom vision_model was specified
vision_provider = provider_override if vision_model_override else None
# Note: Vision agent prefers the vision-specific provider; it falls back to the
# annotation provider only if a custom vision_model was given without a vision_provider
vision_provider = vision_provider_override or (
provider_override if vision_model_override else None
)
active_vision_agent = create_byok_vision_agent(
server_api_key,
vision_model=vision_model_override,
Expand Down
17 changes: 11 additions & 6 deletions src/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ class AnnotationRequest(BaseModel):
model: str | None = Field(
default=None,
description="Override model for annotation (BYOK mode only, e.g., 'openai/gpt-4o')",
examples=["anthropic/claude-haiku-4.5", "qwen/qwen3.5-122b-a10b", "qwen/qwen3.5-397b-a17b"],
examples=["anthropic/claude-haiku-4.5", "qwen/qwen3.5-122b-a10b"],
)
provider: str | None = Field(
default=None,
description="Override provider preference (BYOK mode only, e.g., 'Cerebras')",
examples=["Cerebras", "deepinfra/fp8", None],
description="Override provider preference (BYOK mode only, e.g., 'anthropic')",
examples=["anthropic", "alibaba", None],
)
temperature: float | None = Field(
default=None,
Expand Down Expand Up @@ -177,12 +177,17 @@ class ImageAnnotationRequest(BaseModel):
vision_model: str | None = Field(
default=None,
description="Override vision model for image description (BYOK mode only)",
examples=["qwen/qwen3-vl-32b-instruct", "qwen/qwen3-vl-235b-a22b-instruct"],
examples=["qwen/qwen3.5-122b-a10b", "qwen/qwen3-vl-235b-a22b-instruct"],
)
vision_provider: str | None = Field(
default=None,
description="Override vision model provider (BYOK mode only, e.g., 'alibaba')",
examples=["alibaba", "novita", None],
)
provider: str | None = Field(
default=None,
description="Override provider preference (BYOK mode only, e.g., 'Cerebras')",
examples=["Cerebras", "deepinfra/fp8", None],
description="Override annotation provider preference (BYOK mode only, e.g., 'anthropic')",
examples=["anthropic", "alibaba", None],
)
temperature: float | None = Field(
default=None,
Expand Down
9 changes: 8 additions & 1 deletion src/cli/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,14 @@ def _handle_response(self, response: httpx.Response) -> dict[str, Any]:
raise APIError(
"Service unavailable",
status_code=503,
detail="The API is temporarily unavailable. Please try again later.",
detail=detail or "The API is temporarily unavailable. Please try again later.",
)
elif response.status_code == 504:
raise APIError(
"Gateway timeout",
status_code=504,
detail="The server took too long to respond. Try a faster model/provider "
"or use --standalone mode.",
)
else:
raise APIError(
Expand Down
20 changes: 10 additions & 10 deletions src/cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@
DEFAULT_MODEL = "anthropic/claude-haiku-4.5"
DEFAULT_PROVIDER = "anthropic"

# Evaluation model: Qwen3.5-397B MoE (most capable Qwen, high throughput)
DEFAULT_EVAL_MODEL = "qwen/qwen3.5-397b-a17b"
DEFAULT_EVAL_PROVIDER = None
# Evaluation model: Qwen3.5-122B MoE (fast, capable, cost-effective via Alibaba)
DEFAULT_EVAL_MODEL = "qwen/qwen3.5-122b-a10b"
DEFAULT_EVAL_PROVIDER = "alibaba"

# Vision model: Qwen3-VL-32B for image descriptions
DEFAULT_VISION_MODEL = "qwen/qwen3-vl-32b-instruct"
DEFAULT_VISION_PROVIDER = None
# Vision model: Qwen3.5-122B MoE (accepts image input, fast via Alibaba)
DEFAULT_VISION_MODEL = "qwen/qwen3.5-122b-a10b"
DEFAULT_VISION_PROVIDER = "alibaba"


class CredentialsConfig(BaseModel):
Expand All @@ -69,12 +69,12 @@ class ModelsConfig(BaseModel):
)
eval_provider: str | None = Field(
default=DEFAULT_EVAL_PROVIDER,
description="Provider for evaluation model (None = OpenRouter auto-routes)",
description="Provider for evaluation model (default: alibaba)",
)
vision: str = Field(default=DEFAULT_VISION_MODEL, description="Vision model for images")
vision_provider: str | None = Field(
default=DEFAULT_VISION_PROVIDER,
description="Provider for vision model (deepinfra/fp8 for qwen-vl)",
description="Provider for vision model (alibaba for qwen)",
)
temperature: float = Field(default=0.1, ge=0.0, le=1.0, description="Model temperature")

Expand Down Expand Up @@ -237,8 +237,8 @@ def get_effective_config(
api_url: Override API URL
model: Override model (if non-default, clears provider unless explicitly set)
eval_model: Override evaluation model (for consistent benchmarking)
eval_provider: Override provider for evaluation model (e.g., "Cerebras")
provider: Override provider preference (e.g., "Cerebras")
eval_provider: Override provider for evaluation model (e.g., "alibaba")
provider: Override provider preference (e.g., "anthropic")
temperature: Override temperature
schema_version: Override schema version
output_format: Override output format
Expand Down
Loading
Loading