Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 82 additions & 79 deletions .github/actions/classify-failure/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,87 +28,90 @@ runs:
LOG_FILE="${{ inputs.log_file }}"
EXIT_CODE="${{ inputs.exit_code }}"

CLASS=$(python3 -c "
import re, sys

log = ''
try:
with open('$LOG_FILE', 'r') as f:
log = f.read()
except:
log = ''

exit_code = int('$EXIT_CODE') if '$EXIT_CODE'.isdigit() else 1

# Network timeout patterns
if any(p in log.lower() for p in [
'timeout', 'timed out', 'connection refused', 'name resolution',
'ssl', 'tls', 'network', 'dns', 'errno', 'etimedout',
'econnrefused', 'econnreset', 'httperror', '502', '503', '504'
]):
print('network_timeout')
sys.exit(0)

# Permission denied patterns
if any(p in log.lower() for p in [
'permission denied', 'forbidden', '401', '403',
'authentication failed', 'token expired', 'unauthorized',
'secret', 'not allowed'
]):
print('permission_denied')
sys.exit(0)

# Race condition patterns
if any(p in log.lower() for p in [
'conflict', '409', 'race', 'concurrent',
'another process', 'locked', 'busy',
'already exists', 'duplicate'
]):
print('race_condition')
sys.exit(0)

# Flaky test patterns
if any(p in log.lower() for p in [
'flaky', 'intermittent', 'sometimes fails',
'passed on retry', 'random', 'unstable'
]):
print('flaky_test')
sys.exit(0)

# Test failures (likely flaky if retry helps)
if any(p in log.lower() for p in [
'assertionerror', 'assert', 'test failed',
'pytest', 'unittest', 'coverage'
]):
print('flaky_test')
sys.exit(0)

# Real bug patterns
if any(p in log.lower() for p in [
'syntaxerror', 'typeerror', 'nameerror', 'attributeerror',
'importerror', 'modulenotfounderror', 'keyerror', 'indexerror',
'valueerror', 'zerodivisionerror', 'segmentation fault',
'core dumped', 'fatal', 'panic'
]):
print('real_bug')
sys.exit(0)

print('unknown')
")
export LOG_FILE EXIT_CODE

CLASS=$(python3 <<'PY'
"""Classify a CI failure log into one coarse failure category.

Reads the log file named by the LOG_FILE environment variable and prints
exactly one label on stdout: network_timeout, permission_denied,
race_condition, flaky_test, real_bug, or unknown. An unreadable or missing
log is treated as empty and therefore classified as unknown.

EXIT_CODE is exported by the surrounding step but is not consulted here:
the classification depends on the log text only.
"""
import os

# Ordered rules: the first category with any marker present in the
# lower-cased log wins, so the order of this table encodes priority.
# NOTE(review): markers are plain substrings, so short ones also match inside
# longer tokens (for example "race" matches "traceback"); confirm that this
# breadth is intended before relying on the labels for retry decisions.
RULES = (
    ("network_timeout", (
        "timeout", "timed out", "connection refused", "name resolution",
        "ssl", "tls", "network", "dns", "errno", "etimedout",
        "econnrefused", "econnreset", "httperror", "502", "503", "504",
    )),
    ("permission_denied", (
        "permission denied", "forbidden", "401", "403",
        "authentication failed", "token expired", "unauthorized",
        "secret", "not allowed",
    )),
    ("race_condition", (
        "conflict", "409", "race", "concurrent",
        "another process", "locked", "busy",
        "already exists", "duplicate",
    )),
    # Explicit flakiness markers.
    ("flaky_test", (
        "flaky", "intermittent", "sometimes fails",
        "passed on retry", "random", "unstable",
    )),
    # Generic test-failure markers are reported under the same label.
    ("flaky_test", (
        "assertionerror", "assert", "test failed",
        "pytest", "unittest", "coverage",
    )),
    ("real_bug", (
        "syntaxerror", "typeerror", "nameerror", "attributeerror",
        "importerror", "modulenotfounderror", "keyerror", "indexerror",
        "valueerror", "zerodivisionerror", "segmentation fault",
        "core dumped", "fatal", "panic",
    )),
)


def classify(log):
    """Return the label of the first rule with a marker in *log*, else "unknown"."""
    text = log.lower()
    for label, markers in RULES:
        if any(marker in text for marker in markers):
            return label
    return "unknown"


def read_log(path):
    """Return the text of the file at *path*, or "" when it cannot be read."""
    try:
        # errors="replace" keeps undecodable bytes from aborting classification.
        with open(path, encoding="utf-8", errors="replace") as handle:
            return handle.read()
    except OSError:
        return ""


print(classify(read_log(os.environ.get("LOG_FILE", ""))))
PY
)


# Generate summary
SUMMARY=$(python3 -c "
log = ''
try:
with open('$LOG_FILE', 'r') as f:
lines = f.readlines()
# Get last 3 non-empty lines
relevant = [l.strip() for l in lines if l.strip()][-3:]
summary = ' | '.join(relevant)[:200]
print(summary if summary else 'No log content available')
except:
print('Could not read log file')
")
SUMMARY=$(python3 <<'PY'
import os

# Print a one-line summary built from the last three non-blank lines of the
# log named by LOG_FILE, joined with " | " and capped at 200 characters.
log_path = os.environ.get("LOG_FILE", "")
try:
    with open(log_path, encoding="utf-8", errors="replace") as handle:
        stripped = (raw.strip() for raw in handle)
        tail = [text for text in stripped if text][-3:]
    joined = " | ".join(tail)[:200]
    if joined:
        print(joined)
    else:
        print("No log content available")
except OSError:
    # Missing or unreadable log: still emit a placeholder summary.
    print("Could not read log file")
PY
)


# Determine should_retry
if [ "$CLASS" = "real_bug" ]; then
Expand Down
11 changes: 7 additions & 4 deletions .github/actions/retry/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,22 @@ runs:
BASE_SECONDS="${{ inputs.backoff_base_seconds }}"
RETRY_CODES="${{ inputs.retry_on_exit_code }}"
SHELL_CMD="${{ inputs.shell_override }}"
LOG_FILE="${GITHUB_WORKSPACE:-$PWD}/.retry-output.log"
: > "$LOG_FILE"
ATTEMPT=0
FINAL_CODE=0

while [ $ATTEMPT -lt $MAX_ATTEMPTS ]; do
ATTEMPT=$((ATTEMPT + 1))
echo "::group::Attempt $ATTEMPT/$MAX_ATTEMPTS"

# Run the command
$SHELL_CMD -c "${{ inputs.run }}"
EXIT_CODE=$?
# Run the command and preserve its output for downstream classification.
echo "Command: ${{ inputs.run }}" | tee -a "$LOG_FILE"
$SHELL_CMD -c "${{ inputs.run }}" 2>&1 | tee -a "$LOG_FILE"
EXIT_CODE=${PIPESTATUS[0]}
FINAL_CODE=$EXIT_CODE

echo "Exit code: $EXIT_CODE"
echo "Exit code: $EXIT_CODE" | tee -a "$LOG_FILE"
echo "::endgroup::"

if [ $EXIT_CODE -eq 0 ]; then
Expand Down
15 changes: 10 additions & 5 deletions .github/workflows/pr-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ jobs:
- name: Agent Quality Score
if: steps.scope.outputs.scope == 'full'
id: score
continue-on-error: true
uses: ./.github/actions/score-agent
with:
pr-number: ${{ github.event.pull_request.number || github.event.inputs.pr_number }}
Expand All @@ -90,7 +91,7 @@ jobs:
gh-token: ${{ secrets.SHELDON_PAT || secrets.GITHUB_TOKEN }}

- name: Reject Low-Quality PR
if: steps.scope.outputs.scope == 'full' && steps.score.outputs.verdict == 'fail'
if: steps.scope.outputs.scope == 'full' && steps.score.outcome == 'success' && steps.score.outputs.verdict == 'fail'
env:
GH_TOKEN: ${{ secrets.SHELDON_PAT || secrets.GITHUB_TOKEN }}
run: |
Expand Down Expand Up @@ -290,16 +291,20 @@ jobs:

# 1. Quality Score (full scope only)
if [ "$SCOPE" = "full" ]; then
REPORT+="### 📊 Quality Score: ${SCORE:-?}/100"
REPORT+="### 📊 Quality Score"
REPORT+=$'\n\n'
if [ -n "$REASONS" ]; then
if [ -z "$SCORE" ]; then
REPORT+="⚠️ Quality score unavailable; continuing with hard gates."
elif [ -n "$REASONS" ]; then
REPORT+="Score: ${SCORE}/100"
REPORT+=$'\n\n'
REPORT+="**Deductions:**"
REPORT+=$'\n'
# Feed the loop from a here-string instead of a pipeline: a piped `while`
# runs in a subshell, so its REPORT+= appends would be lost when it exits.
while IFS= read -r line; do
  # Explicit `if` so a blank final line does not leave a non-zero loop status.
  if [ -n "$line" ]; then
    REPORT+="- ${line}"$'\n'
  fi
done <<< "$REASONS"
else
REPORT+="No deductions."
REPORT+="Score: ${SCORE}/100 — no deductions."
fi
REPORT+=$'\n\n'
fi
Expand Down Expand Up @@ -398,7 +403,7 @@ jobs:
fi

if [ "$SCOPE" = "full" ]; then
if [ "$SCORE" -lt 40 ] 2>/dev/null; then
if [ -n "$SCORE" ] && [ "$SCORE" -lt 40 ] 2>/dev/null; then
# The score is out of 100; 40 is the pass threshold, not the denominator.
REPORT+="❌ Quality Score ${SCORE}/100 below threshold (minimum 40)."$'\n'
VERDICT_PASS=false
fi
Expand Down
24 changes: 20 additions & 4 deletions hub/master/token_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Optional
import json
import os
import warnings


class TokenManager:
Expand Down Expand Up @@ -45,7 +46,6 @@ def _load_tokens(self):
# Fallback: plaintext JSON file with owner-only permissions (chmod 600)
# WARNING: This is NOT encrypted. Do NOT rely on this for high-security environments.
# On shared hosts or compromised machines, any process running as this user can read the file.
import warnings
warnings.warn(
"Master token fallback: storing tokens as plaintext JSON in ~/.hermes-tokens. "
"This is NOT encrypted. For production, ensure keyring is available or use a secrets manager."
Expand All @@ -55,11 +55,27 @@ def _load_tokens(self):
with open(token_path, 'r') as f:
self._tokens = json.load(f)
self._cleanup_expired()
# Ensure file is only readable by owner
os.chmod(token_path, 0o600)
self._restrict_plaintext_file(token_path)
except FileNotFoundError:
self._tokens = {}

def _restrict_plaintext_file(self, token_path: str) -> bool:
    """Best-effort owner-only permissions for the plaintext fallback file.

    On Windows, Python's chmod/stat mode bits do not express owner-only
    ACLs, so callers must treat this fallback as weaker than keyring-backed
    storage.

    Args:
        token_path: Path of the plaintext token file to restrict.

    Returns:
        True only when the file's POSIX mode bits read back as exactly 0600.
        False on Windows, or when the mode could not be applied or verified;
        a warning is emitted in both of those cases instead of raising.
    """
    if os.name == "nt":
        warnings.warn(
            "Master token fallback on Windows cannot guarantee POSIX 0600 "
            "owner-only permissions. Use keyring or a secrets manager for production.",
            stacklevel=2,
        )
        return False

    try:
        os.chmod(token_path, 0o600)
        mode = os.stat(token_path).st_mode & 0o777
    except OSError as exc:
        # Stay best-effort: an error escaping here would surface in the
        # load/save paths (a FileNotFoundError would even be mistaken for
        # "no token file yet" by the loader's handler).
        warnings.warn(
            f"Master token fallback could not restrict {token_path!r} to 0600: {exc}",
            stacklevel=2,
        )
        return False
    return mode == 0o600

def _save_tokens(self):
"""Save tokens — use keyring if available, else plaintext JSON fallback (NOT encrypted)"""
if self._keyring_available:
Expand All @@ -76,7 +92,7 @@ def _save_tokens(self):
os.makedirs(os.path.dirname(token_path), exist_ok=True)
with open(token_path, 'w') as f:
json.dump(self._tokens, f, default=str)
os.chmod(token_path, 0o600)
self._restrict_plaintext_file(token_path)

def _cleanup_expired(self):
"""Remove expired tokens"""
Expand Down
31 changes: 19 additions & 12 deletions misakanet/tools/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,28 @@ def _connect(telemetry_path: str | Path) -> sqlite3.Connection:
path = Path(telemetry_path)
path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(path))
conn.execute(
"""
CREATE TABLE IF NOT EXISTS search_telemetry (
query TEXT,
timestamp REAL,
latency_ms REAL,
cache_hit INTEGER
try:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS search_telemetry (
query TEXT,
timestamp REAL,
latency_ms REAL,
cache_hit INTEGER
)
"""
)
"""
)
return conn
conn.commit()
return conn
except Exception:
conn.close()
raise


def read_dashboard_data(telemetry_path: str | Path = DEFAULT_TELEMETRY_PATH) -> dict[str, Any]:
"""Read summary metrics and recent rows from the telemetry database."""
with _connect(telemetry_path) as conn:
conn = _connect(telemetry_path)
try:
total_searches = int(
conn.execute("SELECT COUNT(*) FROM search_telemetry").fetchone()[0]
)
Expand All @@ -62,6 +68,8 @@ def read_dashboard_data(telemetry_path: str | Path = DEFAULT_TELEMETRY_PATH) ->
LIMIT 20
"""
).fetchall()
finally:
conn.close()

saved_time_ms = 0.0
if hit_count and avg_hit_latency is not None and avg_miss_latency is not None:
Expand All @@ -84,7 +92,6 @@ def read_dashboard_data(telemetry_path: str | Path = DEFAULT_TELEMETRY_PATH) ->
],
}


def _format_timestamp(timestamp: float) -> str:
if timestamp <= 0:
return "-"
Expand Down
Loading
Loading