# Source: eAI/routing/models.yaml (OpenScanAI/eAI, main branch)
# eAI model routing — single source of truth.
# Every agent's `model:` frontmatter must resolve to an entry here.
# Verified against the Anthropic Models API catalog (2026-06).
# Version marker for this routing file — presumably the schema/format
# version that consumers check; confirm against the loader.
version: 1

# Orchestrator tiers: the models that plan, coordinate, and review.
orchestrators:
  # Prime — reserved for the listed high-stakes calls; see `budget`.
  prime:
    name: Claude Fable 5
    model: claude-fable-5
    provider: anthropic
    context_window: 1000000
    max_output: 128000
    pricing_per_mtok:
      input: 10.00
      output: 50.00
    # Never send an explicit `disabled`: Fable 5 answers it with a 400.
    # Omit the setting instead.
    thinking: adaptive
    # Sweep medium/high/xhigh per route; keep max for the hardest calls.
    effort_default: high
    use_for:
      - mission decomposition across divisions
      - architecture decisions and trade-off resolution
      - irreversible or consensus-critical calls
      - conflict resolution between worker outputs
      - final review of high-stakes deliverables
    budget: "≤5 activations per mission phase — Prime is the scalpel, not the hammer"

  # Lead — the default orchestrator for day-to-day pipelines.
  lead:
    name: Claude Opus 4.8
    model: claude-opus-4-8
    provider: anthropic
    context_window: 1000000
    max_output: 128000
    pricing_per_mtok:
      input: 5.00
      output: 25.00
    thinking: adaptive
    # Raise to xhigh for long agentic coordination runs.
    effort_default: high
    use_for:
      - default orchestrator for day-to-day pipelines
      - task decomposition and worker assignment
      - dev<->QA loop management (max 3 retries, then escalate)
      - code-review gates and merge decisions
      - handling worker escalations

# Worker tiers: the models that execute assigned tasks.
workers:
  # Senior — correctness-critical and security-oriented work.
  senior:
    name: Claude Sonnet 4.6
    model: claude-sonnet-4-6
    provider: anthropic
    context_window: 1000000
    max_output: 64000
    pricing_per_mtok:
      input: 3.00
      output: 15.00
    thinking: adaptive
    # Raise to high for security analysis and architecture work.
    effort_default: medium
    use_for:
      - correctness-critical implementation
      - security analysis, threat modeling, audit work
      - complex debugging and multi-system reasoning
      - database/schema design, SRE, infrastructure judgment calls

  # Bulk — high-volume generation served by Moonshot's Kimi K2.
  bulk:
    name: Kimi K2
    # `kimi-k2` is a family alias; pin a dated snapshot in production.
    model: kimi-k2
    snapshots:
      - kimi-k2-turbo-preview
      - kimi-k2-0905-preview
      - kimi-k2-thinking
    provider: moonshot
    runtime:
      # Preferred lane: Claude Code itself, pointed at Moonshot's
      # Anthropic-compatible endpoint (real Kimi tokens).
      # See scripts/claude-eai.sh --bulk.
      claude_code_lane:
        # China endpoint: https://api.moonshot.cn/anthropic
        base_url: https://api.moonshot.ai/anthropic
        # Exported as ANTHROPIC_AUTH_TOKEN (bearer).
        auth_env: MOONSHOT_API_KEY
      # Kimi CLI, or any OpenAI-compatible client
      # -> https://api.moonshot.ai/v1
      kimi_cli: true
    # Used when installing into a single-vendor Anthropic lane.
    fallback: claude-sonnet-4-6
    use_for:
      - high-volume code generation and scaffolding
      - framework/CMS implementation (themes, plugins, CRUD)
      - UI implementation from an approved spec
      - long-context mechanical refactors
      - cost-efficient parallel fan-out across many files

  # Fast — lookups, cleanup, and simple well-specified fixes.
  fast:
    name: Claude Haiku 4.5
    model: claude-haiku-4-5
    provider: anthropic
    context_window: 200000
    max_output: 64000
    pricing_per_mtok:
      input: 1.00
      output: 5.00
    use_for:
      - lookups, grep/glob sweeps, read-only codebase exploration
      - formatting, linting, doc cleanup
      - classification and report collation
      - simple, well-specified single-file fixes

# Escalation policy: tier chain, routing triggers, and anti-patterns.
escalation:
  # Tiers listed from `fast` up to `prime`.
  chain:
    - fast
    - bulk
    - senior
    - lead
    - prime
  triggers:
    - condition: task requires architecture or irreversible decision
      route_to: prime
    - condition: worker fails QA gate 3 times on the same task
      route_to: lead
    - condition: worker discovers scope is larger than assigned
      route_to: lead
    # Lead resolves the conflict; lead escalates to prime only if the
    # conflict is architectural.
    - condition: two workers produce conflicting outputs
      route_to: lead
    # A security worker on the senior tier verifies before lead decides.
    - condition: security-sensitive finding in non-security task
      route_to: senior
  anti_patterns:
    - "Never invert the hierarchy: orchestrators plan and review, workers execute."
    - "Never burn Prime on routine bug fixes or 'which file does this go in' questions."
    # Folded scalar: the two lines below join with one space into a
    # single-line string with no trailing newline.
    - >-
      Never switch a thread's model mid-conversation — it invalidates the
      prompt cache. Spawn a subagent on the other tier instead.