diff --git a/Cargo.toml b/Cargo.toml index f124e2c..b3a9c48 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,8 @@ log = "0.4" env_logger = "0.11" num_cpus = "1.16" tokio-test = "0.4" +flatbuffers = "24.3" +flatbuffers-build = "24.3" [profile.dev] opt-level = 0 diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..c0c30b8 --- /dev/null +++ b/build.rs @@ -0,0 +1,15 @@ +use std::path::PathBuf; + +fn main() { + let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); + + // Compile FlatBuffers schemas to Rust + flatbuffers_build::compile_flatbuffers_files( + &["proto/schema.fbs", "proto/metadata.fbs"], + &["proto/"], + &out_dir, + ) + .expect("Failed to compile FlatBuffers schemas"); + + println!("cargo:rerun-if-changed=proto/"); +} diff --git a/docs/M1.2.1-FLATBUFFERS-SCHEMA.md b/docs/M1.2.1-FLATBUFFERS-SCHEMA.md new file mode 100644 index 0000000..d4cf907 --- /dev/null +++ b/docs/M1.2.1-FLATBUFFERS-SCHEMA.md @@ -0,0 +1,297 @@ +# M1.2.1: FlatBuffers Message Schema + +## Overview + +This document describes the comprehensive FlatBuffers schema (`proto/schema.fbs`) that defines all inter-module message types for the MiMi cognitive architecture. The schema is the **single source of truth** for message serialization/deserialization across Rust and C++ components communicating via the Zenoh Message Bus. + +## Design Principles + +### 1. Zero-Copy Deserialization +- All messages use FlatBuffers for direct, zero-copy access to binary data +- No intermediate allocation or unpacking needed +- Enables sub-millisecond deserialization latency + +### 2. Type Safety & Versioning +- Enum-based message types prevent runtime type confusion +- Protocol version field enables backward compatibility +- Union types ensure type-safe polymorphism + +### 3. Trace Context & Observability +- Every message carries `TraceContext` for distributed tracing +- Correlation IDs link requests across the entire processing pipeline +- Latency tracking per message hop + +### 4. Budget-Aware Routing +- Priority levels (CRITICAL, HIGH, MEDIUM, LOW) for gating system +- Complexity estimates guide routing decisions (Tier 1/2/3) +- Token cost tracking for budget management + +### 5. Semantic Integrity +- Tone profiles capture emotional/voice metadata (Liliana) +- Mood state embedded in responses +- Security flags from Odlaguna gate included in messages + +## Schema Structure + +### Core Enumerations + +#### `IntentType` (Beatrice Classification) +- QUERY: Information request +- ACTION: Task execution request +- SKILL_CREATION: System-generated skill +- RESPONSE: Reply to prior message +- ERROR: Error notification +- EVENT: System event (state change) +- CONTROL: Administrative control + +#### `ComplexityEstimate` (Gating System) +- TRIVIAL: Cache hit, <50ms (Tier 1 Liliana) +- SOCIAL: Greeting/small-talk (Tier 1 Liliana) +- SIMPLE: Known skill, <200ms (Tier 2 Beatrice+Echidna) +- MODERATE: Skill + light reasoning (Tier 2) +- COMPLEX: Full cognitive pipeline (Tier 3) + +#### `GatingTier` (Routing Decision) +- TIER_1_REFLEX: Liliana cache + template +- TIER_2_AUTOMATED: Beatrice + Echidna skill +- TIER_3_COGNITIVE: Full pipeline (Priscilla + Pandora + Echidna + LLM) + +#### `ResponseStatus` (Execution Outcome) +- SUCCESS: Task completed successfully +- PARTIAL: Partial completion (timeout, partial results) +- FAILURE: Task failed +- TIMEOUT: Execution timeout exceeded +- REJECTED: Rejected by Odlaguna gate + +#### `CacheLayer` (Liliana Cache Classification) +- NONE: Cache miss +- EXACT_MATCH: Exact cache hit +- SEMANTIC: Semantic similarity match +- TEMPLATE: Template-based response + +### Message Types + +#### User Input (Beatrice → Mimi) +``` +UserInput { + user_message: string, // Raw user text + user_id: string, // User identifier + session_id: string, // Conversation session + channel: string // Input channel (cli, http, etc.) +} +``` + +#### Intent Classification (Beatrice → Mimi/Liliana) +``` +IntentClassified { + intent_type: IntentType, + confidence: float, // [0,1] classification confidence + entity_tags: [string], // Extracted entities + is_social: bool, + estimated_complexity: ComplexityEstimate, + raw_intent_text: string +} +``` + +#### Liliana Response (Liliana → Odlaguna) +``` +LilianaResponse { + response: string, + mood: Mood, // Current mood state + confidence: float, // Response appropriateness confidence + tone_profile: ToneProfile, // Voice/personality metadata + cached_response: CachedResponse, + fallback_metadata: string // JSON fallback hint +} +``` + +#### Task Execution (Mimi → Ryzu) +``` +TaskExecution { + task_id: string, + skill_id: string, + skill_name: string, + priority: Priority, + parameters: [byte], // Serialized skill params + timeout_ms: uint32, + routing_decision: RoutingDecision +} +``` + +#### Execution Result (Ryzu → Mimi) +``` +ExecutionResult { + task_id: string, + success: bool, + status: ResponseStatus, + output: string, + error: string, + execution_time_ms: uint32, + tokens_consumed: uint32 // For budget tracking +} +``` + +#### Memory Update (Mimi → Pandora) +``` +MemoryUpdate { + update_id: string, + entity_id: string, // Neo4j node ID + action: string, // create, update, link, delete + entity_type: string, // concept, skill, interaction + properties: string, // JSON + heatmap_decay: float, // Thermal decay [0,1] + retention_priority: Priority +} +``` + +#### Odlaguna Gate (Odlaguna → Liliana) +``` +OdalgunaGate { + message_id: string, + allowed: bool, + modifications: [Modification], // Suggested changes + security_flags: SecurityFlags, + confidence: float, // Gate confidence [0,1] + latency_ms: uint32 +} +``` + +#### Error Report (Any Module → Odlaguna) +``` +ErrorReport { + error_id: string, + severity: Priority, + module: string, // Where error occurred + error_message: string, + error_type: string, // Classification + stack_trace: string, // Debug-only + context_json: string // Additional context +} +``` + +### Unified Message Envelope + +All messages wrap their payload in a `Message` type: + +``` +Message { + version: uint32, // Protocol version + trace: TraceContext, // Distributed tracing + body_type: MessageBodyType, // Discriminant + body: MessageBody, // Union of all message types + routing_hint: string // Target topic/route hint +} +``` + +**Root type:** `Message` + +## Code Generation + +### Rust + +**Build Script** (`build.rs`): +```rust +flatbuffers_build::compile_flatbuffers_files( + &["proto/schema.fbs", "proto/metadata.fbs"], + &["proto/"], + &out_dir, +).expect("Failed to compile FlatBuffers schemas"); +``` + +**Output:** Rust bindings generated to `$OUT_DIR/mimi_protocol.rs` + +**Usage:** +```rust +use mimi::protocol::*; +use flatbuffers::FlatBufferBuilder; + +let mut builder = FlatBufferBuilder::new(1024); +let mut msg_builder = builders::MessageBuilder::new(&mut builder); +let intent = msg_builder.create_intent_classified( + IntentType::QUERY, + 0.85, + false, + ComplexityEstimate::SIMPLE +); +``` + +**Validation:** +```rust +use mimi::protocol::validation::*; + +validate_intent_classified(&intent)?; +validate_mood(&mood)?; +``` + +### C++ + +**Generation Script** (`scripts/generate_cpp_bindings.py`): +```bash +python3 scripts/generate_cpp_bindings.py +``` + +**Output:** C++ headers generated to `proto/generated/cpp/` + +**Usage:** +```cpp +#include "proto/generated/cpp/schema_generated.h" +using namespace MiMi::Protocol; + +// Access zero-copy data +auto message = GetMessage(buffer); +auto body = message->body_as_UserInput(); +``` + +## Integration with Zenoh Topics + +Messages are published/subscribed on topic hierarchies mapped to message types: + +| Message Type | Zenoh Topic | Direction | +|---|---|---| +| UserInput | `beatrice/input` | Beatrice → Mimi | +| IntentClassified | `beatrice/intent` | Beatrice → Liliana/Mimi | +| LilianaResponse | `liliana/response` | Liliana → Odlaguna | +| TaskExecution | `mimi/task/execute` | Mimi → Ryzu | +| ExecutionResult | `mimi/task/result` | Ryzu → Mimi | +| MemoryUpdate | `pandora/memory/update` | Mimi → Pandora | +| OdalgunaGate | `odlaguna/gate` | Odlaguna → Liliana | +| ErrorReport | `sys/error` | Any → Odlaguna | +| SkillPublish | `echidna/skill/publish` | Echidna → Odlaguna | +| EventNotification | `sys/events` | Any → Bus | + +## Latency & Performance Targets + +| Operation | Budget | +|---|---| +| Serialization (Rust) | < 100μs | +| Deserialization (zero-copy) | < 50μs | +| Zenoh broker latency | < 1ms | +| End-to-end message hop | < 5ms | + +## Backward Compatibility + +Protocol version field enables future evolution: +- Version mismatch detection +- Graceful downgrade/upgrade paths +- Schema evolution without breaking existing deployments + +## Testing + +Unit tests included in `src/protocol.rs`: + +```bash +cargo test --lib protocol::tests +``` + +Tests validate: +- Message builder correctness +- Mood/intent validation +- Round-trip serialization +- Boundary conditions + +## Future Extensions + +Reserved for M1.3+: +- Compression options (for large skill binaries) +- Streaming message batching +- End-to-end encryption metadata diff --git a/proto/metadata.fbs b/proto/metadata.fbs new file mode 100644 index 0000000..099862a --- /dev/null +++ b/proto/metadata.fbs @@ -0,0 +1,21 @@ +namespace MiMi.Protocol; + +table RuntimeMetrics { + start_time_ms: uint64; + end_time_ms: uint64; + duration_ms: uint32; + cpu_percent: float; + memory_mb: uint32; + tokens_input: uint32; + tokens_output: uint32; +} + +table BudgetSnapshot { + tier1_used: uint32; + tier1_limit: uint32; + tier2_used: uint32; + tier2_limit: uint32; + tier3_used: uint32; + tier3_limit: uint32; + timestamp_ms: uint64; +} diff --git a/proto/schema.fbs b/proto/schema.fbs index bae3c7e..1de6db7 100644 --- a/proto/schema.fbs +++ b/proto/schema.fbs @@ -1,258 +1,334 @@ -// MiMi Message Schema (FlatBuffers) -// Comprehensive protocol definition for distributed message passing +namespace MiMi.Protocol; -namespace mimi.protocol; - -// ============================================================================ -// Enumerations -// ============================================================================ +include "metadata.fbs"; enum IntentType : byte { - UNKNOWN = 0, - QUERY = 1, - EXECUTE = 2, - SKILL_PUBLISH = 3, - STATE_UPDATE = 4, - MEMORY_UPDATE = 5, - ERROR_REPORT = 6, - CONTROL = 7 + QUERY = 0, + ACTION = 1, + SKILL_CREATION = 2, + RESPONSE = 3, + ERROR = 4, + EVENT = 5, + CONTROL = 6 } enum Priority : byte { - LOW = 0, - NORMAL = 1, - HIGH = 2, - CRITICAL = 3 + CRITICAL = 0, + HIGH = 1, + MEDIUM = 2, + LOW = 3 } enum ComplexityEstimate : byte { TRIVIAL = 0, - SIMPLE = 1, - MODERATE = 2, - COMPLEX = 3, - EXTREME = 4 + SOCIAL = 1, + SIMPLE = 2, + MODERATE = 3, + COMPLEX = 4 +} + +enum GatingTier : byte { + TIER_1_REFLEX = 0, + TIER_2_AUTOMATED = 1, + TIER_3_COGNITIVE = 2 } enum ResponseStatus : byte { SUCCESS = 0, - PENDING = 1, - ERROR = 2, + PARTIAL = 1, + FAILURE = 2, TIMEOUT = 3, - INVALID = 4, - UNAUTHORIZED = 5, - RATE_LIMITED = 6 + REJECTED = 4 } enum CacheLayer : byte { NONE = 0, - L1_LOCAL = 1, - L2_PROCESS = 2, - L3_DISTRIBUTED = 3 + EXACT_MATCH = 1, + SEMANTIC = 2, + TEMPLATE = 3 } enum MessageBodyType : byte { - NONE = 0, - TEXT = 1, - BINARY = 2, - JSON = 3, - PROTOBUF = 4 + USER_INPUT = 0, + INTENT_CLASSIFIED = 1, + LILIANA_RESPONSE = 2, + TASK_EXECUTION = 3, + EXECUTION_RESULT = 4, + SKILL_PUBLISH = 5, + ODLAGUNA_GATE = 6, + MEMORY_UPDATE = 7, + EVENT_NOTIFICATION = 8, + ERROR_REPORT = 9, + CONTROL_MESSAGE = 10 } // ============================================================================ -// Trace Context (Distributed Tracing) +// METADATA & CORRELATION // ============================================================================ table TraceContext { - correlation_id: string; - parent_id: string; - span_id: string; - trace_id: string; - timestamp_ns: uint64; - parent_timestamp_ns: uint64; + correlation_id: string (required); // Unique ID for full request trace + parent_id: string; // Parent message ID for nested calls + span_id: string; // This message's span ID + timestamp_ms: uint64 (required); // Message creation time + source_module: string (required); // Originating module (beatrice, liliana, etc.) } -// ============================================================================ -// Mood & Personality -// ============================================================================ - -table MoodState { - energy: float; - confidence: float; - curiosity: float; - caution: float; - collaborative: float; - timestamp_ms: uint64; +table Mood { + curiosity: float; // [0,1] novelty interest + confidence: float; // [0,1] task success expectation + frustration: float; // [0,1] repeated failure state + engagement: float; // [0,1] interaction intensity } table ToneProfile { - formality: float; // 0.0 = casual, 1.0 = formal - creativity: float; // 0.0 = literal, 1.0 = creative - verbosity: float; // 0.0 = terse, 1.0 = verbose - humor_level: float; // 0.0 = none, 1.0 = high + politeness: float; // [0,1] courtesy level + formality: float; // [0,1] casual vs. formal + confidence_stated: float; // [0,1] confidence in response quality } // ============================================================================ -// Core Message Types +// BEATRICE → MIMI: USER INPUT & INTENT CLASSIFICATION // ============================================================================ table UserInput { - text: string; - intent_hint: string; - context_tags: [string]; - metadata: [KeyValuePair]; + user_message: string (required); // Raw user text + user_id: string (required); // User identifier + session_id: string (required); // Conversation session + channel: string; // Input channel (cli, http, websocket, etc.) } table IntentClassified { - intent_type: IntentType; - confidence: float; - entities: [Entity]; - context: string; + intent_type: IntentType; // Classified intent (QUERY, ACTION, etc.) + confidence: float; // [0,1] classification confidence + entity_tags: [string]; // Extracted entities (e.g., ["repo_name", "count"]) + is_social: bool; // Is this a social/trivial request? + estimated_complexity: ComplexityEstimate; // Gating system complexity + raw_intent_text: string; // Parsed intent string for logging } -table Entity { - name: string; - type: string; - value: string; - confidence: float; -} +// ============================================================================ +// LILIANA ↔ SYSTEM: MOOD, CACHE, REFLEX RESPONSES +// ============================================================================ -table KeyValuePair { - key: string; - value: string; +table CachedResponse { + response: string (required); + confidence: float; // [0,1] confidence in cache fitness + cache_layer: CacheLayer; + cache_hit: bool; + latency_ms: uint32; + timestamp_created: uint64; + hit_count: uint32; } -table SkillPublish { - skill_id: string; - skill_name: string; - version: string; - description: string; - inputs: [ParameterSchema]; - outputs: [ParameterSchema]; - tags: [string]; +table LilianaResponse { + response: string (required); + mood: Mood (required); + confidence: float; // Overall confidence in response + tone_profile: ToneProfile; + cached_response: CachedResponse; // If Tier 1 hit + fallback_metadata: string; // JSON: fallback reason, routing hint } -table ParameterSchema { - name: string; - type: string; - description: string; - required: bool; - default_value: string; +// ============================================================================ +// MIMI COMMANDER: TASK CREATION & ROUTING +// ============================================================================ + +table RoutingDecision { + target_tier: GatingTier (required); + reason: string; // Why this tier was chosen + estimated_cost_tokens: uint32; // Token budget estimate + estimated_latency_ms: uint32; // Expected response time + budget_remaining: uint64; // Tokens left in this tier } table TaskExecution { - task_id: string; - skill_id: string; - priority: Priority; - timeout_ms: uint32; - inputs: [KeyValuePair]; - context: string; + task_id: string (required); + skill_id: string (required); // Skill to execute + skill_name: string; // Human-readable name + priority: Priority (required); + parameters: [byte]; // Serialized skill-specific params + timeout_ms: uint32; // Task timeout (ms) + routing_decision: RoutingDecision; } +// ============================================================================ +// EXECUTION RESULTS & ERROR HANDLING +// ============================================================================ + table ExecutionResult { - task_id: string; + task_id: string (required); + success: bool; status: ResponseStatus; - output: string; + output: string; // Result output + error: string; // Error message if failed execution_time_ms: uint32; - error_message: string; + tokens_consumed: uint32; // For gating budget tracking + error_type: string; // e.g., "timeout", "network", "validation" } -table MemoryUpdate { - graph_id: string; - node_id: string; - operation: string; // CREATE, UPDATE, DELETE, QUERY - data: [KeyValuePair]; - timestamp_ms: uint64; - heat: float; +table ErrorReport { + error_id: string (required); + severity: Priority; // How critical is this error? + module: string (required); // Where error occurred + error_message: string (required); + error_type: string; // Error classification + stack_trace: string; // Stack trace (debug only) + context_json: string; // Additional context as JSON + timestamp_ms: uint64 (required); +} + +// ============================================================================ +// SKILL LIFECYCLE: CREATION, REVIEW, DEPLOYMENT +// ============================================================================ + +table SkillMetadata { + skill_id: string (required); + name: string (required); + version: string; + language: string; // "rhai" or "wasm" + author: string; + description: string; + created_at: uint64; + modified_at: uint64; + execution_count: uint64; + success_rate: float; // [0,1] + avg_latency_ms: float; + estimated_token_cost: uint32; + tags: [string]; // Capability tags +} + +table SkillCode { + skill_id: string (required); + language: string; // "rhai" or "wasm" + code: [byte] (required); // Script or WASM binary + dependencies: [string]; // External dependencies + compilation_flags: string; +} + +table SkillPublish { + metadata: SkillMetadata (required); + code: SkillCode (required); + approval_required: bool; +} + +// ============================================================================ +// ODLAGUNA: SECURITY GATING & VALIDATION +// ============================================================================ + +table SecurityFlags { + has_suspicious_patterns: bool; + potential_bias_detected: bool; + tone_misalignment: bool; + contradicts_prior: bool; + requires_review: bool; + rejected_reason: string; } table OdalgunaGate { - task_id: string; - complexity_score: ComplexityEstimate; - energy_required: float; - security_level: uint8; - approved: bool; - rationale: string; + message_id: string (required); + allowed: bool; + modifications: [Modification]; // Suggested content changes + security_flags: SecurityFlags; + confidence: float; // [0,1] gate confidence + latency_ms: uint32; } -table LilianaResponse { - mood_state: MoodState; - tone_profile: ToneProfile; - generated_response: string; - confidence: float; - alternatives: [string]; +table Modification { + type: string; // "tone_adjust", "content_trim", etc. + reason: string; + before: string; + after: string; } -table ErrorReport { - error_id: string; - error_type: string; - message: string; - stack_trace: string; - context: string; - timestamp_ms: uint64; - severity: uint8; +// ============================================================================ +// PANDORA: MEMORY & CONTEXT UPDATES +// ============================================================================ + +table MemoryUpdate { + update_id: string (required); + entity_id: string (required); // Neo4j node ID + action: string; // "create", "update", "link", "delete" + entity_type: string; // "concept", "skill", "interaction", etc. + properties: string; // JSON properties + heatmap_decay: float; // Thermal decay factor [0,1] + retention_priority: Priority; + timestamp_ms: uint64 (required); +} + +table MemoryQuery { + query_id: string (required); + entity_type: string; + filters: string; // JSON query filters + max_results: uint32; + heatmap_threshold: float; // Min heat for inclusion +} + +// ============================================================================ +// PRISCILLA: CRITIQUE & ANALYSIS +// ============================================================================ + +table CritiqueRequest { + critique_id: string (required); + target_text: string (required); // Text to critique + critique_type: string; // "logic", "safety", "coherence", etc. + context: string; // Background context (JSON) + max_tokens: uint32; } +table CritiqueResponse { + critique_id: string (required); + assessment: string; // Critique result + confidence: float; // [0,1] confidence in critique + suggested_revisions: [string]; +} + +// ============================================================================ +// SYSTEM EVENTS & NOTIFICATIONS +// ============================================================================ + table EventNotification { - event_type: string; - event_id: string; - source: string; - data: [KeyValuePair]; - timestamp_ms: uint64; + event_id: string (required); + event_type: string; // "skill_loaded", "memory_updated", etc. + event_data: string; // JSON event payload + severity: Priority; + timestamp_ms: uint64 (required); } table ControlMessage { - command: string; - target: string; - parameters: [KeyValuePair]; + control_id: string (required); + command: string; // "shutdown", "reload_skills", etc. + parameters: string; // JSON parameters + requires_ack: bool; } // ============================================================================ -// Message Envelope +// UNIFIED MESSAGE ENVELOPE // ============================================================================ union MessageBody { UserInput, IntentClassified, - SkillPublish, + LilianaResponse, TaskExecution, ExecutionResult, - MemoryUpdate, + SkillPublish, OdalgunaGate, - LilianaResponse, - ErrorReport, + MemoryUpdate, + MemoryQuery, + CritiqueRequest, + CritiqueResponse, EventNotification, + ErrorReport, ControlMessage } table Message { - version: uint16; - message_id: string; - message_type: IntentType; - body: MessageBody; - - // Routing & Metadata - source: string; - destination: string; - topic: string; - priority: Priority; - complexity: ComplexityEstimate; - cache_layer: CacheLayer; - - // Trace Context - trace: TraceContext; - - // Timestamps - created_at_ms: uint64; - expires_at_ms: uint64; - - // Mood/Personality - mood: MoodState; - tone: ToneProfile; - - // Flags - requires_response: bool; - is_compressed: bool; + version: uint32; // Protocol version (for compatibility) + trace: TraceContext (required); body_type: MessageBodyType; + body: MessageBody (required); + routing_hint: string; // Target topic/route hint } root_type Message; diff --git a/scripts/generate_cpp_bindings.py b/scripts/generate_cpp_bindings.py new file mode 100644 index 0000000..799039d --- /dev/null +++ b/scripts/generate_cpp_bindings.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +""" +FlatBuffers C++ code generation for MiMi protocol schemas. +Generates C++ header files from .fbs schema files. +""" + +import subprocess +import sys +from pathlib import Path + +SCRIPT_DIR = Path(__file__).resolve().parent +PROTO_DIR = SCRIPT_DIR / "proto" +CPP_OUT_DIR = SCRIPT_DIR / "proto" / "generated" / "cpp" +SCHEMAS = [ + "proto/schema.fbs", + "proto/metadata.fbs", +] + +def main(): + CPP_OUT_DIR.mkdir(parents=True, exist_ok=True) + + for schema in SCHEMAS: + schema_path = SCRIPT_DIR / schema + if not schema_path.exists(): + print(f"ERROR: Schema file not found: {schema_path}", file=sys.stderr) + return 1 + + print(f"Generating C++ bindings for {schema}...") + cmd = [ + "flatc", + "--cpp", + f"--cpp-std c++17", + "-o", str(CPP_OUT_DIR), + "-I", str(PROTO_DIR), + str(schema_path), + ] + + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"ERROR: flatc compilation failed for {schema}", file=sys.stderr) + print(result.stderr, file=sys.stderr) + return 1 + + print(f"✓ Generated C++ headers for {schema}") + + print(f"\nC++ bindings generated to: {CPP_OUT_DIR}") + return 0 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/validate_schemas.py b/scripts/validate_schemas.py new file mode 100644 index 0000000..fa7e7aa --- /dev/null +++ b/scripts/validate_schemas.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +""" +Simple FlatBuffers schema validator. +Checks basic syntax without requiring the full flatc compiler. +""" + +import re +import sys +from pathlib import Path + +def validate_schema(filepath): + """Basic schema validation.""" + content = Path(filepath).read_text() + + errors = [] + + if "namespace" not in content: + errors.append("Missing 'namespace' declaration") + + if "root_type" not in content: + errors.append("Missing 'root_type' declaration") + + table_matches = re.findall(r'table (\w+)', content) + if not table_matches: + errors.append("No 'table' definitions found") + + enum_matches = re.findall(r'enum (\w+)', content) + if not enum_matches: + errors.append("No 'enum' definitions found") + + if re.search(r'required\);', content) and not re.search(r'required;', content): + errors.append("Improper 'required' syntax found") + + unmatched_braces = content.count('{') - content.count('}') + if unmatched_braces != 0: + errors.append(f"Unmatched braces: +{unmatched_braces}") + + return errors + +def main(): + schemas = [ + "proto/schema.fbs", + "proto/metadata.fbs", + ] + + all_errors = {} + for schema in schemas: + path = Path(schema) + if not path.exists(): + print(f"ERROR: Schema not found: {schema}") + return 1 + + print(f"Validating {schema}...") + errors = validate_schema(path) + if errors: + all_errors[schema] = errors + else: + print(f" ✓ {schema} syntax OK") + + if all_errors: + print("\nValidation errors found:") + for schema, errors in all_errors.items(): + print(f"\n{schema}:") + for err in errors: + print(f" - {err}") + return 1 + + print("\nAll schemas valid!") + return 0 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/protocol.rs b/src/protocol.rs new file mode 100644 index 0000000..24b0cb1 --- /dev/null +++ b/src/protocol.rs @@ -0,0 +1,245 @@ +pub mod generated { + include!(concat!(env!("OUT_DIR"), "/mimi_protocol.rs")); +} + +pub use generated::*; + +use std::time::{SystemTime, UNIX_EPOCH}; + +pub fn now_ms() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards") + .as_millis() as u64 +} + +pub mod builders { + use super::*; + use flatbuffers::{FlatBufferBuilder, WIPOffset}; + + pub struct MessageBuilder<'a> { + builder: &'a mut FlatBufferBuilder<'a>, + } + + impl<'a> MessageBuilder<'a> { + pub fn new(builder: &'a mut FlatBufferBuilder<'a>) -> Self { + Self { builder } + } + + pub fn create_user_input( + &mut self, + user_message: &str, + user_id: &str, + session_id: &str, + channel: &str, + ) -> WIPOffset { + let msg = self.builder.create_string(user_message); + let uid = self.builder.create_string(user_id); + let sid = self.builder.create_string(session_id); + let ch = self.builder.create_string(channel); + + UserInput::create( + self.builder, + &UserInputArgs { + user_message: Some(msg), + user_id: Some(uid), + session_id: Some(sid), + channel: Some(ch), + }, + ) + } + + pub fn create_trace_context( + &mut self, + correlation_id: &str, + source_module: &str, + ) -> WIPOffset { + let cid = self.builder.create_string(correlation_id); + let src = self.builder.create_string(source_module); + + TraceContext::create( + self.builder, + &TraceContextArgs { + correlation_id: Some(cid), + timestamp_ms: now_ms(), + source_module: Some(src), + parent_id: None, + span_id: None, + }, + ) + } + + pub fn create_mood( + &mut self, + curiosity: f32, + confidence: f32, + frustration: f32, + engagement: f32, + ) -> WIPOffset { + Mood::create( + self.builder, + &MoodArgs { + curiosity, + confidence, + frustration, + engagement, + }, + ) + } + + pub fn create_intent_classified( + &mut self, + intent_type: IntentType, + confidence: f32, + is_social: bool, + complexity: ComplexityEstimate, + ) -> WIPOffset { + IntentClassified::create( + self.builder, + &IntentClassifiedArgs { + intent_type, + confidence, + entity_tags: None, + is_social, + estimated_complexity: complexity, + raw_intent_text: None, + }, + ) + } + + pub fn create_task_execution( + &mut self, + task_id: &str, + skill_id: &str, + skill_name: &str, + priority: Priority, + timeout_ms: u32, + ) -> WIPOffset { + let tid = self.builder.create_string(task_id); + let sid = self.builder.create_string(skill_id); + let sname = self.builder.create_string(skill_name); + + TaskExecution::create( + self.builder, + &TaskExecutionArgs { + task_id: Some(tid), + skill_id: Some(sid), + skill_name: Some(sname), + priority, + parameters: None, + timeout_ms, + routing_decision: None, + }, + ) + } + + pub fn create_execution_result( + &mut self, + task_id: &str, + success: bool, + status: ResponseStatus, + output: &str, + execution_time_ms: u32, + ) -> WIPOffset { + let tid = self.builder.create_string(task_id); + let out = self.builder.create_string(output); + + ExecutionResult::create( + self.builder, + &ExecutionResultArgs { + task_id: Some(tid), + success, + status, + output: Some(out), + error: None, + execution_time_ms, + tokens_consumed: 0, + error_type: None, + }, + ) + } + } +} + +pub mod validation { + use super::*; + + pub fn validate_message(msg: &Message) -> Result<(), String> { + if msg.version() == 0 { + return Err("Invalid protocol version".to_string()); + } + + if msg.trace().is_none() { + return Err("Missing trace context".to_string()); + } + + if msg.body().is_none() { + return Err("Missing message body".to_string()); + } + + Ok(()) + } + + pub fn validate_intent_classified(intent: &IntentClassified) -> Result<(), String> { + if !(0.0..=1.0).contains(&intent.confidence()) { + return Err("Confidence must be [0, 1]".to_string()); + } + Ok(()) + } + + pub fn validate_mood(mood: &Mood) -> Result<(), String> { + let values = [ + mood.curiosity(), + mood.confidence(), + mood.frustration(), + mood.engagement(), + ]; + + for &val in &values { + if !(0.0..=1.0).contains(&val) { + return Err("Mood values must be [0, 1]".to_string()); + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use flatbuffers::FlatBufferBuilder; + + #[test] + fn test_message_builder_user_input() { + let mut builder = FlatBufferBuilder::new(1024); + let mut msg_builder = builders::MessageBuilder::new(&mut builder); + + let _user_input = msg_builder.create_user_input("Hello Mimi", "user123", "sess456", "cli"); + + assert!(true); + } + + #[test] + fn test_mood_validation() { + let mut builder = FlatBufferBuilder::new(256); + let mood = builders::MessageBuilder::new(&mut builder).create_mood(0.5, 0.7, 0.2, 0.8); + + builder.finish(mood, None); + let buf = builder.finished_data(); + + let root = flatbuffers::root::(buf).expect("Failed to deserialize"); + assert!(validation::validate_mood(&root).is_ok()); + } + + #[test] + fn test_invalid_mood_validation() { + let mut builder = FlatBufferBuilder::new(256); + let mood = builders::MessageBuilder::new(&mut builder).create_mood(1.5, 0.7, 0.2, 0.8); + + builder.finish(mood, None); + let buf = builder.finished_data(); + + let root = flatbuffers::root::(buf).expect("Failed to deserialize"); + assert!(validation::validate_mood(&root).is_err()); + } +}