diff --git a/CHANGELOG.md b/CHANGELOG.md index 61b3e90..8f431ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ All notable changes to CLX will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/), and this project adheres to [Semantic Versioning](https://semver.org/). +## [0.7.2] - 2026-05-02 + +### Fixed +- Auto-recall (`UserPromptSubmit` hook) silently produced no semantic + context when embeddings were routed to Azure OpenAI. The recall path + called `embed(query, None)`; Azure rejects `None` with + `DeploymentNotFound` (only Ollama tolerated it via its own + baked-in default). `RecallEngine` now accepts an explicit embedding + model via `with_embedding_model(...)`, and both production callers + (`clx-hook` auto-recall and `clx-mcp` `clx_recall` tool) pass the + configured `llm.embeddings.model`. FTS5 fallback was working all + along, but the headline semantic-recall feature was dark on + Azure-routed embeddings until this fix. 
+ ## [0.7.1] - 2026-05-02 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 278b57e..afdcd3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -549,7 +549,7 @@ checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" [[package]] name = "clx" -version = "0.7.1" +version = "0.7.2" dependencies = [ "anyhow", "assert_cmd", @@ -579,7 +579,7 @@ dependencies = [ [[package]] name = "clx-core" -version = "0.7.1" +version = "0.7.2" dependencies = [ "anyhow", "chrono", @@ -608,7 +608,7 @@ dependencies = [ [[package]] name = "clx-hook" -version = "0.7.1" +version = "0.7.2" dependencies = [ "anyhow", "chrono", @@ -627,7 +627,7 @@ dependencies = [ [[package]] name = "clx-mcp" -version = "0.7.1" +version = "0.7.2" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 79fe3cb..56d98db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ members = [ ] [workspace.package] -version = "0.7.1" +version = "0.7.2" edition = "2024" license = "MPL-2.0" authors = ["CLX Contributors"] diff --git a/crates/clx-core/src/recall.rs b/crates/clx-core/src/recall.rs index 5e29003..7c2c5cd 100644 --- a/crates/clx-core/src/recall.rs +++ b/crates/clx-core/src/recall.rs @@ -69,6 +69,11 @@ pub struct RecallEngine<'a> { /// active: `check_model_mismatch` returns the stored vs. configured pair /// when they differ. configured_model_ident: Option<String>, + /// The bare embedding model / deployment name to pass to the backend + /// when generating the query embedding. Required for backends that do + /// not have a baked-in default model (e.g., `AzureOpenAIBackend`). Optional because + /// Ollama tolerates `None` by falling back to its configured default. + embedding_model: Option<String>, } impl<'a> RecallEngine<'a> { @@ -84,9 +89,20 @@ impl<'a> RecallEngine<'a> { ollama, embedding_store, configured_model_ident: None, + embedding_model: None, } } + /// Attach the bare embedding model / deployment name.
Required for + /// Azure-routed embeddings (Azure backend errors with + /// `DeploymentNotFound` when called with `None`); Ollama tolerates + /// missing model and falls back to its config default. + #[must_use] + pub fn with_embedding_model(mut self, model: impl Into<String>) -> Self { + self.embedding_model = Some(model.into()); + self + } + /// Attach the configured embedding model identifier so that mismatch + /// detection works. The identifier should be `"<provider>:<model>"`. #[must_use] @@ -153,8 +169,9 @@ impl<'a> RecallEngine<'a> { emb_store: &EmbeddingStore, config: &RecallQueryConfig, ) -> Vec { - // Generate embedding for the query - let embedding = match ollama.embed(query, None).await { + // Generate embedding for the query. Pass the configured embedding + // model so backends without a baked-in default (Azure) work. + let embedding = match ollama.embed(query, self.embedding_model.as_deref()).await { Ok(emb) => emb, Err(e) => { warn!("Recall semantic embedding failed: {e}"); @@ -886,4 +903,30 @@ mod tests { search_type, } } + + /// Regression for the 0.7.1 bug: `auto_recall` passed `None` for the + /// embeddings model, which Azure rejects with `DeploymentNotFound`. + /// 0.7.2 plumbs the configured model through `with_embedding_model`. + /// This test asserts the builder stores the model so `try_semantic` + /// will pass it to the backend.
+ #[test] + fn embedding_model_builder_persists_value() { + let storage = Storage::open_in_memory().unwrap(); + let engine = + RecallEngine::new(&storage, None, None).with_embedding_model("text-embedding-3-small"); + assert_eq!( + engine.embedding_model.as_deref(), + Some("text-embedding-3-small") + ); + } + + #[test] + fn embedding_model_default_is_none_for_back_compat() { + let storage = Storage::open_in_memory().unwrap(); + let engine = RecallEngine::new(&storage, None, None); + assert!( + engine.embedding_model.is_none(), + "default must be None so existing callers keep relying on Ollama's baked-in default" + ); + } } diff --git a/crates/clx-hook/src/hooks/subagent.rs b/crates/clx-hook/src/hooks/subagent.rs index a9bcae4..8fdf51e 100644 --- a/crates/clx-hook/src/hooks/subagent.rs +++ b/crates/clx-hook/src/hooks/subagent.rs @@ -119,8 +119,11 @@ async fn do_recall(prompt: &str, config: &clx_core::config::Config) -> Option