From cb8b748ab40a1c421ad4a1ff083dcb42caf88749 Mon Sep 17 00:00:00 2001 From: blackax Date: Sat, 2 May 2026 16:39:59 -0700 Subject: [PATCH 1/3] fix(recall): pass configured embeddings model to backend; bug surfaced on Azure (0.7.2) --- CHANGELOG.md | 14 ++++++++ Cargo.lock | 8 ++--- Cargo.toml | 2 +- crates/clx-core/src/recall.rs | 47 +++++++++++++++++++++++++-- crates/clx-hook/src/hooks/subagent.rs | 5 ++- crates/clx-mcp/src/tools/recall.rs | 3 +- 6 files changed, 70 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61b3e90..8f431ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ All notable changes to CLX will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/), and this project adheres to [Semantic Versioning](https://semver.org/). +## [0.7.2] - 2026-05-02 + +### Fixed +- Auto-recall (`UserPromptSubmit` hook) silently produced no semantic + context when embeddings were routed to Azure OpenAI. The recall path + called `embed(query, None)`; Azure rejects `None` with + `DeploymentNotFound` (only Ollama tolerated it via its own + baked-in default). `RecallEngine` now accepts an explicit embedding + model via `with_embedding_model(...)`, and both production callers + (`clx-hook` auto-recall and `clx-mcp` `clx_recall` tool) pass the + configured `llm.embeddings.model`. FTS5 fallback was working all + along, but the headline semantic-recall feature was dark on + Azure-routed embeddings until this fix. 
+ ## [0.7.1] - 2026-05-02 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 278b57e..afdcd3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -549,7 +549,7 @@ checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" [[package]] name = "clx" -version = "0.7.1" +version = "0.7.2" dependencies = [ "anyhow", "assert_cmd", @@ -579,7 +579,7 @@ dependencies = [ [[package]] name = "clx-core" -version = "0.7.1" +version = "0.7.2" dependencies = [ "anyhow", "chrono", @@ -608,7 +608,7 @@ dependencies = [ [[package]] name = "clx-hook" -version = "0.7.1" +version = "0.7.2" dependencies = [ "anyhow", "chrono", @@ -627,7 +627,7 @@ dependencies = [ [[package]] name = "clx-mcp" -version = "0.7.1" +version = "0.7.2" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 79fe3cb..56d98db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ members = [ ] [workspace.package] -version = "0.7.1" +version = "0.7.2" edition = "2024" license = "MPL-2.0" authors = ["CLX Contributors"] diff --git a/crates/clx-core/src/recall.rs b/crates/clx-core/src/recall.rs index 5e29003..7a26a43 100644 --- a/crates/clx-core/src/recall.rs +++ b/crates/clx-core/src/recall.rs @@ -69,6 +69,11 @@ pub struct RecallEngine<'a> { /// active: `check_model_mismatch` returns the stored vs. configured pair /// when they differ. configured_model_ident: Option, + /// The bare embedding model / deployment name to pass to the backend + /// when generating the query embedding. Required for backends that do + /// not have a baked-in default model (Azure OpenAI). Optional because + /// Ollama tolerates `None` by falling back to its configured default. + embedding_model: Option<String>, } impl<'a> RecallEngine<'a> { @@ -84,9 +89,20 @@ impl<'a> RecallEngine<'a> { ollama, embedding_store, configured_model_ident: None, + embedding_model: None, } } + /// Attach the bare embedding model / deployment name.
Required for + /// Azure-routed embeddings (Azure backend errors with + /// `DeploymentNotFound` when called with `None`); Ollama tolerates + /// missing model and falls back to its config default. + #[must_use] + pub fn with_embedding_model(mut self, model: impl Into<String>) -> Self { + self.embedding_model = Some(model.into()); + self + } + /// Attach the configured embedding model identifier so that mismatch /// detection works. The identifier should be `":"`. #[must_use] @@ -153,8 +169,9 @@ impl<'a> RecallEngine<'a> { emb_store: &EmbeddingStore, config: &RecallQueryConfig, ) -> Vec { - // Generate embedding for the query - let embedding = match ollama.embed(query, None).await { + // Generate embedding for the query. Pass the configured embedding + // model so backends without a baked-in default (Azure) work. + let embedding = match ollama.embed(query, self.embedding_model.as_deref()).await { Ok(emb) => emb, Err(e) => { warn!("Recall semantic embedding failed: {e}"); @@ -886,4 +903,30 @@ mod tests { search_type, } } + + /// Regression for the 0.7.1 bug: auto_recall passed `None` for the + /// embeddings model, which Azure rejects with `DeploymentNotFound`. + /// 0.7.2 plumbs the configured model through `with_embedding_model`. + /// This test asserts the builder stores the model so `try_semantic` + /// will pass it to the backend.
+ #[test] + fn embedding_model_builder_persists_value() { + let storage = Storage::open_in_memory().unwrap(); + let engine = RecallEngine::new(&storage, None, None) + .with_embedding_model("text-embedding-3-small"); + assert_eq!( + engine.embedding_model.as_deref(), + Some("text-embedding-3-small") + ); + } + + #[test] + fn embedding_model_default_is_none_for_back_compat() { + let storage = Storage::open_in_memory().unwrap(); + let engine = RecallEngine::new(&storage, None, None); + assert!( + engine.embedding_model.is_none(), + "default must be None so existing callers keep relying on Ollama's baked-in default" + ); + } } diff --git a/crates/clx-hook/src/hooks/subagent.rs b/crates/clx-hook/src/hooks/subagent.rs index a9bcae4..8fdf51e 100644 --- a/crates/clx-hook/src/hooks/subagent.rs +++ b/crates/clx-hook/src/hooks/subagent.rs @@ -119,8 +119,11 @@ async fn do_recall(prompt: &str, config: &clx_core::config::Config) -> Option Date: Sat, 2 May 2026 16:41:19 -0700 Subject: [PATCH 2/3] style(recall): backticks in docstrings --- crates/clx-core/src/recall.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/clx-core/src/recall.rs b/crates/clx-core/src/recall.rs index 7a26a43..4a09ccf 100644 --- a/crates/clx-core/src/recall.rs +++ b/crates/clx-core/src/recall.rs @@ -71,7 +71,7 @@ pub struct RecallEngine<'a> { configured_model_ident: Option, /// The bare embedding model / deployment name to pass to the backend /// when generating the query embedding. Required for backends that do - /// not have a baked-in default model (Azure OpenAI). Optional because + /// not have a baked-in default model (e.g., AzureOpenAIBackend). Optional because /// Ollama tolerates `None` by falling back to its configured default. 
embedding_model: Option<String>, } @@ -904,7 +904,7 @@ mod tests { } } - /// Regression for the 0.7.1 bug: auto_recall passed `None` for the + /// Regression for the 0.7.1 bug: `auto_recall` passed `None` for the /// embeddings model, which Azure rejects with `DeploymentNotFound`. /// 0.7.2 plumbs the configured model through `with_embedding_model`. /// This test asserts the builder stores the model so `try_semantic` @@ -912,8 +912,8 @@ mod tests { #[test] fn embedding_model_builder_persists_value() { let storage = Storage::open_in_memory().unwrap(); - let engine = RecallEngine::new(&storage, None, None) - .with_embedding_model("text-embedding-3-small"); + let engine = + RecallEngine::new(&storage, None, None).with_embedding_model("text-embedding-3-small"); assert_eq!( engine.embedding_model.as_deref(), Some("text-embedding-3-small") From 7190e25f03d94de4aaec66825b66def0d4048c8d Mon Sep 17 00:00:00 2001 From: blackax Date: Sat, 2 May 2026 16:42:10 -0700 Subject: [PATCH 3/3] style(recall): backtick AzureOpenAIBackend in docstring --- crates/clx-core/src/recall.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/clx-core/src/recall.rs b/crates/clx-core/src/recall.rs index 4a09ccf..7c2c5cd 100644 --- a/crates/clx-core/src/recall.rs +++ b/crates/clx-core/src/recall.rs @@ -71,7 +71,7 @@ pub struct RecallEngine<'a> { configured_model_ident: Option, /// The bare embedding model / deployment name to pass to the backend /// when generating the query embedding. Required for backends that do - /// not have a baked-in default model (e.g., AzureOpenAIBackend). Optional because + /// not have a baked-in default model (e.g., `AzureOpenAIBackend`). Optional because /// Ollama tolerates `None` by falling back to its configured default. embedding_model: Option<String>, }