From f43eea242624cdfb161a6e9f1c213461a5431cff Mon Sep 17 00:00:00 2001 From: konard Date: Thu, 30 Apr 2026 04:07:31 +0000 Subject: [PATCH 1/6] Initial commit with task details Adding .gitkeep for PR creation (default mode). This file will be removed when the task is complete. Issue: https://github.com/link-foundation/link-cli/issues/67 --- .gitkeep | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitkeep diff --git a/.gitkeep b/.gitkeep new file mode 100644 index 0000000..552d228 --- /dev/null +++ b/.gitkeep @@ -0,0 +1 @@ +# .gitkeep file auto-generated at 2026-04-30T04:07:31.434Z for PR creation at branch issue-67-d67d72474036 for issue https://github.com/link-foundation/link-cli/issues/67 \ No newline at end of file From ae76ce0177f6fd9d859665d0efd1c475f0b8f6e6 Mon Sep 17 00:00:00 2001 From: konard Date: Thu, 30 Apr 2026 04:23:29 +0000 Subject: [PATCH 2/6] feat: improve Rust query parity --- .gitkeep | 1 - docs/case-studies/issue-67/README.md | 51 ++ .../20260430_041900_rust_query_parity.md | 5 + rust/src/query_processor.rs | 829 ++++++++++++++---- .../query_processor_csharp_parity_tests.rs | 207 +++++ 5 files changed, 928 insertions(+), 165 deletions(-) delete mode 100644 .gitkeep create mode 100644 docs/case-studies/issue-67/README.md create mode 100644 rust/changelog.d/20260430_041900_rust_query_parity.md create mode 100644 rust/tests/query_processor_csharp_parity_tests.rs diff --git a/.gitkeep b/.gitkeep deleted file mode 100644 index 552d228..0000000 --- a/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -# .gitkeep file auto-generated at 2026-04-30T04:07:31.434Z for PR creation at branch issue-67-d67d72474036 for issue https://github.com/link-foundation/link-cli/issues/67 \ No newline at end of file diff --git a/docs/case-studies/issue-67/README.md b/docs/case-studies/issue-67/README.md new file mode 100644 index 0000000..c6326aa --- /dev/null +++ b/docs/case-studies/issue-67/README.md @@ -0,0 +1,51 @@ +# Issue 67 Case Study: Rust Implementation Parity + 
+Source issue: https://github.com/link-foundation/link-cli/issues/67 + +This case study captures the requirements from issue 67, the external components checked while planning the work, and the implementation path for bringing the Rust CLI into parity with the C# implementation. + +## External Components Reviewed + +As of 2026-04-30: + +- `linksplatform/doublets-rs`: GitHub repository https://github.com/linksplatform/doublets-rs, latest GitHub release `v0.3.0`; crates.io package appears as `doublets = "0.3.0"`. +- `link-foundation/links-notation`: GitHub repository https://github.com/link-foundation/links-notation, latest GitHub release `0.13.0_csharp`; crates.io package appears as `links-notation = "0.13.0"`. +- `linksplatform/Data.Doublets.Sequences`: GitHub repository https://github.com/linksplatform/Data.Doublets.Sequences, latest GitHub release `csharp_0.6.5`. +- CI/CD templates requested by the issue: + - + - + - + +## Requirement Inventory + +| Requirement | Current status | Solution plan | +| --- | --- | --- | +| Use the latest `doublets-rs` and `links-notation` as a Rust basis. | The current Rust package still uses local storage/parser components. The current published Rust crates are `doublets = "0.3.0"` and `links-notation = "0.13.0"`. | Introduce these crates behind small adapter modules so CLI behavior remains stable while storage and notation parsing are swapped incrementally. Start with parser fixtures, then storage fixtures, then binary compatibility fixtures. | +| Reimplement sequence support in pure Rust based on `Data.Doublets.Sequences`. | The Rust implementation has link storage and query operations, but no dedicated sequence layer matching the C# package. | Port sequence primitives as a separate Rust module with C# fixture parity tests for creation, traversal, Unicode text, deletion, and persistence. | +| Match C# Unicode support and binary file compatibility. | Existing Rust tests cover named links and persistence basics. Cross-runtime binary fixtures are not yet complete. | Add C#-generated binary fixtures and Rust-generated binary fixtures, then verify both runtimes can read each file without data loss. 
Include non-ASCII names and multi-codepoint text cases. | +| Support the same CLI options, features, and tests as C#. | The repository already has C# and Rust test suites. This PR closes concrete query semantics gaps found against the C# `AdvancedMixedQueryProcessor` behavior. | Continue converting C# tests into Rust parity tests by feature area: storage, parser, query processor, CLI commands, persistence, and sequences. | +| Keep C# under `./csharp`, Rust under `./rust`, and provide separate workflows. | The repository already has `csharp/`, `rust/`, `.github/workflows/csharp.yml`, and `.github/workflows/rust.yml`. | Preserve this layout. Treat future parity work as package-local changes unless a shared workflow or script must change. | +| Compare CI/CD templates and reuse best practices. | Rust and C# workflows exist, and Rust has changelog fragment based release automation. | Audit the requested templates in a follow-up pass focused on workflow drift: permissions, cache keys, test matrix, linting, changelog validation, release trigger, and artifact publishing. | +| Collect issue data in `./docs/case-studies/issue-67`. | This document satisfies the requested repository-local case study folder. | Keep this document updated as additional parity gaps are discovered or closed. | +| Plan and execute in one pull request. | PR 68 is the working pull request for this issue branch. | Keep all issue-67 implementation, tests, documentation, and release notes in PR 68. | + +## Implemented In This PR + +This PR focuses on query processor parity gaps that were blocking Rust behavior from matching C# query semantics: + +- Accepts the unwrapped query form used by C# examples: `restriction substitution`. +- Deletes all links that match a structural restriction pattern instead of only deleting explicit link IDs. +- Supports wildcard and variable matching across nested link patterns. 
+- Applies variable-driven swaps and replacements using solution bindings from the restriction side. +- Returns matched changes for no-op variable substitutions, matching the C# behavior. +- Reuses existing structural links for named composite substitutions before applying a new name, avoiding accidental duplicate leaf creation. + +The Rust test suite now includes focused parity tests in `rust/tests/query_processor_csharp_parity_tests.rs`. + +## Next Parity Work + +1. Add dependency adapter experiments for `doublets` and `links-notation` without replacing public CLI behavior in a single step. +2. Build cross-runtime fixture tests for binary file compatibility and Unicode names/text. +3. Port sequence primitives from the C# sequence package into Rust with fixture-driven tests. +4. Expand Rust CLI tests until every C# CLI behavior has a corresponding Rust assertion. +5. Run a workflow-template audit against the requested C#, Rust, and JS pipeline templates and apply only concrete drift fixes. diff --git a/rust/changelog.d/20260430_041900_rust_query_parity.md b/rust/changelog.d/20260430_041900_rust_query_parity.md new file mode 100644 index 0000000..fbb2b45 --- /dev/null +++ b/rust/changelog.d/20260430_041900_rust_query_parity.md @@ -0,0 +1,5 @@ +--- +bump: minor +--- + +Expanded Rust query processor parity with the C# implementation, including unwrapped query pairs, structural and wildcard deletes, variable-driven updates, and named composite rename behavior. 
diff --git a/rust/src/query_processor.rs b/rust/src/query_processor.rs index 8623045..8eb358e 100644 --- a/rust/src/query_processor.rs +++ b/rust/src/query_processor.rs @@ -34,6 +34,50 @@ pub struct QueryProcessor { trace: bool, } +#[derive(Clone, Debug, Eq, PartialEq)] +struct Pattern { + index: String, + source: Option>, + target: Option>, +} + +impl Pattern { + fn new(index: String, source: Option, target: Option) -> Self { + Self { + index, + source: source.map(Box::new), + target: target.map(Box::new), + } + } + + fn is_leaf(&self) -> bool { + self.source.is_none() && self.target.is_none() + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct ResolvedLink { + index: u32, + source: u32, + target: u32, + name: Option, +} + +impl ResolvedLink { + fn new(index: u32, source: u32, target: u32, name: Option) -> Self { + Self { + index, + source, + target, + name, + } + } + + fn to_link(&self) -> Link { + Link::new(self.index, self.source, self.target) + } +} + impl QueryProcessor { /// Creates a new QueryProcessor pub fn new(trace: bool) -> Self { @@ -67,19 +111,17 @@ impl QueryProcessor { return Ok(vec![]); } - // We expect something like (( restriction ) ( substitution )) - let outer_link = &parsed_links[0]; - let outer_values = match &outer_link.values { - Some(v) if v.len() >= 2 => v, + // Accept both the wrapped form `((restriction) (substitution))` and + // the C# parser-compatible form `restriction substitution`. 
+ let (restriction_link, substitution_link) = match &parsed_links[0].values { + Some(values) if values.len() >= 2 => (&values[0], &values[1]), + _ if parsed_links.len() >= 2 => (&parsed_links[0], &parsed_links[1]), _ => { - self.trace_msg("[ProcessQuery] Outer link has fewer than 2 sub-links, returning."); + self.trace_msg("[ProcessQuery] Query has fewer than 2 links, returning."); return Ok(vec![]); } }; - let restriction_link = &outer_values[0]; - let substitution_link = &outer_values[1]; - self.trace_msg(&format!( "[ProcessQuery] Restriction link => Id={:?} Values.Count={}", restriction_link.id, @@ -127,14 +169,19 @@ impl QueryProcessor { self.trace_msg( "[ProcessQuery] Restriction non-empty, substitution empty => deletion scenario.", ); - if let Some(values) = &restriction_link.values { - for link_to_delete in values { - let delete_id = self.resolve_link_id(storage, link_to_delete)?; - if delete_id != 0 && storage.exists(delete_id) { - let before = storage.delete(delete_id)?; - changes_list.push((Some(before), None)); - self.trace_msg(&format!("[ProcessQuery] Deleted link ID #{}.", delete_id)); - } + let restriction_patterns = self.patterns_from_lino(restriction_link); + let mut links_to_delete = Vec::new(); + for pattern in &restriction_patterns { + links_to_delete.extend(self.matched_links(storage, pattern, &HashMap::new())); + } + links_to_delete.sort_by_key(|link| link.index); + links_to_delete.dedup_by_key(|link| link.index); + + for link in links_to_delete { + if storage.exists(link.index) { + let before = storage.delete(link.index)?; + changes_list.push((Some(before), None)); + self.trace_msg(&format!("[ProcessQuery] Deleted link ID #{}.", link.index)); } } storage.save()?; @@ -146,90 +193,45 @@ impl QueryProcessor { "[ProcessQuery] Both restriction and substitution non-empty => update/mixed scenario.", ); - // Build dictionaries for restriction and substitution links - let restriction_links = self.build_links_by_id(restriction_link); - let 
substitution_links = self.build_links_by_id(substitution_link); - - // Collect variable assignments from restriction links - let mut variable_assignments: HashMap = HashMap::new(); - - // First pass: resolve restriction links to extract variable values - for lino_link in restriction_links.values() { - if lino_link.values_count() == 2 { - if let Some(ref link_id) = lino_link.id { - if let Ok(numeric_id) = link_id.parse::() { - if storage.exists(numeric_id) { - let actual_link = storage.get(numeric_id).unwrap(); - if let Some(values) = &lino_link.values { - self.assign_variable( - &values[0].id, - actual_link.source, - &mut variable_assignments, - ); - self.assign_variable( - &values[1].id, - actual_link.target, - &mut variable_assignments, - ); - } + let restriction_patterns = self.patterns_from_lino(restriction_link); + let substitution_patterns = self.patterns_from_lino(substitution_link); + let solutions = self.find_all_solutions(storage, &restriction_patterns); + + if solutions.is_empty() { + self.trace_msg("[ProcessQuery] No solutions found => returning."); + return Ok(vec![]); + } + + let all_solutions_no_operation = solutions.iter().all(|solution| { + self.solution_is_no_operation( + storage, + solution, + &restriction_patterns, + &substitution_patterns, + ) + }); + + if all_solutions_no_operation { + for solution in &solutions { + for pattern in &restriction_patterns { + for link in self.matched_links(storage, pattern, solution) { + if !changes_list.contains(&(Some(link), Some(link))) { + changes_list.push((Some(link), Some(link))); } } } } + return Ok(changes_list); } - // Get all unique IDs - let mut all_ids: Vec = restriction_links - .keys() - .chain(substitution_links.keys()) - .cloned() - .collect(); - all_ids.sort(); - all_ids.dedup(); - - // Process each ID - for id in &all_ids { - let has_restriction = restriction_links.contains_key(id); - let has_substitution = substitution_links.contains_key(id); - - if has_restriction && has_substitution { - // 
Update operation - let restriction_lino = &restriction_links[id]; - let substitution_lino = &substitution_links[id]; - - let restriction_doublet = - self.to_doublet_link(storage, restriction_lino, &variable_assignments, true)?; - let substitution_doublet = - self.to_doublet_link(storage, substitution_lino, &variable_assignments, false)?; - - if restriction_doublet.index != 0 && storage.exists(restriction_doublet.index) { - let before = *storage.get(restriction_doublet.index).unwrap(); - storage.update( - restriction_doublet.index, - substitution_doublet.source, - substitution_doublet.target, - )?; - if let Some(after) = storage.get(restriction_doublet.index) { - changes_list.push((Some(before), Some(*after))); - } - } - } else if has_restriction && !has_substitution { - // Delete operation - let restriction_lino = &restriction_links[id]; - let restriction_doublet = - self.to_doublet_link(storage, restriction_lino, &variable_assignments, true)?; - - if restriction_doublet.index != 0 && storage.exists(restriction_doublet.index) { - let before = storage.delete(restriction_doublet.index)?; - changes_list.push((Some(before), None)); - } - } else if !has_restriction && has_substitution { - // Create operation - let substitution_lino = &substitution_links[id]; - let created_id = self.ensure_link_created(storage, substitution_lino)?; - if let Some(link) = storage.get(created_id) { - changes_list.push((None, Some(*link))); - } + for solution in &solutions { + let restriction_links = + self.resolve_patterns(storage, &restriction_patterns, solution, false)?; + let substitution_links = + self.resolve_patterns(storage, &substitution_patterns, solution, true)?; + let operations = self.determine_operations(&restriction_links, &substitution_links); + for (before, after) in operations { + self.apply_operation(storage, before, after, &mut changes_list)?; } } @@ -241,121 +243,620 @@ impl QueryProcessor { Ok(simplified) } - /// Builds a map of links by their ID - fn 
build_links_by_id(&self, lino_link: &LinoLink) -> HashMap { - let mut result = HashMap::new(); + fn patterns_from_lino(&self, lino_link: &LinoLink) -> Vec { + let mut patterns = lino_link + .values + .as_ref() + .map(|values| { + values + .iter() + .map(Self::create_pattern_from_lino) + .collect::>() + }) + .unwrap_or_default(); + + if lino_link.id.is_some() { + patterns.insert(0, Self::create_pattern_from_lino(lino_link)); + } + + patterns + } + + fn create_pattern_from_lino(lino_link: &LinoLink) -> Pattern { + let index = lino_link.id.clone().unwrap_or_default(); + match &lino_link.values { + Some(values) if values.len() == 2 => Pattern::new( + index, + Some(Self::create_pattern_from_lino(&values[0])), + Some(Self::create_pattern_from_lino(&values[1])), + ), + _ => Pattern::new(index, None, None), + } + } - if let Some(values) = &lino_link.values { - for value in values { - if let Some(ref id) = value.id { - result.insert(id.clone(), value.clone()); + fn find_all_solutions( + &self, + storage: &LinkStorage, + patterns: &[Pattern], + ) -> Vec> { + let mut partial_solutions = vec![HashMap::new()]; + + for pattern in patterns { + let mut new_solutions = Vec::new(); + for solution in &partial_solutions { + for match_solution in self.match_pattern(storage, pattern, solution) { + if Self::solutions_are_compatible(solution, &match_solution) { + let mut combined = solution.clone(); + combined.extend(match_solution); + new_solutions.push(combined); + } } } + partial_solutions = new_solutions; + if partial_solutions.is_empty() { + break; + } } - if let Some(ref id) = lino_link.id { - result.insert(id.clone(), lino_link.clone()); + partial_solutions + } + + fn solutions_are_compatible( + existing: &HashMap, + new_assignments: &HashMap, + ) -> bool { + new_assignments + .iter() + .all(|(key, value)| existing.get(key).is_none_or(|existing| existing == value)) + } + + fn match_pattern( + &self, + storage: &LinkStorage, + pattern: &Pattern, + current_solution: &HashMap, + ) -> 
Vec> { + if pattern.is_leaf() { + let resolved_index = self.resolve_match_id(storage, &pattern.index, current_solution); + return storage + .all() + .into_iter() + .filter(|link| Self::is_any(resolved_index) || link.index == resolved_index) + .map(|link| { + let mut assignments = HashMap::new(); + Self::assign_variable(&pattern.index, link.index, &mut assignments); + assignments + }) + .collect(); } - result + let resolved_index = self.resolve_match_id(storage, &pattern.index, current_solution); + + if !Self::is_variable(&pattern.index) + && !Self::is_any(resolved_index) + && resolved_index != 0 + && storage.exists(resolved_index) + { + let link = *storage.get(resolved_index).unwrap(); + return self.match_link_against_pattern(storage, pattern, link, current_solution); + } + + storage + .all() + .into_iter() + .copied() + .flat_map(|link| { + self.match_link_against_pattern(storage, pattern, link, current_solution) + }) + .collect() } - /// Assigns a variable value if the identifier is a variable - fn assign_variable( + fn match_link_against_pattern( &self, - id: &Option, - value: u32, - assignments: &mut HashMap, - ) { - if let Some(ref id) = id { - if id.starts_with('$') && value != 0 { - assignments.insert(id.clone(), value); + storage: &LinkStorage, + pattern: &Pattern, + link: Link, + current_solution: &HashMap, + ) -> Vec> { + if !self.check_id_match(storage, &pattern.index, link.index, current_solution) { + return Vec::new(); + } + + let mut results = Vec::new(); + let source_matches = self.recursive_match_subpattern( + storage, + pattern.source.as_deref(), + link.source, + current_solution, + ); + + for source_solution in source_matches { + let target_matches = self.recursive_match_subpattern( + storage, + pattern.target.as_deref(), + link.target, + &source_solution, + ); + for mut target_solution in target_matches { + Self::assign_variable(&pattern.index, link.index, &mut target_solution); + results.push(target_solution); } } + + results } - /// Converts a 
LinoLink to a Link - fn to_doublet_link( + fn recursive_match_subpattern( &self, - storage: &mut LinkStorage, - lino_link: &LinoLink, - variable_assignments: &HashMap, - use_any_default: bool, - ) -> Result { - let default_value = if use_any_default { u32::MAX } else { 0 }; + storage: &LinkStorage, + pattern: Option<&Pattern>, + link_id: u32, + current_solution: &HashMap, + ) -> Vec> { + let Some(pattern) = pattern else { + return vec![current_solution.clone()]; + }; + + if pattern.is_leaf() { + if self.check_id_match(storage, &pattern.index, link_id, current_solution) { + let mut solution = current_solution.clone(); + Self::assign_variable(&pattern.index, link_id, &mut solution); + return vec![solution]; + } + return Vec::new(); + } - let mut index = default_value; - let mut source = default_value; - let mut target = default_value; + let Some(link) = storage.get(link_id).copied() else { + return Vec::new(); + }; + + self.match_link_against_pattern(storage, pattern, link, current_solution) + } - // Parse index - if let Some(ref id) = lino_link.id { - index = self.resolve_id(storage, id, variable_assignments, default_value)?; + fn check_id_match( + &self, + storage: &LinkStorage, + pattern_id: &str, + candidate_id: u32, + current_solution: &HashMap, + ) -> bool { + if pattern_id.is_empty() || pattern_id == "*" { + return true; } - // Parse source and target - if let Some(ref values) = lino_link.values { - if values.len() >= 2 { - if let Some(ref source_id) = values[0].id { - source = - self.resolve_id(storage, source_id, variable_assignments, default_value)?; - } - if let Some(ref target_id) = values[1].id { - target = - self.resolve_id(storage, target_id, variable_assignments, default_value)?; + if Self::is_variable(pattern_id) { + return current_solution + .get(pattern_id) + .is_none_or(|existing| *existing == candidate_id); + } + + if let Ok(parsed) = pattern_id.parse::() { + return parsed == candidate_id; + } + + storage + .get_by_name(pattern_id) + 
.is_some_and(|named_id| named_id == candidate_id) + } + + fn resolve_match_id( + &self, + storage: &LinkStorage, + identifier: &str, + current_solution: &HashMap, + ) -> u32 { + if identifier.is_empty() || identifier == "*" { + return u32::MAX; + } + if let Some(value) = current_solution.get(identifier) { + return *value; + } + if Self::is_variable(identifier) { + return u32::MAX; + } + if let Ok(parsed) = identifier.parse::() { + return parsed; + } + storage.get_by_name(identifier).unwrap_or(0) + } + + fn matched_links( + &self, + storage: &LinkStorage, + pattern: &Pattern, + solution: &HashMap, + ) -> Vec { + if pattern.is_leaf() { + let resolved_index = self.resolve_match_id(storage, &pattern.index, solution); + return storage + .all() + .into_iter() + .filter(|link| Self::is_any(resolved_index) || link.index == resolved_index) + .copied() + .collect(); + } + + self.match_pattern(storage, pattern, solution) + .into_iter() + .filter_map(|matched_solution| { + self.resolve_pattern_readonly(storage, pattern, &matched_solution, false) + }) + .flat_map(|definition| self.links_matching_definition(storage, &definition)) + .collect() + } + + fn solution_is_no_operation( + &self, + storage: &LinkStorage, + solution: &HashMap, + restrictions: &[Pattern], + substitutions: &[Pattern], + ) -> bool { + let mut restriction_links = self + .resolve_patterns_readonly(storage, restrictions, solution, false) + .into_iter() + .map(|definition| definition.to_link()) + .collect::>(); + let mut substitution_links = self + .resolve_patterns_readonly(storage, substitutions, solution, true) + .into_iter() + .map(|definition| definition.to_link()) + .collect::>(); + + restriction_links.sort_by_key(|link| link.index); + substitution_links.sort_by_key(|link| link.index); + + restriction_links == substitution_links + } + + fn resolve_patterns_readonly( + &self, + storage: &LinkStorage, + patterns: &[Pattern], + solution: &HashMap, + is_substitution: bool, + ) -> Vec { + patterns + .iter() + 
.filter_map(|pattern| { + self.resolve_pattern_readonly(storage, pattern, solution, is_substitution) + }) + .collect() + } + + fn resolve_pattern_readonly( + &self, + storage: &LinkStorage, + pattern: &Pattern, + solution: &HashMap, + is_substitution: bool, + ) -> Option { + if pattern.is_leaf() { + let index = self.resolve_identifier_readonly( + storage, + &pattern.index, + solution, + if is_substitution { 0 } else { u32::MAX }, + ); + return Some(ResolvedLink::new(index, u32::MAX, u32::MAX, None)); + } + + let source = self + .resolve_pattern_readonly( + storage, + pattern.source.as_deref()?, + solution, + is_substitution, + )? + .index; + let target = self + .resolve_pattern_readonly( + storage, + pattern.target.as_deref()?, + solution, + is_substitution, + )? + .index; + let default_index = if is_substitution { 0 } else { u32::MAX }; + let index = + self.resolve_identifier_readonly(storage, &pattern.index, solution, default_index); + + Some(ResolvedLink::new(index, source, target, None)) + } + + fn resolve_identifier_readonly( + &self, + storage: &LinkStorage, + identifier: &str, + solution: &HashMap, + default_value: u32, + ) -> u32 { + if identifier.is_empty() { + return default_value; + } + if identifier == "*" { + return u32::MAX; + } + if let Some(value) = solution.get(identifier) { + return *value; + } + if Self::is_variable(identifier) { + return default_value; + } + if let Ok(parsed) = identifier.parse::() { + return parsed; + } + storage.get_by_name(identifier).unwrap_or(default_value) + } + + fn resolve_patterns( + &self, + storage: &mut LinkStorage, + patterns: &[Pattern], + solution: &HashMap, + is_substitution: bool, + ) -> Result> { + patterns + .iter() + .map(|pattern| self.resolve_pattern(storage, pattern, solution, is_substitution)) + .collect() + } + + fn resolve_pattern( + &self, + storage: &mut LinkStorage, + pattern: &Pattern, + solution: &HashMap, + is_substitution: bool, + ) -> Result { + if pattern.is_leaf() { + let index = 
self.resolve_identifier( + storage, + &pattern.index, + solution, + if is_substitution { 0 } else { u32::MAX }, + is_substitution, + )?; + return Ok(ResolvedLink::new(index, u32::MAX, u32::MAX, None)); + } + + let source = self + .resolve_pattern( + storage, + pattern.source.as_deref().unwrap(), + solution, + is_substitution, + )? + .index; + let target = self + .resolve_pattern( + storage, + pattern.target.as_deref().unwrap(), + solution, + is_substitution, + )? + .index; + let default_index = if is_substitution { 0 } else { u32::MAX }; + let mut index = + self.resolve_identifier(storage, &pattern.index, solution, default_index, false)?; + let mut name = None; + + if is_substitution + && !pattern.index.is_empty() + && !Self::is_numeric_or_wildcard(&pattern.index) + { + name = Some(pattern.index.clone()); + if index == 0 { + if let Some(existing_id) = storage.search(source, target) { + index = existing_id; } } } - Ok(Link::new(index, source, target)) + Ok(ResolvedLink::new(index, source, target, name)) } - /// Resolves an ID string to a numeric value - fn resolve_id( + fn resolve_identifier( &self, storage: &mut LinkStorage, - id: &str, - variable_assignments: &HashMap, + identifier: &str, + solution: &HashMap, default_value: u32, + create_named_leaf: bool, ) -> Result { - if id.is_empty() { + if identifier.is_empty() { return Ok(default_value); } + if identifier == "*" { + return Ok(u32::MAX); + } + if let Some(value) = solution.get(identifier) { + return Ok(*value); + } + if Self::is_variable(identifier) { + return Ok(default_value); + } + if let Ok(parsed) = identifier.parse::() { + return Ok(parsed); + } + if let Some(named_id) = storage.get_by_name(identifier) { + return Ok(named_id); + } + if create_named_leaf { + return Ok(storage.get_or_create_named(identifier)); + } + Ok(default_value) + } - if id == "*" { - return Ok(u32::MAX); // ANY constant + fn determine_operations( + &self, + restrictions: &[ResolvedLink], + substitutions: &[ResolvedLink], + ) -> 
Vec<(Option, Option)> { + let mut operations = Vec::new(); + let mut restriction_by_index = HashMap::new(); + let mut substitution_by_index = HashMap::new(); + let mut wildcard_restrictions = Vec::new(); + let mut wildcard_substitutions = Vec::new(); + + for restriction in restrictions { + if Self::is_normal_index(restriction.index) { + restriction_by_index.insert(restriction.index, restriction.clone()); + } else { + wildcard_restrictions.push(restriction.clone()); + } } - // Check if it's a variable - if id.starts_with('$') { - if let Some(&value) = variable_assignments.get(id) { - return Ok(value); + for substitution in substitutions { + if Self::is_normal_index(substitution.index) { + substitution_by_index.insert(substitution.index, substitution.clone()); + } else { + wildcard_substitutions.push(substitution.clone()); } - return Ok(default_value); } - // Try to parse as number - if let Ok(num) = id.parse::() { - return Ok(num); + let mut all_indices = restriction_by_index + .keys() + .chain(substitution_by_index.keys()) + .copied() + .collect::>(); + all_indices.sort_unstable(); + all_indices.dedup(); + + for index in all_indices { + match ( + restriction_by_index.get(&index), + substitution_by_index.get(&index), + ) { + (Some(before), Some(after)) => { + operations.push((Some(before.clone()), Some(after.clone()))); + } + (Some(before), None) => operations.push((Some(before.clone()), None)), + (None, Some(after)) => operations.push((None, Some(after.clone()))), + (None, None) => {} + } } - // Try to resolve as name - if let Some(link_id) = storage.get_by_name(id) { - return Ok(link_id); + operations.extend( + wildcard_restrictions + .into_iter() + .map(|restriction| (Some(restriction), None)), + ); + operations.extend( + wildcard_substitutions + .into_iter() + .map(|substitution| (None, Some(substitution))), + ); + + operations + } + + fn apply_operation( + &self, + storage: &mut LinkStorage, + before: Option, + after: Option, + changes: &mut Vec<(Option, 
Option)>, + ) -> Result<()> { + match (before, after) { + (Some(before), None) => { + let mut links = self.links_matching_definition(storage, &before); + links.sort_by_key(|link| link.index); + links.dedup_by_key(|link| link.index); + for link in links { + if storage.exists(link.index) { + let deleted = storage.delete(link.index)?; + changes.push((Some(deleted), None)); + } + } + } + (None, Some(after)) => { + let created = self.create_or_update_resolved_link(storage, &after)?; + changes.push((None, Some(created))); + } + (Some(before), Some(after)) => { + if before.index == after.index && storage.exists(before.index) { + let before_link = *storage.get(before.index).unwrap(); + if before_link.source != after.source || before_link.target != after.target { + storage.update(before.index, after.source, after.target)?; + } + if let Some(name) = &after.name { + storage.set_name(before.index, name); + } + let after_link = *storage.get(before.index).unwrap(); + changes.push((Some(before_link), Some(after_link))); + } else { + self.apply_operation(storage, Some(before), None, changes)?; + self.apply_operation(storage, None, Some(after), changes)?; + } + } + (None, None) => {} } - // Create as name if not found - Ok(storage.get_or_create_named(id)) + Ok(()) } - /// Resolves the ID of a LinoLink - fn resolve_link_id(&self, storage: &mut LinkStorage, lino_link: &LinoLink) -> Result { - let empty_map = HashMap::new(); - if let Some(ref id) = lino_link.id { - self.resolve_id(storage, id, &empty_map, 0) + fn create_or_update_resolved_link( + &self, + storage: &mut LinkStorage, + definition: &ResolvedLink, + ) -> Result { + let id = if Self::is_normal_index(definition.index) { + storage.ensure_created(definition.index); + storage.update(definition.index, definition.source, definition.target)?; + definition.index + } else if let Some(existing_id) = storage.search(definition.source, definition.target) { + existing_id } else { - Ok(0) + storage.create(definition.source, 
definition.target) + }; + + if let Some(name) = &definition.name { + storage.set_name(id, name); } + + Ok(*storage.get(id).unwrap()) + } + + fn links_matching_definition( + &self, + storage: &LinkStorage, + definition: &ResolvedLink, + ) -> Vec { + storage + .all() + .into_iter() + .filter(|link| { + (definition.index == 0 + || Self::is_any(definition.index) + || link.index == definition.index) + && (Self::is_any(definition.source) || link.source == definition.source) + && (Self::is_any(definition.target) || link.target == definition.target) + }) + .copied() + .collect() + } + + fn assign_variable(id: &str, value: u32, assignments: &mut HashMap) { + if Self::is_variable(id) && value != 0 { + assignments.insert(id.to_string(), value); + } + } + + fn is_variable(identifier: &str) -> bool { + !identifier.is_empty() && identifier.starts_with('$') + } + + fn is_any(value: u32) -> bool { + value == u32::MAX + } + + fn is_normal_index(value: u32) -> bool { + value != 0 && !Self::is_any(value) + } + + fn is_numeric_or_wildcard(identifier: &str) -> bool { + identifier == "*" || identifier.parse::().is_ok() } /// Ensures a link is created from a LinoLink pattern diff --git a/rust/tests/query_processor_csharp_parity_tests.rs b/rust/tests/query_processor_csharp_parity_tests.rs new file mode 100644 index 0000000..4186565 --- /dev/null +++ b/rust/tests/query_processor_csharp_parity_tests.rs @@ -0,0 +1,207 @@ +//! C# AdvancedMixedQueryProcessor parity tests. 
+ +use anyhow::Result; +use link_cli::{Link, LinkStorage, QueryProcessor}; +use tempfile::NamedTempFile; + +fn with_storage(test: impl FnOnce(&mut LinkStorage, &QueryProcessor) -> Result<()>) -> Result<()> { + let temp_file = NamedTempFile::new()?; + let db_path = temp_file.path().to_str().unwrap(); + let mut storage = LinkStorage::new(db_path, false)?; + let processor = QueryProcessor::new(false); + test(&mut storage, &processor) +} + +fn sorted_links(storage: &LinkStorage) -> Vec { + let mut links: Vec = storage.all().into_iter().copied().collect(); + links.sort_by_key(|link| link.index); + links +} + +fn assert_link_exists(storage: &LinkStorage, index: u32, source: u32, target: u32) { + let link = storage + .get(index) + .unwrap_or_else(|| panic!("missing link {index}: {source} {target}")); + assert_eq!(*link, Link::new(index, source, target)); +} + +#[test] +fn test_unwrapped_create_query_matches_csharp() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "() ((1: 1 1))")?; + + assert_eq!(storage.all().len(), 1); + assert_link_exists(storage, 1, 1, 1); + Ok(()) + }) +} + +#[test] +fn test_create_explicit_index_after_gap_matches_csharp() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "(() ((3: 3 3)))")?; + + assert_eq!(storage.all().len(), 1); + assert_link_exists(storage, 3, 3, 3); + Ok(()) + }) +} + +#[test] +fn test_create_deep_nested_numeric_links_matches_csharp() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "(() (((1 1) ((2 2) ((3 3) ((4 4) (5 5)))))))")?; + + assert_eq!(storage.all().len(), 9); + assert_link_exists(storage, 1, 1, 1); + assert_link_exists(storage, 2, 2, 2); + assert_link_exists(storage, 3, 3, 3); + assert_link_exists(storage, 4, 4, 4); + assert_link_exists(storage, 5, 5, 5); + assert_link_exists(storage, 6, 4, 5); + assert_link_exists(storage, 7, 3, 6); + assert_link_exists(storage, 8, 2, 7); + 
assert_link_exists(storage, 9, 1, 8); + Ok(()) + }) +} + +#[test] +fn test_delete_by_source_target_pattern_matches_csharp() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "(() ((1 2)))")?; + processor.process_query(storage, "(() ((2 2)))")?; + + processor.process_query(storage, "(((1 2)) ())")?; + + assert_eq!(storage.all().len(), 1); + assert_link_exists(storage, 2, 2, 2); + Ok(()) + }) +} + +#[test] +fn test_delete_by_wildcard_target_matches_csharp() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "(() ((1 2) (2 2)))")?; + + processor.process_query(storage, "(((1 *)) ())")?; + + assert_eq!(storage.all().len(), 1); + assert_link_exists(storage, 2, 2, 2); + Ok(()) + }) +} + +#[test] +fn test_delete_all_by_index_wildcard_matches_csharp() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "(() ((1 2) (2 2)))")?; + + processor.process_query(storage, "(((*:)) ())")?; + + assert!(storage.all().is_empty()); + Ok(()) + }) +} + +#[test] +fn test_swap_all_links_using_variables_matches_csharp() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "(() ((1 2) (2 1)))")?; + + processor.process_query( + storage, + "((($index: $source $target)) (($index: $target $source)))", + )?; + + assert_eq!(storage.all().len(), 2); + assert_link_exists(storage, 1, 2, 1); + assert_link_exists(storage, 2, 1, 2); + Ok(()) + }) +} + +#[test] +fn test_no_op_variable_query_returns_matched_changes() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "(() ((1 1)))")?; + processor.process_query(storage, "(() ((2 2)))")?; + + let changes = processor.process_query( + storage, + "((($index: $source $target)) (($index: $source $target)))", + )?; + + assert_eq!( + sorted_links(storage), + vec![Link::new(1, 1, 1), Link::new(2, 2, 2)] + ); + assert_eq!(changes.len(), 2); + 
assert!(changes.contains(&(Some(Link::new(1, 1, 1)), Some(Link::new(1, 1, 1))))); + assert!(changes.contains(&(Some(Link::new(2, 2, 2)), Some(Link::new(2, 2, 2))))); + Ok(()) + }) +} + +#[test] +fn test_named_link_rename_matches_csharp() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "(() ((child: father mother)))")?; + + processor.process_query(storage, "(((child: father mother)) ((son: father mother)))")?; + + assert_eq!(storage.get_by_name("child"), None); + let son_id = storage.get_by_name("son").expect("son should exist"); + let father_id = storage.get_by_name("father").expect("father should exist"); + let mother_id = storage.get_by_name("mother").expect("mother should exist"); + assert_link_exists(storage, son_id, father_id, mother_id); + assert_eq!(storage.all().len(), 3); + Ok(()) + }) +} + +#[test] +fn test_delete_by_names_keeps_leaf_names_matches_csharp() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "(() ((child: father mother)))")?; + + processor.process_query(storage, "(((child: father mother)) ())")?; + + assert_eq!(storage.get_by_name("child"), None); + assert!(storage.get_by_name("father").is_some()); + assert!(storage.get_by_name("mother").is_some()); + assert_eq!(storage.all().len(), 2); + Ok(()) + }) +} + +#[test] +fn test_unknown_named_restriction_matches_nothing() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "(() ((known: left right)))")?; + + let changes = processor.process_query(storage, "(((unknown: left right)) ())")?; + + assert!(changes.is_empty()); + assert_eq!(storage.all().len(), 3); + assert!(storage.get_by_name("known").is_some()); + assert!(storage.get_by_name("unknown").is_none()); + Ok(()) + }) +} + +#[test] +fn test_string_composite_left_child_does_not_create_extra_leaf() -> Result<()> { + with_storage(|storage, processor| { + processor.process_query(storage, "(() ((type: type type)))")?; + 
processor.process_query(storage, "(() ((link: link type)))")?; + + let type_id = storage.get_by_name("type").expect("type should exist"); + let link_id = storage.get_by_name("link").expect("link should exist"); + assert_eq!(storage.all().len(), 2); + assert_link_exists(storage, type_id, type_id, type_id); + assert_link_exists(storage, link_id, link_id, type_id); + Ok(()) + }) +} From 30c354d910eac3f2169db0747f1e797508321a7c Mon Sep 17 00:00:00 2001 From: konard Date: Thu, 30 Apr 2026 04:44:00 +0000 Subject: [PATCH 3/6] feat: wire Rust basis crates --- docs/case-studies/issue-67/README.md | 9 +- rust/Cargo.lock | 251 +++++++++++++++++- rust/Cargo.toml | 9 +- .../20260430_041900_rust_query_parity.md | 2 +- rust/src/lib.rs | 2 +- rust/src/link.rs | 15 ++ rust/src/main.rs | 2 +- rust/src/parser.rs | 154 +---------- rust/src/query_processor.rs | 6 +- rust/tests/dependency_basis_tests.rs | 24 ++ rust/tests/link_tests.rs | 13 +- rust/tests/parser_tests.rs | 15 ++ 12 files changed, 345 insertions(+), 157 deletions(-) create mode 100644 rust/tests/dependency_basis_tests.rs diff --git a/docs/case-studies/issue-67/README.md b/docs/case-studies/issue-67/README.md index c6326aa..667eabe 100644 --- a/docs/case-studies/issue-67/README.md +++ b/docs/case-studies/issue-67/README.md @@ -9,7 +9,8 @@ This case study captures the requirements from issue 67, the external components As of 2026-04-30: - `linksplatform/doublets-rs`: GitHub repository , latest GitHub release `v0.3.0`; crates.io package appears as `doublets = "0.3.0"`. -- `link-foundation/links-notation`: GitHub repository , latest GitHub release `0.13.0_csharp`; crates.io package appears as `links-notation = "0.13.0"`. +- `link-foundation/links-notation`: GitHub repository , latest Rust tag `rust_0.13.0`; crates.io package appears as `links-notation = "0.13.0"`. +- `link-foundation/lino-arguments`: GitHub repository , latest Rust release `0.3.0`; crates.io package appears as `lino-arguments = "0.3.0"`. 
- `linksplatform/Data.Doublets.Sequences`: GitHub repository , latest GitHub release `csharp_0.6.5`. - CI/CD templates requested by the issue: - @@ -20,7 +21,7 @@ As of 2026-04-30: | Requirement | Current status | Solution plan | | --- | --- | --- | -| Use the latest `doublets-rs` and `links-notation` as a Rust basis. | The current Rust package still uses local storage/parser components. The current published Rust crates are `doublets = "0.3.0"` and `links-notation = "0.13.0"`. | Introduce these crates behind small adapter modules so CLI behavior remains stable while storage and notation parsing are swapped incrementally. Start with parser fixtures, then storage fixtures, then binary compatibility fixtures. | +| Use the latest `doublets-rs`, `links-notation`, and `lino-arguments` as a Rust basis. | `rust/Cargo.toml` now declares `doublets = "0.3.0"`, `links-notation = "0.13.0"`, and `lino-arguments = "0.3.0"` with upstream source links. The Rust parser delegates to `links-notation`, CLI parsing uses `lino-arguments`, and the local link model has conversion coverage for `doublets::Link`. | Continue replacing local storage internals behind compatibility tests so public CLI behavior remains stable while binary storage parity is developed. | | Reimplement sequence support in pure Rust based on `Data.Doublets.Sequences`. | The Rust implementation has link storage and query operations, but no dedicated sequence layer matching the C# package. | Port sequence primitives as a separate Rust module with C# fixture parity tests for creation, traversal, Unicode text, deletion, and persistence. | | Match C# Unicode support and binary file compatibility. | Existing Rust tests cover named links and persistence basics. Cross-runtime binary fixtures are not yet complete. | Add C#-generated binary fixtures and Rust-generated binary fixtures, then verify both runtimes can read each file without data loss. Include non-ASCII names and multi-codepoint text cases. 
| | Support the same CLI options, features, and tests as C#. | The repository already has C# and Rust test suites. This PR closes concrete query semantics gaps found against the C# `AdvancedMixedQueryProcessor` behavior. | Continue converting C# tests into Rust parity tests by feature area: storage, parser, query processor, CLI commands, persistence, and sequences. | @@ -39,12 +40,14 @@ This PR focuses on query processor parity gaps that were blocking Rust behavior - Applies variable-driven swaps and replacements using solution bindings from the restriction side. - Returns matched changes for no-op variable substitutions, matching the C# behavior. - Reuses existing structural links for named composite substitutions before applying a new name, avoiding accidental duplicate leaf creation. +- Declares and compiles the requested Rust basis crates: `doublets`, `links-notation`, and `lino-arguments`. +- Uses `links-notation` for parsing, including richer quoted Unicode identifiers, and uses `lino-arguments` as the CLI argument parser entrypoint. The Rust test suite now includes focused parity tests in `rust/tests/query_processor_csharp_parity_tests.rs`. ## Next Parity Work -1. Add dependency adapter experiments for `doublets` and `links-notation` without replacing public CLI behavior in a single step. +1. Replace the remaining local text-file storage internals with a `doublets`-backed adapter behind compatibility tests. 2. Build cross-runtime fixture tests for binary file compatibility and Unicode names/text. 3. Port sequence primitives from the C# sequence package into Rust with fixture-driven tests. 4. Expand Rust CLI tests until every C# CLI behavior has a corresponding Rust assertion. diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 9809a78..05b28f9 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "allocator-api2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c880a97d28a3681c0267bd29cff89621202715b065127cd445fa0f0fe0aa2880" + [[package]] name = "anstream" version = "0.6.21" @@ -58,6 +64,18 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + [[package]] name = "bitflags" version = "2.10.0" @@ -116,6 +134,59 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "ctor" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec09e802f5081de6157da9a75701d6c713d8dc3ba52571fd4bd25f412644e8a6" +dependencies = [ + "ctor-proc-macro", + "dtor", +] + +[[package]] +name = "ctor-proc-macro" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2931af7e13dc045d8e9d26afccc6fa115d64e115c9c84b1166288b46f6782c2" + +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "doublets" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5acc4914b466aabfcf42c60b4adbb6e3fc38fb241cd30f50eab36fcc7e9872" +dependencies = [ + "cfg-if", + "leak_slice", + "platform-data", + "platform-mem", + 
"platform-num", + "platform-trees", + "tap", + "thiserror 2.0.18", +] + +[[package]] +name = "dtor" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97cbdf2ad6846025e8e25df05171abfb30e3ababa12ee0a0e44b9bbe570633a8" +dependencies = [ + "dtor-proc-macro", +] + +[[package]] +name = "dtor-proc-macro" +version = "0.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7454e41ff9012c00d53cf7f475c5e3afa3b91b7c90568495495e8d9bf47a1055" + [[package]] name = "errno" version = "0.3.14" @@ -156,6 +227,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "leak_slice" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecf3387da9fb41906394e1306ddd3cd26dd9b7177af11c19b45b364b743aed26" + [[package]] name = "libc" version = "0.2.178" @@ -168,16 +245,81 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", + "doublets", + "links-notation", + "lino-arguments", "tempfile", - "thiserror", + "thiserror 2.0.18", +] + +[[package]] +name = "links-notation" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4c952b42a8c6ff6f849d7cafe3b1e13f1063a51bbb144bc6c62026ab327814c" +dependencies = [ + "nom", +] + +[[package]] +name = "lino-arguments" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be512a5c5eacea6ef5ec015fb0c7e1725c8e4cda1befd31606e203f281069968" +dependencies = [ + "clap", + "ctor", + "dotenvy", + "lino-env", + "serde", + "thiserror 1.0.69", ] +[[package]] +name = "lino-env" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f453c53827aabe91a3d3856d61d14ae3867ab1a4344db22f9fa5396664c8d0e" + [[package]] name = "linux-raw-sys" version = "0.11.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", +] + +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -190,6 +332,47 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "platform-data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6782bc71345465116de96d250a36dcf49336010a2320d958d12a5d4390186c90" +dependencies = [ + "beef", + "platform-num", + "thiserror 2.0.18", +] + +[[package]] +name = "platform-mem" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27cff7c92440ac926c8c91ea3151db6e52a262f602d0c157f254e422fc15b12" +dependencies = [ + "allocator-api2", + "memmap2", + "tempfile", + "thiserror 2.0.18", +] + +[[package]] +name = "platform-num" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2c4ca8e18138b1c90ad802aff931f946a0e6bd760c35af30f1ff2489489ab54a" +dependencies = [ + "num-traits", +] + +[[package]] +name = "platform-trees" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40e25a531617fa762c8505826c930f6c1cfcc226f63dea09882b56ae0b8ed078" +dependencies = [ + "platform-num", +] + [[package]] name = "proc-macro2" version = "1.0.104" @@ -227,6 +410,36 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "strsim" version = "0.11.1" @@ -244,6 +457,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.24.0" @@ -259,18 +478,38 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ - "thiserror-impl", + "proc-macro2", + "quote", + "syn", ] [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 278c46f..f137675 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -18,9 +18,16 @@ name = "clink" path = "src/main.rs" [dependencies] -clap = { version = "4.5", features = ["derive"] } thiserror = "2.0" anyhow = "1.0" +clap = { version = "4.5", features = ["derive"] } +# Issue 67 Rust basis crates: +# Source: http://github.com/linksplatform/doublets-rs +doublets = "0.3.0" +# Source: http://github.com/link-foundation/links-notation +links-notation = "0.13.0" +# Source: http://github.com/link-foundation/lino-arguments +lino-arguments = "0.3.0" [dev-dependencies] tempfile = "3.14" diff --git a/rust/changelog.d/20260430_041900_rust_query_parity.md b/rust/changelog.d/20260430_041900_rust_query_parity.md index fbb2b45..1c8240f 100644 --- a/rust/changelog.d/20260430_041900_rust_query_parity.md +++ b/rust/changelog.d/20260430_041900_rust_query_parity.md @@ -2,4 +2,4 @@ bump: minor --- -Expanded Rust query processor parity with the C# implementation, including unwrapped query pairs, structural and wildcard deletes, variable-driven updates, and named composite rename behavior. 
+Expanded Rust query processor parity with the C# implementation, including unwrapped query pairs, structural and wildcard deletes, variable-driven updates, named composite rename behavior, and direct Rust basis dependencies on doublets, links-notation, and lino-arguments. diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 2139475..56bf4b9 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -25,7 +25,7 @@ mod query_processor; // Re-export main types for easy access pub use changes_simplifier::simplify_changes; pub use error::LinkError; -pub use link::Link; +pub use link::{DoubletsLink, Link}; pub use link_storage::LinkStorage; pub use lino_link::LinoLink; pub use parser::Parser; diff --git a/rust/src/link.rs b/rust/src/link.rs index 72d4881..e963f84 100644 --- a/rust/src/link.rs +++ b/rust/src/link.rs @@ -36,3 +36,18 @@ impl Link { format!("({} {} {})", self.index, self.source, self.target) } } + +/// Link type from the upstream `doublets` crate used as the Rust basis. +pub type DoubletsLink = doublets::Link; + +impl From for Link { + fn from(link: DoubletsLink) -> Self { + Self::new(link.index, link.source, link.target) + } +} + +impl From for DoubletsLink { + fn from(link: Link) -> Self { + Self::new(link.index, link.source, link.target) + } +} diff --git a/rust/src/main.rs b/rust/src/main.rs index 946b1e2..9efbb9c 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -4,8 +4,8 @@ //! similar functionality to the C# version. use anyhow::Result; -use clap::Parser; use link_cli::{LinkStorage, QueryProcessor}; +use lino_arguments::Parser; /// Link CLI - A CLI tool for managing links data store #[derive(Parser, Debug)] diff --git a/rust/src/parser.rs b/rust/src/parser.rs index 50ebd79..afec6b5 100644 --- a/rust/src/parser.rs +++ b/rust/src/parser.rs @@ -1,10 +1,11 @@ -//! LiNo Parser - Parses LiNo notation into LinoLink structures +//! LiNo Parser - Parses LiNo notation into LinoLink structures. //! -//! 
This module provides the Parser for LiNo notation, corresponding to -//! Platform.Protocols.Lino.Parser in C#. +//! This module adapts the upstream `links-notation` parser into the local +//! `LinoLink` representation used by the query processor. use crate::error::LinkError; use crate::lino_link::LinoLink; +use links_notation::{parse_lino_to_links, LiNo}; /// Parser for LiNo notation /// Corresponds to Platform.Protocols.Lino.Parser in C# @@ -18,147 +19,20 @@ impl Parser { /// Parses a LiNo query string into a list of LinoLinks pub fn parse(&self, query: &str) -> Result, LinkError> { - let query = query.trim(); - if query.is_empty() { - return Ok(vec![]); - } - - let mut result = Vec::new(); - let mut pos = 0; - let chars: Vec = query.chars().collect(); - - while pos < chars.len() { - self.skip_whitespace(&chars, &mut pos); - if pos >= chars.len() { - break; - } - - if chars[pos] == '(' { - let link = self.parse_link(&chars, &mut pos)?; - result.push(link); - } else { - // Handle top-level identifiers - let id = self.parse_identifier(&chars, &mut pos)?; - result.push(LinoLink::new(Some(id))); - } - } - - Ok(result) + parse_lino_to_links(query) + .map(|links| links.into_iter().map(Self::convert_link).collect()) + .map_err(|error| LinkError::ParseError(error.to_string())) } - /// Parses a single link starting at the given position - fn parse_link(&self, chars: &[char], pos: &mut usize) -> Result { - if *pos >= chars.len() || chars[*pos] != '(' { - return Err(LinkError::ParseError( - "Expected '(' at start of link".to_string(), - )); - } - *pos += 1; // consume '(' - - self.skip_whitespace(chars, pos); - - let mut id: Option = None; - let mut values: Vec = Vec::new(); - - // Parse content until ')' - while *pos < chars.len() && chars[*pos] != ')' { - self.skip_whitespace(chars, pos); - - if *pos >= chars.len() || chars[*pos] == ')' { - break; + fn convert_link(link: LiNo) -> LinoLink { + match link { + LiNo::Ref(id) => LinoLink::new(Some(id)), + LiNo::Link { id, 
values } if values.is_empty() => { + id.map(|id| LinoLink::new(Some(id))).unwrap_or_default() } - - if chars[*pos] == '(' { - // Nested link - let nested = self.parse_link(chars, pos)?; - values.push(nested); - } else { - // Identifier or ID - let identifier = self.parse_identifier(chars, pos)?; - - // Check if this is an ID (ends with ':') - if identifier.ends_with(':') { - // This is the link's ID - let clean_id = identifier.trim_end_matches(':').to_string(); - id = Some(clean_id); - } else { - // This is a value - values.push(LinoLink::new(Some(identifier))); - } + LiNo::Link { id, values } => { + LinoLink::with_values(id, values.into_iter().map(Self::convert_link).collect()) } - - self.skip_whitespace(chars, pos); - } - - // Consume ')' - if *pos < chars.len() && chars[*pos] == ')' { - *pos += 1; - } - - // If no explicit ID but we have values, the first non-nested element might be the ID - // This handles cases like "(id source target)" where id is the index - if id.is_none() && !values.is_empty() { - // Check if first value could be an ID (single identifier, not a nested link) - let first = &values[0]; - if !first.has_values() && first.id.is_some() { - // Don't auto-promote to ID - keep as first value - } - } - - if values.is_empty() && id.is_some() { - Ok(LinoLink::new(id)) - } else if values.is_empty() { - Ok(LinoLink::default()) - } else { - Ok(LinoLink::with_values(id, values)) - } - } - - /// Parses an identifier (name, number, variable, or wildcard) - fn parse_identifier(&self, chars: &[char], pos: &mut usize) -> Result { - self.skip_whitespace(chars, pos); - - if *pos >= chars.len() { - return Err(LinkError::ParseError("Unexpected end of input".to_string())); - } - - let start = *pos; - - // Handle quoted strings - if chars[*pos] == '"' || chars[*pos] == '\'' { - let quote = chars[*pos]; - *pos += 1; - while *pos < chars.len() && chars[*pos] != quote { - if chars[*pos] == '\\' && *pos + 1 < chars.len() { - *pos += 2; // Skip escaped character - } else 
{ - *pos += 1; - } - } - if *pos < chars.len() { - *pos += 1; // consume closing quote - } - let content: String = chars[start + 1..*pos - 1].iter().collect(); - return Ok(content); - } - - // Handle regular identifiers - while *pos < chars.len() { - let c = chars[*pos]; - if c.is_whitespace() || c == '(' || c == ')' { - break; - } - *pos += 1; - } - - let identifier: String = chars[start..*pos].iter().collect(); - Ok(identifier) - } - - /// Skips whitespace characters - fn skip_whitespace(&self, chars: &[char], pos: &mut usize) { - while *pos < chars.len() && chars[*pos].is_whitespace() { - *pos += 1; } } } diff --git a/rust/src/query_processor.rs b/rust/src/query_processor.rs index 8eb358e..14abe43 100644 --- a/rust/src/query_processor.rs +++ b/rust/src/query_processor.rs @@ -136,7 +136,7 @@ impl QueryProcessor { let mut changes_list = Vec::new(); // If both restriction and substitution are empty, do nothing - if restriction_link.values_count() == 0 && substitution_link.values_count() == 0 { + if restriction_link.is_empty() && substitution_link.is_empty() { self.trace_msg( "[ProcessQuery] Restriction & substitution both empty => no operation, returning.", ); @@ -144,7 +144,7 @@ impl QueryProcessor { } // Creation scenario: no restriction, only substitution - if restriction_link.values_count() == 0 && substitution_link.values_count() > 0 { + if restriction_link.is_empty() && !substitution_link.is_empty() { self.trace_msg( "[ProcessQuery] No restriction, but substitution is non-empty => creation scenario.", ); @@ -165,7 +165,7 @@ impl QueryProcessor { } // Deletion scenario: restriction but no substitution - if restriction_link.values_count() > 0 && substitution_link.values_count() == 0 { + if !restriction_link.is_empty() && substitution_link.is_empty() { self.trace_msg( "[ProcessQuery] Restriction non-empty, substitution empty => deletion scenario.", ); diff --git a/rust/tests/dependency_basis_tests.rs b/rust/tests/dependency_basis_tests.rs new file mode 100644 
index 0000000..89c177a --- /dev/null +++ b/rust/tests/dependency_basis_tests.rs @@ -0,0 +1,24 @@ +//! Regression tests for issue-67 Rust basis dependencies. + +const CARGO_TOML: &str = include_str!("../Cargo.toml"); + +#[test] +fn rust_manifest_declares_required_basis_crates() { + for dependency in ["doublets", "links-notation", "lino-arguments"] { + assert!( + CARGO_TOML.contains(&format!("{dependency} =")), + "rust/Cargo.toml must declare {dependency} as a direct dependency" + ); + } + + for source in [ + "http://github.com/linksplatform/doublets-rs", + "http://github.com/link-foundation/links-notation", + "http://github.com/link-foundation/lino-arguments", + ] { + assert!( + CARGO_TOML.contains(source), + "rust/Cargo.toml should document the upstream source {source}" + ); + } +} diff --git a/rust/tests/link_tests.rs b/rust/tests/link_tests.rs index 5744b7c..eaa13ab 100644 --- a/rust/tests/link_tests.rs +++ b/rust/tests/link_tests.rs @@ -1,6 +1,6 @@ //! Tests for the Link module -use link_cli::Link; +use link_cli::{DoubletsLink, Link}; #[test] fn test_link_creation() { @@ -33,3 +33,14 @@ fn test_link_format() { let link = Link::new(1, 2, 3); assert_eq!(link.format(), "(1 2 3)"); } + +#[test] +fn test_link_round_trips_through_doublets_link() { + let platform_link = DoubletsLink::new(1, 2, 3); + let local_link = Link::from(platform_link); + + assert_eq!(local_link, Link::new(1, 2, 3)); + + let platform_link = DoubletsLink::from(local_link); + assert_eq!(platform_link, DoubletsLink::new(1, 2, 3)); +} diff --git a/rust/tests/parser_tests.rs b/rust/tests/parser_tests.rs index e44b180..1bfca53 100644 --- a/rust/tests/parser_tests.rs +++ b/rust/tests/parser_tests.rs @@ -58,3 +58,18 @@ fn test_parse_query_format() { assert_eq!(result.len(), 1); assert_eq!(result[0].values_count(), 2); } + +#[test] +fn test_parse_links_notation_backtick_unicode_identifier() { + let parser = Parser::new(); + let result = parser + .parse("(`ссылка с пробелом`: `источник` `цель`)") + 
.unwrap(); + + assert_eq!(result.len(), 1); + assert_eq!(result[0].id, Some("ссылка с пробелом".to_string())); + + let values = result[0].values.as_ref().unwrap(); + assert_eq!(values[0].id, Some("источник".to_string())); + assert_eq!(values[1].id, Some("цель".to_string())); +} From 73e0d8612b618524129667675c6a6e985c683498 Mon Sep 17 00:00:00 2001 From: konard Date: Thu, 30 Apr 2026 05:07:50 +0000 Subject: [PATCH 4/6] feat: add Rust unicode string storage parity --- docs/case-studies/issue-67/README.md | 29 +- .../20260430_041900_rust_query_parity.md | 2 +- rust/src/lib.rs | 6 + rust/src/pinned_types.rs | 48 ++ rust/src/unicode_string_storage.rs | 471 ++++++++++++++++++ rust/tests/unicode_string_storage_tests.rs | 184 +++++++ 6 files changed, 736 insertions(+), 4 deletions(-) create mode 100644 rust/src/pinned_types.rs create mode 100644 rust/src/unicode_string_storage.rs create mode 100644 rust/tests/unicode_string_storage_tests.rs diff --git a/docs/case-studies/issue-67/README.md b/docs/case-studies/issue-67/README.md index 667eabe..e113244 100644 --- a/docs/case-studies/issue-67/README.md +++ b/docs/case-studies/issue-67/README.md @@ -22,8 +22,8 @@ As of 2026-04-30: | Requirement | Current status | Solution plan | | --- | --- | --- | | Use the latest `doublets-rs`, `links-notation`, and `lino-arguments` as a Rust basis. | `rust/Cargo.toml` now declares `doublets = "0.3.0"`, `links-notation = "0.13.0"`, and `lino-arguments = "0.3.0"` with upstream source links. The Rust parser delegates to `links-notation`, CLI parsing uses `lino-arguments`, and the local link model has conversion coverage for `doublets::Link`. | Continue replacing local storage internals behind compatibility tests so public CLI behavior remains stable while binary storage parity is developed. | -| Reimplement sequence support in pure Rust based on `Data.Doublets.Sequences`. | The Rust implementation has link storage and query operations, but no dedicated sequence layer matching the C# package. 
| Port sequence primitives as a separate Rust module with C# fixture parity tests for creation, traversal, Unicode text, deletion, and persistence. | -| Match C# Unicode support and binary file compatibility. | Existing Rust tests cover named links and persistence basics. Cross-runtime binary fixtures are not yet complete. | Add C#-generated binary fixtures and Rust-generated binary fixtures, then verify both runtimes can read each file without data loss. Include non-ASCII names and multi-codepoint text cases. | +| Reimplement sequence support in pure Rust based on `Data.Doublets.Sequences`. | Rust now has `rust/src/unicode_string_storage.rs`, a doublet-backed port of the C# `UnicodeStringStorage` path: pinned type links, raw-number Unicode symbols, balanced Unicode sequence trees, right-sequence walking, string links, and name links. | Continue extending this module toward full package coverage for advanced sequence indexes, compaction, and binary fixture compatibility. | +| Match C# Unicode support and binary file compatibility. | Rust now round-trips empty, ASCII, multilingual, and surrogate-pair text through UTF-16 code units, matching the C# `string`/`char` model used by `Data.Doublets.Sequences`. Cross-runtime binary fixtures are not yet complete. | Add C#-generated binary fixtures and Rust-generated binary fixtures, then verify both runtimes can read each file without data loss. Include non-ASCII names and multi-codepoint text cases. | | Support the same CLI options, features, and tests as C#. | The repository already has C# and Rust test suites. This PR closes concrete query semantics gaps found against the C# `AdvancedMixedQueryProcessor` behavior. | Continue converting C# tests into Rust parity tests by feature area: storage, parser, query processor, CLI commands, persistence, and sequences. | | Keep C# under `./csharp`, Rust under `./rust`, and provide separate workflows. 
| The repository already has `csharp/`, `rust/`, `.github/workflows/csharp.yml`, and `.github/workflows/rust.yml`. | Preserve this layout. Treat future parity work as package-local changes unless a shared workflow or script must change. | | Compare CI/CD templates and reuse best practices. | Rust and C# workflows exist, and Rust has changelog fragment based release automation. | Audit the requested templates in a follow-up pass focused on workflow drift: permissions, cache keys, test matrix, linting, changelog validation, release trigger, and artifact publishing. | @@ -42,8 +42,31 @@ This PR focuses on query processor parity gaps that were blocking Rust behavior - Reuses existing structural links for named composite substitutions before applying a new name, avoiding accidental duplicate leaf creation. - Declares and compiles the requested Rust basis crates: `doublets`, `links-notation`, and `lino-arguments`. - Uses `links-notation` for parsing, including richer quoted Unicode identifiers, and uses `lino-arguments` as the CLI argument parser entrypoint. +- Adds a Rust `UnicodeStringStorage` implementation based on the C# `Data.Doublets.Sequences` path: + - `PinnedTypes` deterministic type allocation for `Type`, `UnicodeSymbol`, `UnicodeSequence`, `String`, `EmptyString`, and `Name`. + - `Hybrid`-compatible external/raw number encoding for Unicode code units and external references. + - `BalancedVariantConverter`-style sequence tree creation and `RightSequenceWalker`-style traversal. + - `NamedLinks` behavior for internal links and external references, including removal. -The Rust test suite now includes focused parity tests in `rust/tests/query_processor_csharp_parity_tests.rs`. +The Rust test suite now includes focused parity tests in `rust/tests/query_processor_csharp_parity_tests.rs` and `rust/tests/unicode_string_storage_tests.rs`. 
+ +## C# To Rust Tree Comparison + +| C# file | Rust counterpart | Status | +| --- | --- | --- | +| `AdvancedMixedQueryProcessor.cs` | `rust/src/query_processor.rs`, `rust/tests/query_processor_csharp_parity_tests.rs` | Implemented for the currently tested advanced mixed query semantics, including structural matching, variables, wildcard deletes, no-op reads, swaps, and named composite renames. | +| `BasicQueryProcessor.cs` | `rust/src/query_processor.rs` | Covered by the shared Rust query processor for create, update, delete, and read scenarios. | +| `MixedQueryProcessor.cs` | `rust/src/query_processor.rs` | Covered by the shared Rust query processor and parity tests for mixed restriction/substitution behavior. | +| `ChangesSimplifier.cs` | `rust/src/changes_simplifier.rs`, `rust/tests/changes_simplifier_tests.rs` | Implemented and tested. | +| `UnicodeStringStorage.cs` | `rust/src/unicode_string_storage.rs`, `rust/tests/unicode_string_storage_tests.rs` | Implemented in this pass for pinned types, UTF-16 Unicode sequences, string links, type names, user types, external-reference names, and removal. | +| `PinnedTypes.cs` | `rust/src/pinned_types.rs`, `rust/tests/unicode_string_storage_tests.rs` | Implemented in this pass. | +| `NamedLinks.cs` | `rust/src/unicode_string_storage.rs`, existing `LinkStorage` name APIs, `rust/tests/unicode_string_storage_tests.rs` | Implemented in this pass for the doublet-backed Unicode storage path; existing CLI-facing name APIs remain compatible with prior Rust query tests. | +| `NamedLinksDecorator.cs` | `rust/src/link_storage.rs`, `rust/src/query_processor.rs` | Partially represented by `LinkStorage` plus query processor name handling. The new Unicode storage module provides the C# name database primitives needed for deeper integration. | +| `SimpleLinksDecorator.cs` | `rust/src/link_storage.rs` | Represented by direct storage create/update/delete/query methods. 
| +| `LinksExtensions.cs` | `rust/src/link_storage.rs` | Represented by `ensure_created` and explicit-index update paths. | +| `EnumerableExtensions.cs` | Rust destructuring is native pattern syntax; no runtime counterpart required. | Not required as a separate module. | +| `ILinksUnrestricted.cs` | No direct Rust trait yet. | Placeholder C# interface only; add a Rust trait if a future storage adapter needs this abstraction. | +| `Program.cs` | `rust/src/main.rs` | Implemented with matching CLI option aliases and query flow. | ## Next Parity Work diff --git a/rust/changelog.d/20260430_041900_rust_query_parity.md b/rust/changelog.d/20260430_041900_rust_query_parity.md index 1c8240f..31315bb 100644 --- a/rust/changelog.d/20260430_041900_rust_query_parity.md +++ b/rust/changelog.d/20260430_041900_rust_query_parity.md @@ -2,4 +2,4 @@ bump: minor --- -Expanded Rust query processor parity with the C# implementation, including unwrapped query pairs, structural and wildcard deletes, variable-driven updates, named composite rename behavior, and direct Rust basis dependencies on doublets, links-notation, and lino-arguments. +Expanded Rust query processor parity with the C# implementation, including unwrapped query pairs, structural and wildcard deletes, variable-driven updates, named composite rename behavior, direct Rust basis dependencies on doublets, links-notation, and lino-arguments, and a doublet-backed Rust port of the C# Unicode string storage path. 
diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 56bf4b9..248dd50 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -20,7 +20,9 @@ mod link; mod link_storage; mod lino_link; mod parser; +mod pinned_types; mod query_processor; +mod unicode_string_storage; // Re-export main types for easy access pub use changes_simplifier::simplify_changes; @@ -29,4 +31,8 @@ pub use link::{DoubletsLink, Link}; pub use link_storage::LinkStorage; pub use lino_link::LinoLink; pub use parser::Parser; +pub use pinned_types::PinnedTypes; pub use query_processor::{QueryOptions, QueryProcessor}; +pub use unicode_string_storage::{ + external_reference, external_reference_value, NamedLinks, UnicodeStringStorage, +}; diff --git a/rust/src/pinned_types.rs b/rust/src/pinned_types.rs new file mode 100644 index 0000000..aefc125 --- /dev/null +++ b/rust/src/pinned_types.rs @@ -0,0 +1,48 @@ +//! Pinned type allocation compatible with the C# `PinnedTypes` helper. + +use anyhow::{bail, Result}; + +use crate::link_storage::LinkStorage; + +/// Creates or validates the reserved type links at deterministic addresses. +pub struct PinnedTypes<'a> { + links: &'a mut LinkStorage, + current: u32, + initial_source: u32, +} + +impl<'a> PinnedTypes<'a> { + pub fn new(links: &'a mut LinkStorage) -> Self { + Self { + links, + current: 1, + initial_source: 1, + } + } + + pub fn next_type(&mut self) -> Result { + let address = self.current; + if let Some(link) = self.links.get(address) { + if link.index != address || link.source != self.initial_source || link.target != address + { + bail!( + "Unexpected link found at address {address}. Expected: ({address}: {} {address}), Found: ({}: {} {}).", + self.initial_source, + link.index, + link.source, + link.target + ); + } + } else { + let created = self.links.get_or_create(self.initial_source, address); + if created != address { + bail!( + "Pinned type address {address} could not be created deterministically; got {created}." 
+ ); + } + } + + self.current += 1; + Ok(address) + } +} diff --git a/rust/src/unicode_string_storage.rs b/rust/src/unicode_string_storage.rs new file mode 100644 index 0000000..359d690 --- /dev/null +++ b/rust/src/unicode_string_storage.rs @@ -0,0 +1,471 @@ +//! Unicode string and name storage backed by doublet links. +//! +//! This mirrors the C# `UnicodeStringStorage` implementation used by +//! `NamedLinksDecorator`: strings are stored as `String -> UnicodeSequence` +//! links, Unicode sequences are balanced doublet trees, Unicode symbols are +//! `raw-code-unit -> UnicodeSymbol` links, and names are regular doublet links +//! from an internal or external reference to `Name -> String`. + +use anyhow::{bail, Result}; + +use crate::link_storage::LinkStorage; +use crate::pinned_types::PinnedTypes; + +const EXTERNAL_ZERO: u32 = (u32::MAX / 2) + 1; + +/// Encodes an external reference the same way `Platform.Data.Hybrid` does. +pub fn external_reference(value: u32) -> u32 { + if value == 0 { + EXTERNAL_ZERO + } else { + 0u32.wrapping_sub(value) + } +} + +/// Decodes a `Platform.Data.Hybrid` external reference. +pub fn external_reference_value(value: u32) -> Option { + if value == EXTERNAL_ZERO { + Some(0) + } else if value >= EXTERNAL_ZERO { + Some(0u32.wrapping_sub(value)) + } else { + None + } +} + +fn raw_number_from_address(value: u32) -> u32 { + external_reference(value) +} + +fn address_from_raw_number(value: u32) -> u32 { + external_reference_value(value).unwrap_or(value) +} + +/// Link-backed Unicode string storage with C# pinned type layout. 
+pub struct UnicodeStringStorage<'a> { + links: &'a mut LinkStorage, + type_type: u32, + unicode_symbol_type: u32, + unicode_sequence_type: u32, + string_type: u32, + empty_string_type: u32, + name_type: u32, +} + +impl<'a> UnicodeStringStorage<'a> { + pub fn new(links: &'a mut LinkStorage) -> Result { + let ( + type_type, + unicode_symbol_type, + unicode_sequence_type, + string_type, + empty_string_type, + name_type, + ) = { + let mut pinned_types = PinnedTypes::new(links); + ( + pinned_types.next_type()?, + pinned_types.next_type()?, + pinned_types.next_type()?, + pinned_types.next_type()?, + pinned_types.next_type()?, + pinned_types.next_type()?, + ) + }; + + let mut storage = Self { + links, + type_type, + unicode_symbol_type, + unicode_sequence_type, + string_type, + empty_string_type, + name_type, + }; + + storage.set_name(type_type, "Type")?; + storage.set_name(unicode_symbol_type, "UnicodeSymbol")?; + storage.set_name(unicode_sequence_type, "UnicodeSequence")?; + storage.set_name(string_type, "String")?; + storage.set_name(empty_string_type, "EmptyString")?; + storage.set_name(name_type, "Name")?; + + Ok(storage) + } + + pub fn links_mut(&mut self) -> &mut LinkStorage { + self.links + } + + pub fn into_named_links(self) -> NamedLinks<'a> { + NamedLinks { storage: self } + } + + pub fn type_type(&self) -> u32 { + self.type_type + } + + pub fn unicode_symbol_type(&self) -> u32 { + self.unicode_symbol_type + } + + pub fn unicode_sequence_type(&self) -> u32 { + self.unicode_sequence_type + } + + pub fn string_type(&self) -> u32 { + self.string_type + } + + pub fn empty_string_type(&self) -> u32 { + self.empty_string_type + } + + pub fn name_type(&self) -> u32 { + self.name_type + } + + pub fn create_string(&mut self, content: &str) -> Result { + let string_sequence = self.get_string_sequence(content); + Ok(self.links.get_or_create(self.string_type, string_sequence)) + } + + pub fn get_string(&self, string_value: u32) -> Result { + let mut current = 
string_value; + for _ in 0..3 { + let Some(link) = self.links.get(current) else { + break; + }; + if link.source == self.string_type { + return if link.target == self.empty_string_type { + Ok(String::new()) + } else { + self.unicode_sequence_to_string(link.target) + }; + } + current = link.target; + } + bail!("The passed link does not contain a string.") + } + + pub fn unicode_sequence_code_units(&self, string_value: u32) -> Result> { + let sequence = self.unwrap_string_sequence(string_value)?; + if sequence == self.empty_string_type { + return Ok(Vec::new()); + } + let unicode_sequence = self + .links + .get(sequence) + .ok_or_else(|| anyhow::anyhow!("Unicode sequence link {sequence} does not exist."))?; + if unicode_sequence.target != self.unicode_sequence_type { + bail!("Link {sequence} is not a Unicode sequence."); + } + let symbol_sequence = unicode_sequence.source; + self.walk_right_sequence(symbol_sequence) + .into_iter() + .map(|symbol| self.unicode_symbol_to_code_unit(symbol)) + .collect() + } + + pub fn get_types(&self) -> Vec { + self.links + .query(None, Some(self.type_type), None) + .into_iter() + .map(|link| link.index) + .collect() + } + + pub fn is_type(&self, address: u32) -> bool { + self.links + .get(address) + .is_some_and(|link| link.source == self.type_type) + } + + pub fn get_or_create_type(&mut self, name: &str) -> Result { + if let Some(existing) = self.get_by_name(name)? 
{ + return Ok(existing); + } + + let type_link = self.links.create(0, 0); + self.links.update(type_link, self.type_type, type_link)?; + self.set_name(type_link, name)?; + Ok(type_link) + } + + pub fn set_name_for_external_reference(&mut self, link: u32, name: &str) -> Result { + self.set_name(external_reference(link), name) + } + + pub fn get_name_by_external_reference(&self, link: u32) -> Result> { + self.get_name(external_reference(link)) + } + + pub fn get_external_reference_by_name(&mut self, name: &str) -> Result> { + Ok(self.get_by_name(name)?.and_then(external_reference_value)) + } + + pub fn remove_name_by_external_reference(&mut self, external_reference_id: u32) -> Result<()> { + self.remove_name(external_reference(external_reference_id)) + } + + pub fn set_name(&mut self, link: u32, name: &str) -> Result { + let name_sequence = self.create_string(name)?; + let name_link = self.links.get_or_create(self.name_type, name_sequence); + Ok(self.links.get_or_create(link, name_link)) + } + + pub fn get_name(&self, link: u32) -> Result> { + for name_pair in self.links.query(None, Some(link), None) { + let name_candidate = name_pair.target; + if self + .links + .get(name_candidate) + .is_some_and(|candidate| candidate.source == self.name_type) + { + return self.get_string(name_candidate).map(Some); + } + } + Ok(None) + } + + pub fn get_by_name(&mut self, name: &str) -> Result> { + let name_sequence = self.create_string(name)?; + let Some(name_link) = self.links.search(self.name_type, name_sequence) else { + return Ok(None); + }; + Ok(self + .links + .query(None, None, Some(name_link)) + .into_iter() + .map(|link| link.source) + .next()) + } + + pub fn remove_name(&mut self, link: u32) -> Result<()> { + let name_pairs = self + .links + .query(None, Some(link), None) + .into_iter() + .map(|link| (link.index, link.target)) + .collect::>(); + + for (name_pair, name_candidate) in name_pairs { + let Some(candidate) = self.links.get(name_candidate).copied() else { + 
continue; + }; + if candidate.source != self.name_type { + continue; + } + + if self.links.exists(name_pair) { + self.links.delete(name_pair)?; + } + + let still_used = self + .links + .query(None, None, Some(name_candidate)) + .into_iter() + .any(|usage| usage.index != name_pair); + if !still_used && self.links.exists(name_candidate) { + self.links.delete(name_candidate)?; + } + } + + Ok(()) + } + + fn get_string_sequence(&mut self, content: &str) -> u32 { + if content.is_empty() { + self.empty_string_type + } else { + self.string_to_unicode_sequence(content) + } + } + + fn string_to_unicode_sequence(&mut self, content: &str) -> u32 { + let symbols = content + .encode_utf16() + .map(|code_unit| self.code_unit_to_unicode_symbol(code_unit)) + .collect::>(); + self.unicode_symbols_to_unicode_sequence(&symbols) + } + + fn code_unit_to_unicode_symbol(&mut self, code_unit: u16) -> u32 { + let raw_number = raw_number_from_address(u32::from(code_unit)); + self.links + .get_or_create(raw_number, self.unicode_symbol_type) + } + + fn unicode_symbol_to_code_unit(&self, symbol: u32) -> Result { + let Some(link) = self.links.get(symbol) else { + bail!("Unicode symbol link {symbol} does not exist."); + }; + if link.target != self.unicode_symbol_type { + bail!("Specified link {symbol} is not a Unicode symbol."); + } + let code_unit = address_from_raw_number(link.source); + Ok(u16::try_from(code_unit)?) 
+ } + + fn unicode_symbols_to_unicode_sequence(&mut self, symbols: &[u32]) -> u32 { + if symbols.is_empty() { + return self.unicode_sequence_type; + } + let sequence = self.balanced_variant(symbols); + self.links + .get_or_create(sequence, self.unicode_sequence_type) + } + + fn unicode_sequence_to_string(&self, sequence: u32) -> Result { + if sequence == self.unicode_sequence_type { + return Ok(String::new()); + } + + let Some(sequence_link) = self.links.get(sequence) else { + bail!("Unicode sequence link {sequence} does not exist."); + }; + if sequence_link.target != self.unicode_sequence_type { + bail!("Specified link {sequence} is not a Unicode sequence."); + } + + let code_units = self + .walk_right_sequence(sequence_link.source) + .into_iter() + .map(|symbol| self.unicode_symbol_to_code_unit(symbol)) + .collect::>>()?; + Ok(String::from_utf16(&code_units)?) + } + + fn balanced_variant(&mut self, symbols: &[u32]) -> u32 { + match symbols.len() { + 0 => 0, + 1 => symbols[0], + 2 => self.links.get_or_create(symbols[0], symbols[1]), + _ => { + let mut layer = symbols.to_vec(); + while layer.len() > 2 { + let mut next = Vec::with_capacity(layer.len().div_ceil(2)); + let mut chunks = layer.chunks_exact(2); + for pair in &mut chunks { + next.push(self.links.get_or_create(pair[0], pair[1])); + } + if let Some(&remainder) = chunks.remainder().first() { + next.push(remainder); + } + layer = next; + } + self.links.get_or_create(layer[0], layer[1]) + } + } + } + + fn walk_right_sequence(&self, sequence: u32) -> Vec { + let mut output = Vec::new(); + let mut stack = Vec::new(); + let mut element = sequence; + + if self.is_unicode_symbol(element) { + output.push(element); + return output; + } + + loop { + if self.is_unicode_symbol(element) { + let Some(popped) = stack.pop() else { + break; + }; + if let Some(link) = self.links.get(popped) { + if self.is_unicode_symbol(link.source) { + output.push(link.source); + } + if self.is_unicode_symbol(link.target) { + 
output.push(link.target); + } + element = link.target; + } else { + break; + } + } else { + let Some(link) = self.links.get(element) else { + break; + }; + stack.push(element); + element = link.source; + } + } + + output + } + + fn is_unicode_symbol(&self, link: u32) -> bool { + self.links + .get(link) + .is_some_and(|link| link.target == self.unicode_symbol_type) + } + + fn unwrap_string_sequence(&self, string_value: u32) -> Result { + let mut current = string_value; + for _ in 0..3 { + let Some(link) = self.links.get(current) else { + break; + }; + if link.source == self.string_type { + return Ok(link.target); + } + current = link.target; + } + bail!("The passed link does not contain a string.") + } +} + +/// Public facade matching the C# `NamedLinks` role. +pub struct NamedLinks<'a> { + storage: UnicodeStringStorage<'a>, +} + +impl<'a> NamedLinks<'a> { + pub fn new(links: &'a mut LinkStorage) -> Result { + Ok(UnicodeStringStorage::new(links)?.into_named_links()) + } + + pub fn set_name_for_external_reference(&mut self, link: u32, name: &str) -> Result { + self.storage.set_name_for_external_reference(link, name) + } + + pub fn set_name(&mut self, link: u32, name: &str) -> Result { + self.storage.set_name(link, name) + } + + pub fn get_name_by_external_reference(&self, link: u32) -> Result> { + self.storage.get_name_by_external_reference(link) + } + + pub fn get_name(&self, link: u32) -> Result> { + self.storage.get_name(link) + } + + pub fn get_by_name(&mut self, name: &str) -> Result> { + self.storage.get_by_name(name) + } + + pub fn get_external_reference_by_name(&mut self, name: &str) -> Result> { + self.storage.get_external_reference_by_name(name) + } + + pub fn remove_name(&mut self, link: u32) -> Result<()> { + self.storage.remove_name(link) + } + + pub fn remove_name_by_external_reference(&mut self, external_reference_id: u32) -> Result<()> { + self.storage + .remove_name_by_external_reference(external_reference_id) + } + + pub fn unicode_storage(&self) -> 
&UnicodeStringStorage<'a> { + &self.storage + } + + pub fn unicode_storage_mut(&mut self) -> &mut UnicodeStringStorage<'a> { + &mut self.storage + } +} diff --git a/rust/tests/unicode_string_storage_tests.rs b/rust/tests/unicode_string_storage_tests.rs new file mode 100644 index 0000000..c2805ee --- /dev/null +++ b/rust/tests/unicode_string_storage_tests.rs @@ -0,0 +1,184 @@ +use anyhow::Result; +use link_cli::{external_reference, LinkStorage, NamedLinks, UnicodeStringStorage}; +use tempfile::NamedTempFile; + +fn with_storage(test: impl FnOnce(&mut UnicodeStringStorage<'_>) -> Result<()>) -> Result<()> { + let temp_file = NamedTempFile::new()?; + let db_path = temp_file.path().to_str().unwrap(); + let mut links = LinkStorage::new(db_path, false)?; + let mut storage = UnicodeStringStorage::new(&mut links)?; + test(&mut storage) +} + +#[test] +fn create_and_retrieve_empty_string() -> Result<()> { + with_storage(|storage| { + let empty = storage.create_string("")?; + assert_eq!("", storage.get_string(empty)?); + Ok(()) + }) +} + +#[test] +fn create_and_retrieve_simple_string() -> Result<()> { + with_storage(|storage| { + let hello = storage.create_string("Hello")?; + assert_eq!("Hello", storage.get_string(hello)?); + Ok(()) + }) +} + +#[test] +fn create_and_retrieve_multiple_strings() -> Result<()> { + with_storage(|storage| { + let first = storage.create_string("First")?; + let second = storage.create_string("Second")?; + + assert_eq!("First", storage.get_string(first)?); + assert_eq!("Second", storage.get_string(second)?); + Ok(()) + }) +} + +#[test] +fn create_and_retrieve_unicode_string_as_utf16_sequence() -> Result<()> { + with_storage(|storage| { + let content = "Hello, 世界! Привет, мир! 
😀"; + let link = storage.create_string(content)?; + + assert_eq!(content, storage.get_string(link)?); + assert!(storage.unicode_sequence_code_units(link)?.len() > content.chars().count()); + Ok(()) + }) +} + +#[test] +fn pinned_types_are_created_and_named() -> Result<()> { + with_storage(|storage| { + assert_eq!(1, storage.type_type()); + assert_eq!(2, storage.unicode_symbol_type()); + assert_eq!(3, storage.unicode_sequence_type()); + assert_eq!(4, storage.string_type()); + assert_eq!(5, storage.empty_string_type()); + assert_eq!(6, storage.name_type()); + + for (id, name) in [ + (storage.type_type(), "Type"), + (storage.unicode_symbol_type(), "UnicodeSymbol"), + (storage.unicode_sequence_type(), "UnicodeSequence"), + (storage.string_type(), "String"), + (storage.empty_string_type(), "EmptyString"), + (storage.name_type(), "Name"), + ] { + assert_eq!(Some(id), storage.get_by_name(name)?); + assert_eq!(Some(name.to_string()), storage.get_name(id)?); + } + + Ok(()) + }) +} + +#[test] +fn create_and_retrieve_user_defined_type() -> Result<()> { + with_storage(|storage| { + let user_type = storage.get_or_create_type("UserType")?; + assert_eq!(Some(user_type), storage.get_by_name("UserType")?); + assert_eq!(Some("UserType".to_string()), storage.get_name(user_type)?); + Ok(()) + }) +} + +#[test] +fn name_external_reference_matches_csharp_hybrid_encoding() -> Result<()> { + with_storage(|storage| { + assert_eq!(u32::MAX, external_reference(1)); + + storage.set_name_for_external_reference(1, "MyExternalReference")?; + + assert_eq!( + Some("MyExternalReference".to_string()), + storage.get_name_by_external_reference(1)? + ); + assert_eq!( + Some(1), + storage.get_external_reference_by_name("MyExternalReference")? + ); + assert_eq!( + Some("MyExternalReference".to_string()), + storage.get_name(external_reference(1))? 
+ ); + + Ok(()) + }) +} + +#[test] +fn name_is_removed_when_link_is_deleted() -> Result<()> { + with_storage(|storage| { + let link = storage.links_mut().create(0, 0); + storage.set_name(link, "TestName")?; + assert_eq!(Some("TestName".to_string()), storage.get_name(link)?); + assert_eq!(Some(link), storage.get_by_name("TestName")?); + + storage.links_mut().delete(link)?; + storage.remove_name(link)?; + + assert_eq!(None, storage.get_by_name("TestName")?); + assert_eq!(None, storage.get_name(link)?); + Ok(()) + }) +} + +#[test] +fn deleting_non_named_link_does_not_affect_other_names() -> Result<()> { + with_storage(|storage| { + let named_link = storage.links_mut().create(0, 0); + storage.set_name(named_link, "Named")?; + let unnamed_link = storage.links_mut().create(0, 0); + + storage.links_mut().delete(unnamed_link)?; + + assert_eq!(Some(named_link), storage.get_by_name("Named")?); + assert_eq!(Some("Named".to_string()), storage.get_name(named_link)?); + Ok(()) + }) +} + +#[test] +fn name_is_removed_when_external_reference_is_deleted() -> Result<()> { + with_storage(|storage| { + storage.set_name_for_external_reference(123, "ExternalName")?; + assert_eq!( + Some("ExternalName".to_string()), + storage.get_name_by_external_reference(123)? + ); + assert_eq!( + Some(123), + storage.get_external_reference_by_name("ExternalName")? + ); + + storage.remove_name_by_external_reference(123)?; + + assert_eq!( + None, + storage.get_external_reference_by_name("ExternalName")? 
+ ); + assert_eq!(None, storage.get_name_by_external_reference(123)?); + Ok(()) + }) +} + +#[test] +fn named_links_facade_matches_csharp_named_links_role() -> Result<()> { + let temp_file = NamedTempFile::new()?; + let db_path = temp_file.path().to_str().unwrap(); + let mut links = LinkStorage::new(db_path, false)?; + let mut named_links = NamedLinks::new(&mut links)?; + + let link = named_links.unicode_storage_mut().links_mut().create(0, 0); + named_links.set_name(link, "FacadeName")?; + + assert_eq!(Some(link), named_links.get_by_name("FacadeName")?); + assert_eq!(Some("FacadeName".to_string()), named_links.get_name(link)?); + Ok(()) +} From f36262376ffffea70cabde4322c9eee596e7c7a8 Mon Sep 17 00:00:00 2001 From: konard Date: Thu, 30 Apr 2026 05:35:16 +0000 Subject: [PATCH 5/6] refactor: split Rust unicode sequence abstractions --- docs/case-studies/issue-67/README.md | 14 +- .../20260430_041900_rust_query_parity.md | 2 +- rust/src/hybrid_reference.rs | 52 +++ rust/src/lib.rs | 9 +- rust/src/named_links.rs | 61 +++ .../address_to_raw_number_converter.rs | 14 + .../sequences/balanced_variant_converter.rs | 33 ++ .../sequences/caching_converter_decorator.rs | 53 +++ .../char_to_unicode_symbol_converter.rs | 27 ++ rust/src/sequences/default_stack.rs | 32 ++ rust/src/sequences/mod.rs | 25 ++ .../raw_number_to_address_converter.rs | 14 + rust/src/sequences/right_sequence_walker.rs | 38 ++ .../string_to_unicode_sequence_converter.rs | 40 ++ rust/src/sequences/target_matcher.rs | 22 ++ .../unicode_sequence_to_string_converter.rs | 52 +++ .../unicode_symbol_to_char_converter.rs | 37 ++ rust/src/unicode_string_storage.rs | 352 +++++++----------- .../tests/unicode_sequence_converter_tests.rs | 141 +++++++ 19 files changed, 796 insertions(+), 222 deletions(-) create mode 100644 rust/src/hybrid_reference.rs create mode 100644 rust/src/named_links.rs create mode 100644 rust/src/sequences/address_to_raw_number_converter.rs create mode 100644 
rust/src/sequences/balanced_variant_converter.rs create mode 100644 rust/src/sequences/caching_converter_decorator.rs create mode 100644 rust/src/sequences/char_to_unicode_symbol_converter.rs create mode 100644 rust/src/sequences/default_stack.rs create mode 100644 rust/src/sequences/mod.rs create mode 100644 rust/src/sequences/raw_number_to_address_converter.rs create mode 100644 rust/src/sequences/right_sequence_walker.rs create mode 100644 rust/src/sequences/string_to_unicode_sequence_converter.rs create mode 100644 rust/src/sequences/target_matcher.rs create mode 100644 rust/src/sequences/unicode_sequence_to_string_converter.rs create mode 100644 rust/src/sequences/unicode_symbol_to_char_converter.rs create mode 100644 rust/tests/unicode_sequence_converter_tests.rs diff --git a/docs/case-studies/issue-67/README.md b/docs/case-studies/issue-67/README.md index e113244..0245f57 100644 --- a/docs/case-studies/issue-67/README.md +++ b/docs/case-studies/issue-67/README.md @@ -22,7 +22,7 @@ As of 2026-04-30: | Requirement | Current status | Solution plan | | --- | --- | --- | | Use the latest `doublets-rs`, `links-notation`, and `lino-arguments` as a Rust basis. | `rust/Cargo.toml` now declares `doublets = "0.3.0"`, `links-notation = "0.13.0"`, and `lino-arguments = "0.3.0"` with upstream source links. The Rust parser delegates to `links-notation`, CLI parsing uses `lino-arguments`, and the local link model has conversion coverage for `doublets::Link`. | Continue replacing local storage internals behind compatibility tests so public CLI behavior remains stable while binary storage parity is developed. | -| Reimplement sequence support in pure Rust based on `Data.Doublets.Sequences`. | Rust now has `rust/src/unicode_string_storage.rs`, a doublet-backed port of the C# `UnicodeStringStorage` path: pinned type links, raw-number Unicode symbols, balanced Unicode sequence trees, right-sequence walking, string links, and name links. 
| Continue extending this module toward full package coverage for advanced sequence indexes, compaction, and binary fixture compatibility. | +| Reimplement sequence support in pure Rust based on `Data.Doublets.Sequences`. | Rust now has one-file-per-abstraction ports for the C# sequence pipeline under `rust/src/sequences/`: `AddressToRawNumberConverter`, `RawNumberToAddressConverter`, `BalancedVariantConverter`, `TargetMatcher`, `CharToUnicodeSymbolConverter`, `UnicodeSymbolToCharConverter`, `CachingConverterDecorator`, `DefaultStack`, `RightSequenceWalker`, `StringToUnicodeSequenceConverter`, and `UnicodeSequenceToStringConverter`. `UnicodeStringStorage` composes those components. | Continue extending this module toward full package coverage for advanced sequence indexes, compaction, and binary fixture compatibility. | | Match C# Unicode support and binary file compatibility. | Rust now round-trips empty, ASCII, multilingual, and surrogate-pair text through UTF-16 code units, matching the C# `string`/`char` model used by `Data.Doublets.Sequences`. Cross-runtime binary fixtures are not yet complete. | Add C#-generated binary fixtures and Rust-generated binary fixtures, then verify both runtimes can read each file without data loss. Include non-ASCII names and multi-codepoint text cases. | | Support the same CLI options, features, and tests as C#. | The repository already has C# and Rust test suites. This PR closes concrete query semantics gaps found against the C# `AdvancedMixedQueryProcessor` behavior. | Continue converting C# tests into Rust parity tests by feature area: storage, parser, query processor, CLI commands, persistence, and sequences. | | Keep C# under `./csharp`, Rust under `./rust`, and provide separate workflows. | The repository already has `csharp/`, `rust/`, `.github/workflows/csharp.yml`, and `.github/workflows/rust.yml`. | Preserve this layout. Treat future parity work as package-local changes unless a shared workflow or script must change. 
| @@ -45,10 +45,10 @@ This PR focuses on query processor parity gaps that were blocking Rust behavior - Adds a Rust `UnicodeStringStorage` implementation based on the C# `Data.Doublets.Sequences` path: - `PinnedTypes` deterministic type allocation for `Type`, `UnicodeSymbol`, `UnicodeSequence`, `String`, `EmptyString`, and `Name`. - `Hybrid`-compatible external/raw number encoding for Unicode code units and external references. - - `BalancedVariantConverter`-style sequence tree creation and `RightSequenceWalker`-style traversal. - - `NamedLinks` behavior for internal links and external references, including removal. + - Direct Rust files for the C# constructor pipeline: raw-number converters, `BalancedVariantConverter`, `TargetMatcher`, char/symbol converters, `CachingConverterDecorator`, `DefaultStack`, `RightSequenceWalker`, and string/sequence converters. + - Separate `NamedLinks` facade behavior for internal links and external references, including removal. -The Rust test suite now includes focused parity tests in `rust/tests/query_processor_csharp_parity_tests.rs` and `rust/tests/unicode_string_storage_tests.rs`. +The Rust test suite now includes focused parity tests in `rust/tests/query_processor_csharp_parity_tests.rs`, `rust/tests/unicode_string_storage_tests.rs`, and `rust/tests/unicode_sequence_converter_tests.rs`. ## C# To Rust Tree Comparison @@ -58,9 +58,9 @@ The Rust test suite now includes focused parity tests in `rust/tests/query_proce | `BasicQueryProcessor.cs` | `rust/src/query_processor.rs` | Covered by the shared Rust query processor for create, update, delete, and read scenarios. | | `MixedQueryProcessor.cs` | `rust/src/query_processor.rs` | Covered by the shared Rust query processor and parity tests for mixed restriction/substitution behavior. | | `ChangesSimplifier.cs` | `rust/src/changes_simplifier.rs`, `rust/tests/changes_simplifier_tests.rs` | Implemented and tested. 
| -| `UnicodeStringStorage.cs` | `rust/src/unicode_string_storage.rs`, `rust/tests/unicode_string_storage_tests.rs` | Implemented in this pass for pinned types, UTF-16 Unicode sequences, string links, type names, user types, external-reference names, and removal. | +| `UnicodeStringStorage.cs` | `rust/src/unicode_string_storage.rs`, `rust/src/sequences/*`, `rust/src/named_links.rs`, `rust/src/hybrid_reference.rs`, `rust/tests/unicode_string_storage_tests.rs`, `rust/tests/unicode_sequence_converter_tests.rs` | Implemented in this pass for pinned types, UTF-16 Unicode sequences, string links, type names, user types, external-reference names, and removal. The C# constructor pipeline now has separate Rust abstractions instead of a monolithic helper implementation. | | `PinnedTypes.cs` | `rust/src/pinned_types.rs`, `rust/tests/unicode_string_storage_tests.rs` | Implemented in this pass. | -| `NamedLinks.cs` | `rust/src/unicode_string_storage.rs`, existing `LinkStorage` name APIs, `rust/tests/unicode_string_storage_tests.rs` | Implemented in this pass for the doublet-backed Unicode storage path; existing CLI-facing name APIs remain compatible with prior Rust query tests. | +| `NamedLinks.cs` | `rust/src/named_links.rs`, `rust/src/unicode_string_storage.rs`, existing `LinkStorage` name APIs, `rust/tests/unicode_string_storage_tests.rs` | Implemented in this pass for the doublet-backed Unicode storage path; existing CLI-facing name APIs remain compatible with prior Rust query tests. | | `NamedLinksDecorator.cs` | `rust/src/link_storage.rs`, `rust/src/query_processor.rs` | Partially represented by `LinkStorage` plus query processor name handling. The new Unicode storage module provides the C# name database primitives needed for deeper integration. | | `SimpleLinksDecorator.cs` | `rust/src/link_storage.rs` | Represented by direct storage create/update/delete/query methods. 
| | `LinksExtensions.cs` | `rust/src/link_storage.rs` | Represented by `ensure_created` and explicit-index update paths. | @@ -72,6 +72,6 @@ The Rust test suite now includes focused parity tests in `rust/tests/query_proce 1. Replace the remaining local text-file storage internals with a `doublets`-backed adapter behind compatibility tests. 2. Build cross-runtime fixture tests for binary file compatibility and Unicode names/text. -3. Port sequence primitives from the C# sequence package into Rust with fixture-driven tests. +3. Extend the sequence primitive ports beyond the `UnicodeStringStorage` constructor pipeline with fixture-driven tests. 4. Expand Rust CLI tests until every C# CLI behavior has a corresponding Rust assertion. 5. Run a workflow-template audit against the requested C#, Rust, and JS pipeline templates and apply only concrete drift fixes. diff --git a/rust/changelog.d/20260430_041900_rust_query_parity.md b/rust/changelog.d/20260430_041900_rust_query_parity.md index 31315bb..ba8f827 100644 --- a/rust/changelog.d/20260430_041900_rust_query_parity.md +++ b/rust/changelog.d/20260430_041900_rust_query_parity.md @@ -2,4 +2,4 @@ bump: minor --- -Expanded Rust query processor parity with the C# implementation, including unwrapped query pairs, structural and wildcard deletes, variable-driven updates, named composite rename behavior, direct Rust basis dependencies on doublets, links-notation, and lino-arguments, and a doublet-backed Rust port of the C# Unicode string storage path. +Expanded Rust query processor parity with the C# implementation, including unwrapped query pairs, structural and wildcard deletes, variable-driven updates, named composite rename behavior, direct Rust basis dependencies on doublets, links-notation, and lino-arguments, and a doublet-backed Rust port of the C# Unicode string storage path with explicit Data.Doublets.Sequences-style converter abstractions. 
diff --git a/rust/src/hybrid_reference.rs b/rust/src/hybrid_reference.rs new file mode 100644 index 0000000..966bc32 --- /dev/null +++ b/rust/src/hybrid_reference.rs @@ -0,0 +1,52 @@ +//! `Platform.Data.Hybrid`-compatible reference encoding. + +const EXTERNAL_ZERO: u32 = (u32::MAX / 2) + 1; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct HybridReference { + encoded: u32, +} + +impl HybridReference { + pub fn external(value: u32) -> Self { + Self { + encoded: if value == 0 { + EXTERNAL_ZERO + } else { + 0u32.wrapping_sub(value) + }, + } + } + + pub fn from_encoded(encoded: u32) -> Self { + Self { encoded } + } + + pub fn encoded(self) -> u32 { + self.encoded + } + + pub fn absolute_value(self) -> Option { + if self.encoded == EXTERNAL_ZERO { + Some(0) + } else if self.encoded >= EXTERNAL_ZERO { + Some(0u32.wrapping_sub(self.encoded)) + } else { + None + } + } + + pub fn is_external(self) -> bool { + self.absolute_value().is_some() + } +} + +/// Encodes an external reference the same way `Platform.Data.Hybrid` does. +pub fn external_reference(value: u32) -> u32 { + HybridReference::external(value).encoded() +} + +/// Decodes a `Platform.Data.Hybrid` external reference. 
+pub fn external_reference_value(value: u32) -> Option { + HybridReference::from_encoded(value).absolute_value() +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 248dd50..e504866 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -16,23 +16,26 @@ mod changes_simplifier; mod error; +mod hybrid_reference; mod link; mod link_storage; mod lino_link; +mod named_links; mod parser; mod pinned_types; mod query_processor; +pub mod sequences; mod unicode_string_storage; // Re-export main types for easy access pub use changes_simplifier::simplify_changes; pub use error::LinkError; +pub use hybrid_reference::{external_reference, external_reference_value, HybridReference}; pub use link::{DoubletsLink, Link}; pub use link_storage::LinkStorage; pub use lino_link::LinoLink; +pub use named_links::NamedLinks; pub use parser::Parser; pub use pinned_types::PinnedTypes; pub use query_processor::{QueryOptions, QueryProcessor}; -pub use unicode_string_storage::{ - external_reference, external_reference_value, NamedLinks, UnicodeStringStorage, -}; +pub use unicode_string_storage::UnicodeStringStorage; diff --git a/rust/src/named_links.rs b/rust/src/named_links.rs new file mode 100644 index 0000000..4e7de18 --- /dev/null +++ b/rust/src/named_links.rs @@ -0,0 +1,61 @@ +//! Link-backed names facade matching the C# `NamedLinks` role. 
+ +use anyhow::Result; + +use crate::link_storage::LinkStorage; +use crate::unicode_string_storage::UnicodeStringStorage; + +pub struct NamedLinks<'a> { + storage: UnicodeStringStorage<'a>, +} + +impl<'a> NamedLinks<'a> { + pub fn new(links: &'a mut LinkStorage) -> Result { + Ok(UnicodeStringStorage::new(links)?.into_named_links()) + } + + pub(crate) fn from_storage(storage: UnicodeStringStorage<'a>) -> Self { + Self { storage } + } + + pub fn set_name_for_external_reference(&mut self, link: u32, name: &str) -> Result { + self.storage.set_name_for_external_reference(link, name) + } + + pub fn set_name(&mut self, link: u32, name: &str) -> Result { + self.storage.set_name(link, name) + } + + pub fn get_name_by_external_reference(&self, link: u32) -> Result> { + self.storage.get_name_by_external_reference(link) + } + + pub fn get_name(&self, link: u32) -> Result> { + self.storage.get_name(link) + } + + pub fn get_by_name(&mut self, name: &str) -> Result> { + self.storage.get_by_name(name) + } + + pub fn get_external_reference_by_name(&mut self, name: &str) -> Result> { + self.storage.get_external_reference_by_name(name) + } + + pub fn remove_name(&mut self, link: u32) -> Result<()> { + self.storage.remove_name(link) + } + + pub fn remove_name_by_external_reference(&mut self, external_reference_id: u32) -> Result<()> { + self.storage + .remove_name_by_external_reference(external_reference_id) + } + + pub fn unicode_storage(&self) -> &UnicodeStringStorage<'a> { + &self.storage + } + + pub fn unicode_storage_mut(&mut self) -> &mut UnicodeStringStorage<'a> { + &mut self.storage + } +} diff --git a/rust/src/sequences/address_to_raw_number_converter.rs b/rust/src/sequences/address_to_raw_number_converter.rs new file mode 100644 index 0000000..fdb9be9 --- /dev/null +++ b/rust/src/sequences/address_to_raw_number_converter.rs @@ -0,0 +1,14 @@ +use crate::hybrid_reference::external_reference; + +#[derive(Clone, Copy, Debug, Default)] +pub struct AddressToRawNumberConverter; + 
+impl AddressToRawNumberConverter { + pub fn new() -> Self { + Self + } + + pub fn convert(&self, address: u32) -> u32 { + external_reference(address) + } +} diff --git a/rust/src/sequences/balanced_variant_converter.rs b/rust/src/sequences/balanced_variant_converter.rs new file mode 100644 index 0000000..90ca1c4 --- /dev/null +++ b/rust/src/sequences/balanced_variant_converter.rs @@ -0,0 +1,33 @@ +use crate::link_storage::LinkStorage; + +#[derive(Clone, Copy, Debug, Default)] +pub struct BalancedVariantConverter; + +impl BalancedVariantConverter { + pub fn new() -> Self { + Self + } + + pub fn convert(&self, links: &mut LinkStorage, elements: &[u32]) -> u32 { + match elements.len() { + 0 => 0, + 1 => elements[0], + 2 => links.get_or_create(elements[0], elements[1]), + _ => { + let mut layer = elements.to_vec(); + while layer.len() > 2 { + let mut next = Vec::with_capacity(layer.len().div_ceil(2)); + let mut chunks = layer.chunks_exact(2); + for pair in &mut chunks { + next.push(links.get_or_create(pair[0], pair[1])); + } + if let Some(&remainder) = chunks.remainder().first() { + next.push(remainder); + } + layer = next; + } + links.get_or_create(layer[0], layer[1]) + } + } + } +} diff --git a/rust/src/sequences/caching_converter_decorator.rs b/rust/src/sequences/caching_converter_decorator.rs new file mode 100644 index 0000000..6050415 --- /dev/null +++ b/rust/src/sequences/caching_converter_decorator.rs @@ -0,0 +1,53 @@ +use std::collections::HashMap; +use std::hash::Hash; + +#[derive(Clone, Debug)] +pub struct CachingConverterDecorator { + cache: HashMap, +} + +impl Default for CachingConverterDecorator { + fn default() -> Self { + Self { + cache: HashMap::new(), + } + } +} + +impl CachingConverterDecorator +where + K: Eq + Hash + Clone, + V: Clone, +{ + pub fn new() -> Self { + Self::default() + } + + pub fn get(&self, input: &K) -> Option { + self.cache.get(input).cloned() + } + + pub fn insert(&mut self, input: K, output: V) -> V { + self.cache.insert(input, 
output.clone()); + output + } + + pub fn convert_with(&mut self, input: K, convert: F) -> Result + where + F: FnOnce(&K) -> Result, + { + if let Some(output) = self.get(&input) { + return Ok(output); + } + let output = convert(&input)?; + Ok(self.insert(input, output)) + } + + pub fn len(&self) -> usize { + self.cache.len() + } + + pub fn is_empty(&self) -> bool { + self.cache.is_empty() + } +} diff --git a/rust/src/sequences/char_to_unicode_symbol_converter.rs b/rust/src/sequences/char_to_unicode_symbol_converter.rs new file mode 100644 index 0000000..b36495e --- /dev/null +++ b/rust/src/sequences/char_to_unicode_symbol_converter.rs @@ -0,0 +1,27 @@ +use crate::link_storage::LinkStorage; +use crate::sequences::AddressToRawNumberConverter; + +#[derive(Clone, Copy, Debug)] +pub struct CharToUnicodeSymbolConverter { + address_to_number_converter: AddressToRawNumberConverter, + unicode_symbol_type: u32, +} + +impl CharToUnicodeSymbolConverter { + pub fn new( + address_to_number_converter: AddressToRawNumberConverter, + unicode_symbol_type: u32, + ) -> Self { + Self { + address_to_number_converter, + unicode_symbol_type, + } + } + + pub fn convert(&self, links: &mut LinkStorage, code_unit: u16) -> u32 { + let raw_number = self + .address_to_number_converter + .convert(u32::from(code_unit)); + links.get_or_create(raw_number, self.unicode_symbol_type) + } +} diff --git a/rust/src/sequences/default_stack.rs b/rust/src/sequences/default_stack.rs new file mode 100644 index 0000000..f8f21a3 --- /dev/null +++ b/rust/src/sequences/default_stack.rs @@ -0,0 +1,32 @@ +#[derive(Clone, Debug)] +pub struct DefaultStack { + items: Vec, +} + +impl Default for DefaultStack { + fn default() -> Self { + Self { items: Vec::new() } + } +} + +impl DefaultStack { + pub fn new() -> Self { + Self::default() + } + + pub fn push(&mut self, item: T) { + self.items.push(item); + } + + pub fn pop(&mut self) -> Option { + self.items.pop() + } + + pub fn len(&self) -> usize { + self.items.len() + } + 
+ pub fn is_empty(&self) -> bool { + self.items.is_empty() + } +} diff --git a/rust/src/sequences/mod.rs b/rust/src/sequences/mod.rs new file mode 100644 index 0000000..1804403 --- /dev/null +++ b/rust/src/sequences/mod.rs @@ -0,0 +1,25 @@ +//! Rust ports of the `Data.Doublets.Sequences` abstractions used by C#. + +mod address_to_raw_number_converter; +mod balanced_variant_converter; +mod caching_converter_decorator; +mod char_to_unicode_symbol_converter; +mod default_stack; +mod raw_number_to_address_converter; +mod right_sequence_walker; +mod string_to_unicode_sequence_converter; +mod target_matcher; +mod unicode_sequence_to_string_converter; +mod unicode_symbol_to_char_converter; + +pub use address_to_raw_number_converter::AddressToRawNumberConverter; +pub use balanced_variant_converter::BalancedVariantConverter; +pub use caching_converter_decorator::CachingConverterDecorator; +pub use char_to_unicode_symbol_converter::CharToUnicodeSymbolConverter; +pub use default_stack::DefaultStack; +pub use raw_number_to_address_converter::RawNumberToAddressConverter; +pub use right_sequence_walker::RightSequenceWalker; +pub use string_to_unicode_sequence_converter::StringToUnicodeSequenceConverter; +pub use target_matcher::TargetMatcher; +pub use unicode_sequence_to_string_converter::UnicodeSequenceToStringConverter; +pub use unicode_symbol_to_char_converter::UnicodeSymbolToCharConverter; diff --git a/rust/src/sequences/raw_number_to_address_converter.rs b/rust/src/sequences/raw_number_to_address_converter.rs new file mode 100644 index 0000000..630eed6 --- /dev/null +++ b/rust/src/sequences/raw_number_to_address_converter.rs @@ -0,0 +1,14 @@ +use crate::hybrid_reference::external_reference_value; + +#[derive(Clone, Copy, Debug, Default)] +pub struct RawNumberToAddressConverter; + +impl RawNumberToAddressConverter { + pub fn new() -> Self { + Self + } + + pub fn convert(&self, raw_number: u32) -> u32 { + external_reference_value(raw_number).unwrap_or(raw_number) + } +} diff 
--git a/rust/src/sequences/right_sequence_walker.rs b/rust/src/sequences/right_sequence_walker.rs new file mode 100644 index 0000000..dc9eac4 --- /dev/null +++ b/rust/src/sequences/right_sequence_walker.rs @@ -0,0 +1,38 @@ +use crate::link_storage::LinkStorage; +use crate::sequences::{DefaultStack, TargetMatcher}; + +#[derive(Clone, Copy, Debug)] +pub struct RightSequenceWalker { + unicode_symbol_criterion_matcher: TargetMatcher, +} + +impl RightSequenceWalker { + pub fn new(unicode_symbol_criterion_matcher: TargetMatcher) -> Self { + Self { + unicode_symbol_criterion_matcher, + } + } + + pub fn walk(&self, links: &LinkStorage, sequence: u32) -> Vec { + let mut output = Vec::new(); + let mut stack = DefaultStack::new(); + stack.push(sequence); + + while let Some(element) = stack.pop() { + if self + .unicode_symbol_criterion_matcher + .is_matched(links, element) + { + output.push(element); + continue; + } + + if let Some(link) = links.get(element) { + stack.push(link.target); + stack.push(link.source); + } + } + + output + } +} diff --git a/rust/src/sequences/string_to_unicode_sequence_converter.rs b/rust/src/sequences/string_to_unicode_sequence_converter.rs new file mode 100644 index 0000000..6047213 --- /dev/null +++ b/rust/src/sequences/string_to_unicode_sequence_converter.rs @@ -0,0 +1,40 @@ +use crate::link_storage::LinkStorage; +use crate::sequences::{BalancedVariantConverter, CharToUnicodeSymbolConverter}; + +#[derive(Clone, Copy, Debug)] +pub struct StringToUnicodeSequenceConverter { + char_to_unicode_symbol_converter: CharToUnicodeSymbolConverter, + balanced_variant_converter: BalancedVariantConverter, + unicode_sequence_type: u32, +} + +impl StringToUnicodeSequenceConverter { + pub fn new( + char_to_unicode_symbol_converter: CharToUnicodeSymbolConverter, + balanced_variant_converter: BalancedVariantConverter, + unicode_sequence_type: u32, + ) -> Self { + Self { + char_to_unicode_symbol_converter, + balanced_variant_converter, + unicode_sequence_type, + } + 
} + + pub fn convert(&self, links: &mut LinkStorage, content: &str) -> u32 { + let symbols = content + .encode_utf16() + .map(|code_unit| { + self.char_to_unicode_symbol_converter + .convert(links, code_unit) + }) + .collect::>(); + + if symbols.is_empty() { + return self.unicode_sequence_type; + } + + let sequence = self.balanced_variant_converter.convert(links, &symbols); + links.get_or_create(sequence, self.unicode_sequence_type) + } +} diff --git a/rust/src/sequences/target_matcher.rs b/rust/src/sequences/target_matcher.rs new file mode 100644 index 0000000..6a3a13f --- /dev/null +++ b/rust/src/sequences/target_matcher.rs @@ -0,0 +1,22 @@ +use crate::link_storage::LinkStorage; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct TargetMatcher { + target: u32, +} + +impl TargetMatcher { + pub fn new(target: u32) -> Self { + Self { target } + } + + pub fn target(&self) -> u32 { + self.target + } + + pub fn is_matched(&self, links: &LinkStorage, link: u32) -> bool { + links + .get(link) + .is_some_and(|candidate| candidate.target == self.target) + } +} diff --git a/rust/src/sequences/unicode_sequence_to_string_converter.rs b/rust/src/sequences/unicode_sequence_to_string_converter.rs new file mode 100644 index 0000000..f5472c9 --- /dev/null +++ b/rust/src/sequences/unicode_sequence_to_string_converter.rs @@ -0,0 +1,52 @@ +use anyhow::{bail, Result}; + +use crate::link_storage::LinkStorage; +use crate::sequences::{RightSequenceWalker, TargetMatcher, UnicodeSymbolToCharConverter}; + +#[derive(Clone, Copy, Debug)] +pub struct UnicodeSequenceToStringConverter { + unicode_sequence_criterion_matcher: TargetMatcher, + sequence_walker: RightSequenceWalker, + unicode_symbol_to_char_converter: UnicodeSymbolToCharConverter, + unicode_sequence_type: u32, +} + +impl UnicodeSequenceToStringConverter { + pub fn new( + unicode_sequence_criterion_matcher: TargetMatcher, + sequence_walker: RightSequenceWalker, + unicode_symbol_to_char_converter: UnicodeSymbolToCharConverter, 
+ unicode_sequence_type: u32, + ) -> Self { + Self { + unicode_sequence_criterion_matcher, + sequence_walker, + unicode_symbol_to_char_converter, + unicode_sequence_type, + } + } + + pub fn convert(&self, links: &LinkStorage, sequence: u32) -> Result { + if sequence == self.unicode_sequence_type { + return Ok(String::new()); + } + if !self + .unicode_sequence_criterion_matcher + .is_matched(links, sequence) + { + bail!("Specified link {sequence} is not a Unicode sequence."); + } + + let Some(sequence_link) = links.get(sequence) else { + bail!("Unicode sequence link {sequence} does not exist."); + }; + let code_units = self + .sequence_walker + .walk(links, sequence_link.source) + .into_iter() + .map(|symbol| self.unicode_symbol_to_char_converter.convert(links, symbol)) + .collect::>>()?; + + Ok(String::from_utf16(&code_units)?) + } +} diff --git a/rust/src/sequences/unicode_symbol_to_char_converter.rs b/rust/src/sequences/unicode_symbol_to_char_converter.rs new file mode 100644 index 0000000..40e8413 --- /dev/null +++ b/rust/src/sequences/unicode_symbol_to_char_converter.rs @@ -0,0 +1,37 @@ +use anyhow::{bail, Result}; + +use crate::link_storage::LinkStorage; +use crate::sequences::{RawNumberToAddressConverter, TargetMatcher}; + +#[derive(Clone, Copy, Debug)] +pub struct UnicodeSymbolToCharConverter { + number_to_address_converter: RawNumberToAddressConverter, + unicode_symbol_criterion_matcher: TargetMatcher, +} + +impl UnicodeSymbolToCharConverter { + pub fn new( + number_to_address_converter: RawNumberToAddressConverter, + unicode_symbol_criterion_matcher: TargetMatcher, + ) -> Self { + Self { + number_to_address_converter, + unicode_symbol_criterion_matcher, + } + } + + pub fn convert(&self, links: &LinkStorage, symbol: u32) -> Result { + if !self + .unicode_symbol_criterion_matcher + .is_matched(links, symbol) + { + bail!("Specified link {symbol} is not a Unicode symbol."); + } + + let Some(link) = links.get(symbol) else { + bail!("Unicode symbol link {symbol} 
does not exist."); + }; + let code_unit = self.number_to_address_converter.convert(link.source); + Ok(u16::try_from(code_unit)?) + } +} diff --git a/rust/src/unicode_string_storage.rs b/rust/src/unicode_string_storage.rs index 359d690..22a4fd6 100644 --- a/rust/src/unicode_string_storage.rs +++ b/rust/src/unicode_string_storage.rs @@ -1,45 +1,24 @@ //! Unicode string and name storage backed by doublet links. //! -//! This mirrors the C# `UnicodeStringStorage` implementation used by -//! `NamedLinksDecorator`: strings are stored as `String -> UnicodeSequence` -//! links, Unicode sequences are balanced doublet trees, Unicode symbols are -//! `raw-code-unit -> UnicodeSymbol` links, and names are regular doublet links -//! from an internal or external reference to `Name -> String`. +//! This mirrors the C# `UnicodeStringStorage` constructor pipeline: +//! pinned types, `BalancedVariantConverter`, target matchers, Unicode symbol +//! converters, string/sequence converters, right-sequence walking, and +//! `NamedLinks`. + +use std::cell::RefCell; use anyhow::{bail, Result}; +use crate::hybrid_reference::{external_reference, external_reference_value}; use crate::link_storage::LinkStorage; +use crate::named_links::NamedLinks; use crate::pinned_types::PinnedTypes; - -const EXTERNAL_ZERO: u32 = (u32::MAX / 2) + 1; - -/// Encodes an external reference the same way `Platform.Data.Hybrid` does. -pub fn external_reference(value: u32) -> u32 { - if value == 0 { - EXTERNAL_ZERO - } else { - 0u32.wrapping_sub(value) - } -} - -/// Decodes a `Platform.Data.Hybrid` external reference. 
-pub fn external_reference_value(value: u32) -> Option { - if value == EXTERNAL_ZERO { - Some(0) - } else if value >= EXTERNAL_ZERO { - Some(0u32.wrapping_sub(value)) - } else { - None - } -} - -fn raw_number_from_address(value: u32) -> u32 { - external_reference(value) -} - -fn address_from_raw_number(value: u32) -> u32 { - external_reference_value(value).unwrap_or(value) -} +use crate::sequences::{ + AddressToRawNumberConverter, BalancedVariantConverter, CachingConverterDecorator, + CharToUnicodeSymbolConverter, RawNumberToAddressConverter, RightSequenceWalker, + StringToUnicodeSequenceConverter, TargetMatcher, UnicodeSequenceToStringConverter, + UnicodeSymbolToCharConverter, +}; /// Link-backed Unicode string storage with C# pinned type layout. pub struct UnicodeStringStorage<'a> { @@ -50,6 +29,18 @@ pub struct UnicodeStringStorage<'a> { string_type: u32, empty_string_type: u32, name_type: u32, + address_to_number_converter: AddressToRawNumberConverter, + number_to_address_converter: RawNumberToAddressConverter, + balanced_variant_converter: BalancedVariantConverter, + unicode_symbol_criterion_matcher: TargetMatcher, + unicode_sequence_criterion_matcher: TargetMatcher, + char_to_unicode_symbol_converter: CharToUnicodeSymbolConverter, + unicode_symbol_to_char_converter: UnicodeSymbolToCharConverter, + string_to_unicode_sequence_converter: StringToUnicodeSequenceConverter, + sequence_walker: RightSequenceWalker, + unicode_sequence_to_string_converter: UnicodeSequenceToStringConverter, + string_to_unicode_sequence_cache: CachingConverterDecorator, + unicode_sequence_to_string_cache: RefCell>, } impl<'a> UnicodeStringStorage<'a> { @@ -73,6 +64,30 @@ impl<'a> UnicodeStringStorage<'a> { ) }; + let address_to_number_converter = AddressToRawNumberConverter::new(); + let number_to_address_converter = RawNumberToAddressConverter::new(); + let balanced_variant_converter = BalancedVariantConverter::new(); + let unicode_symbol_criterion_matcher = 
TargetMatcher::new(unicode_symbol_type); + let unicode_sequence_criterion_matcher = TargetMatcher::new(unicode_sequence_type); + let char_to_unicode_symbol_converter = + CharToUnicodeSymbolConverter::new(address_to_number_converter, unicode_symbol_type); + let unicode_symbol_to_char_converter = UnicodeSymbolToCharConverter::new( + number_to_address_converter, + unicode_symbol_criterion_matcher, + ); + let string_to_unicode_sequence_converter = StringToUnicodeSequenceConverter::new( + char_to_unicode_symbol_converter, + balanced_variant_converter, + unicode_sequence_type, + ); + let sequence_walker = RightSequenceWalker::new(unicode_symbol_criterion_matcher); + let unicode_sequence_to_string_converter = UnicodeSequenceToStringConverter::new( + unicode_sequence_criterion_matcher, + sequence_walker, + unicode_symbol_to_char_converter, + unicode_sequence_type, + ); + let mut storage = Self { links, type_type, @@ -81,6 +96,18 @@ impl<'a> UnicodeStringStorage<'a> { string_type, empty_string_type, name_type, + address_to_number_converter, + number_to_address_converter, + balanced_variant_converter, + unicode_symbol_criterion_matcher, + unicode_sequence_criterion_matcher, + char_to_unicode_symbol_converter, + unicode_symbol_to_char_converter, + string_to_unicode_sequence_converter, + sequence_walker, + unicode_sequence_to_string_converter, + string_to_unicode_sequence_cache: CachingConverterDecorator::new(), + unicode_sequence_to_string_cache: RefCell::new(CachingConverterDecorator::new()), }; storage.set_name(type_type, "Type")?; @@ -98,7 +125,7 @@ impl<'a> UnicodeStringStorage<'a> { } pub fn into_named_links(self) -> NamedLinks<'a> { - NamedLinks { storage: self } + NamedLinks::from_storage(self) } pub fn type_type(&self) -> u32 { @@ -125,6 +152,46 @@ impl<'a> UnicodeStringStorage<'a> { self.name_type } + pub fn address_to_number_converter(&self) -> AddressToRawNumberConverter { + self.address_to_number_converter + } + + pub fn number_to_address_converter(&self) -> 
RawNumberToAddressConverter { + self.number_to_address_converter + } + + pub fn balanced_variant_converter(&self) -> BalancedVariantConverter { + self.balanced_variant_converter + } + + pub fn unicode_symbol_criterion_matcher(&self) -> TargetMatcher { + self.unicode_symbol_criterion_matcher + } + + pub fn unicode_sequence_criterion_matcher(&self) -> TargetMatcher { + self.unicode_sequence_criterion_matcher + } + + pub fn char_to_unicode_symbol_converter(&self) -> CharToUnicodeSymbolConverter { + self.char_to_unicode_symbol_converter + } + + pub fn unicode_symbol_to_char_converter(&self) -> UnicodeSymbolToCharConverter { + self.unicode_symbol_to_char_converter + } + + pub fn string_to_unicode_sequence_converter(&self) -> StringToUnicodeSequenceConverter { + self.string_to_unicode_sequence_converter + } + + pub fn sequence_walker(&self) -> RightSequenceWalker { + self.sequence_walker + } + + pub fn unicode_sequence_to_string_converter(&self) -> UnicodeSequenceToStringConverter { + self.unicode_sequence_to_string_converter + } + pub fn create_string(&mut self, content: &str) -> Result { let string_sequence = self.get_string_sequence(content); Ok(self.links.get_or_create(self.string_type, string_sequence)) @@ -153,17 +220,24 @@ impl<'a> UnicodeStringStorage<'a> { if sequence == self.empty_string_type { return Ok(Vec::new()); } + if !self + .unicode_sequence_criterion_matcher + .is_matched(self.links, sequence) + { + bail!("Link {sequence} is not a Unicode sequence."); + } let unicode_sequence = self .links .get(sequence) .ok_or_else(|| anyhow::anyhow!("Unicode sequence link {sequence} does not exist."))?; - if unicode_sequence.target != self.unicode_sequence_type { - bail!("Link {sequence} is not a Unicode sequence."); - } - let symbol_sequence = unicode_sequence.source; - self.walk_right_sequence(symbol_sequence) + + self.sequence_walker + .walk(self.links, unicode_sequence.source) .into_iter() - .map(|symbol| self.unicode_symbol_to_code_unit(symbol)) + .map(|symbol| 
{ + self.unicode_symbol_to_char_converter + .convert(self.links, symbol) + }) .collect() } @@ -217,12 +291,11 @@ impl<'a> UnicodeStringStorage<'a> { pub fn get_name(&self, link: u32) -> Result> { for name_pair in self.links.query(None, Some(link), None) { let name_candidate = name_pair.target; - if self - .links - .get(name_candidate) - .is_some_and(|candidate| candidate.source == self.name_type) - { - return self.get_string(name_candidate).map(Some); + let Some(candidate) = self.links.get(name_candidate) else { + continue; + }; + if candidate.source == self.name_type { + return self.get_string(candidate.target).map(Some); } } Ok(None) @@ -283,124 +356,33 @@ impl<'a> UnicodeStringStorage<'a> { } fn string_to_unicode_sequence(&mut self, content: &str) -> u32 { - let symbols = content - .encode_utf16() - .map(|code_unit| self.code_unit_to_unicode_symbol(code_unit)) - .collect::>(); - self.unicode_symbols_to_unicode_sequence(&symbols) - } - - fn code_unit_to_unicode_symbol(&mut self, code_unit: u16) -> u32 { - let raw_number = raw_number_from_address(u32::from(code_unit)); - self.links - .get_or_create(raw_number, self.unicode_symbol_type) - } - - fn unicode_symbol_to_code_unit(&self, symbol: u32) -> Result { - let Some(link) = self.links.get(symbol) else { - bail!("Unicode symbol link {symbol} does not exist."); - }; - if link.target != self.unicode_symbol_type { - bail!("Specified link {symbol} is not a Unicode symbol."); + let input = content.to_string(); + if let Some(cached) = self.string_to_unicode_sequence_cache.get(&input) { + return cached; } - let code_unit = address_from_raw_number(link.source); - Ok(u16::try_from(code_unit)?) 
- } - fn unicode_symbols_to_unicode_sequence(&mut self, symbols: &[u32]) -> u32 { - if symbols.is_empty() { - return self.unicode_sequence_type; - } - let sequence = self.balanced_variant(symbols); - self.links - .get_or_create(sequence, self.unicode_sequence_type) + let converter = self.string_to_unicode_sequence_converter; + let sequence = converter.convert(self.links, content); + self.string_to_unicode_sequence_cache + .insert(input, sequence) } fn unicode_sequence_to_string(&self, sequence: u32) -> Result { - if sequence == self.unicode_sequence_type { - return Ok(String::new()); + if let Some(cached) = self + .unicode_sequence_to_string_cache + .borrow() + .get(&sequence) + { + return Ok(cached); } - let Some(sequence_link) = self.links.get(sequence) else { - bail!("Unicode sequence link {sequence} does not exist."); - }; - if sequence_link.target != self.unicode_sequence_type { - bail!("Specified link {sequence} is not a Unicode sequence."); - } - - let code_units = self - .walk_right_sequence(sequence_link.source) - .into_iter() - .map(|symbol| self.unicode_symbol_to_code_unit(symbol)) - .collect::>>()?; - Ok(String::from_utf16(&code_units)?) 
- } - - fn balanced_variant(&mut self, symbols: &[u32]) -> u32 { - match symbols.len() { - 0 => 0, - 1 => symbols[0], - 2 => self.links.get_or_create(symbols[0], symbols[1]), - _ => { - let mut layer = symbols.to_vec(); - while layer.len() > 2 { - let mut next = Vec::with_capacity(layer.len().div_ceil(2)); - let mut chunks = layer.chunks_exact(2); - for pair in &mut chunks { - next.push(self.links.get_or_create(pair[0], pair[1])); - } - if let Some(&remainder) = chunks.remainder().first() { - next.push(remainder); - } - layer = next; - } - self.links.get_or_create(layer[0], layer[1]) - } - } - } - - fn walk_right_sequence(&self, sequence: u32) -> Vec { - let mut output = Vec::new(); - let mut stack = Vec::new(); - let mut element = sequence; - - if self.is_unicode_symbol(element) { - output.push(element); - return output; - } - - loop { - if self.is_unicode_symbol(element) { - let Some(popped) = stack.pop() else { - break; - }; - if let Some(link) = self.links.get(popped) { - if self.is_unicode_symbol(link.source) { - output.push(link.source); - } - if self.is_unicode_symbol(link.target) { - output.push(link.target); - } - element = link.target; - } else { - break; - } - } else { - let Some(link) = self.links.get(element) else { - break; - }; - stack.push(element); - element = link.source; - } - } - - output - } - - fn is_unicode_symbol(&self, link: u32) -> bool { - self.links - .get(link) - .is_some_and(|link| link.target == self.unicode_symbol_type) + let output = self + .unicode_sequence_to_string_converter + .convert(self.links, sequence)?; + self.unicode_sequence_to_string_cache + .borrow_mut() + .insert(sequence, output.clone()); + Ok(output) } fn unwrap_string_sequence(&self, string_value: u32) -> Result { @@ -417,55 +399,3 @@ impl<'a> UnicodeStringStorage<'a> { bail!("The passed link does not contain a string.") } } - -/// Public facade matching the C# `NamedLinks` role. 
-pub struct NamedLinks<'a> { - storage: UnicodeStringStorage<'a>, -} - -impl<'a> NamedLinks<'a> { - pub fn new(links: &'a mut LinkStorage) -> Result { - Ok(UnicodeStringStorage::new(links)?.into_named_links()) - } - - pub fn set_name_for_external_reference(&mut self, link: u32, name: &str) -> Result { - self.storage.set_name_for_external_reference(link, name) - } - - pub fn set_name(&mut self, link: u32, name: &str) -> Result { - self.storage.set_name(link, name) - } - - pub fn get_name_by_external_reference(&self, link: u32) -> Result> { - self.storage.get_name_by_external_reference(link) - } - - pub fn get_name(&self, link: u32) -> Result> { - self.storage.get_name(link) - } - - pub fn get_by_name(&mut self, name: &str) -> Result> { - self.storage.get_by_name(name) - } - - pub fn get_external_reference_by_name(&mut self, name: &str) -> Result> { - self.storage.get_external_reference_by_name(name) - } - - pub fn remove_name(&mut self, link: u32) -> Result<()> { - self.storage.remove_name(link) - } - - pub fn remove_name_by_external_reference(&mut self, external_reference_id: u32) -> Result<()> { - self.storage - .remove_name_by_external_reference(external_reference_id) - } - - pub fn unicode_storage(&self) -> &UnicodeStringStorage<'a> { - &self.storage - } - - pub fn unicode_storage_mut(&mut self) -> &mut UnicodeStringStorage<'a> { - &mut self.storage - } -} diff --git a/rust/tests/unicode_sequence_converter_tests.rs b/rust/tests/unicode_sequence_converter_tests.rs new file mode 100644 index 0000000..d3c82a1 --- /dev/null +++ b/rust/tests/unicode_sequence_converter_tests.rs @@ -0,0 +1,141 @@ +use anyhow::Result; +use link_cli::sequences::{ + AddressToRawNumberConverter, BalancedVariantConverter, CachingConverterDecorator, + CharToUnicodeSymbolConverter, RawNumberToAddressConverter, RightSequenceWalker, + StringToUnicodeSequenceConverter, TargetMatcher, UnicodeSequenceToStringConverter, + UnicodeSymbolToCharConverter, +}; +use link_cli::{external_reference, 
HybridReference, LinkStorage, PinnedTypes}; +use std::cell::Cell; +use tempfile::NamedTempFile; + +fn with_links(test: impl FnOnce(&mut LinkStorage) -> Result<()>) -> Result<()> { + let temp_file = NamedTempFile::new()?; + let db_path = temp_file.path().to_str().unwrap(); + let mut links = LinkStorage::new(db_path, false)?; + test(&mut links) +} + +fn allocate_unicode_types(links: &mut LinkStorage) -> Result<(u32, u32)> { + let mut pinned_types = PinnedTypes::new(links); + let _type_type = pinned_types.next_type()?; + let unicode_symbol_type = pinned_types.next_type()?; + let unicode_sequence_type = pinned_types.next_type()?; + Ok((unicode_symbol_type, unicode_sequence_type)) +} + +#[test] +fn raw_number_converters_match_hybrid_external_reference_encoding() { + let address_to_number = AddressToRawNumberConverter::new(); + let number_to_address = RawNumberToAddressConverter::new(); + + assert_eq!(u32::MAX, external_reference(1)); + assert_eq!(u32::MAX, address_to_number.convert(1)); + assert_eq!(1, number_to_address.convert(u32::MAX)); + + let zero = HybridReference::external(0); + assert!(zero.is_external()); + assert_eq!(Some(0), zero.absolute_value()); +} + +#[test] +fn target_and_char_symbol_converters_create_and_decode_symbols() -> Result<()> { + with_links(|links| { + let (unicode_symbol_type, _) = allocate_unicode_types(links)?; + let symbol_matcher = TargetMatcher::new(unicode_symbol_type); + let char_to_symbol = CharToUnicodeSymbolConverter::new( + AddressToRawNumberConverter::new(), + unicode_symbol_type, + ); + let symbol_to_char = + UnicodeSymbolToCharConverter::new(RawNumberToAddressConverter::new(), symbol_matcher); + + let symbol = char_to_symbol.convert(links, 'A' as u16); + + assert!(symbol_matcher.is_matched(links, symbol)); + assert_eq!('A' as u16, symbol_to_char.convert(links, symbol)?); + Ok(()) + }) +} + +#[test] +fn balanced_variant_and_right_sequence_walker_preserve_symbol_order() -> Result<()> { + with_links(|links| { + let 
(unicode_symbol_type, _) = allocate_unicode_types(links)?; + let symbol_matcher = TargetMatcher::new(unicode_symbol_type); + let char_to_symbol = CharToUnicodeSymbolConverter::new( + AddressToRawNumberConverter::new(), + unicode_symbol_type, + ); + let symbol_to_char = + UnicodeSymbolToCharConverter::new(RawNumberToAddressConverter::new(), symbol_matcher); + let symbols = "ABCDE" + .encode_utf16() + .map(|code_unit| char_to_symbol.convert(links, code_unit)) + .collect::>(); + + let root = BalancedVariantConverter::new().convert(links, &symbols); + let walked = RightSequenceWalker::new(symbol_matcher).walk(links, root); + let code_units = walked + .into_iter() + .map(|symbol| symbol_to_char.convert(links, symbol)) + .collect::>>()?; + + assert_eq!("ABCDE", String::from_utf16(&code_units)?); + Ok(()) + }) +} + +#[test] +fn string_and_unicode_sequence_converters_round_trip_utf16_text() -> Result<()> { + with_links(|links| { + let (unicode_symbol_type, unicode_sequence_type) = allocate_unicode_types(links)?; + let symbol_matcher = TargetMatcher::new(unicode_symbol_type); + let sequence_matcher = TargetMatcher::new(unicode_sequence_type); + let char_to_symbol = CharToUnicodeSymbolConverter::new( + AddressToRawNumberConverter::new(), + unicode_symbol_type, + ); + let symbol_to_char = + UnicodeSymbolToCharConverter::new(RawNumberToAddressConverter::new(), symbol_matcher); + let string_to_sequence = StringToUnicodeSequenceConverter::new( + char_to_symbol, + BalancedVariantConverter::new(), + unicode_sequence_type, + ); + let sequence_to_string = UnicodeSequenceToStringConverter::new( + sequence_matcher, + RightSequenceWalker::new(symbol_matcher), + symbol_to_char, + unicode_sequence_type, + ); + let input = "A😀B世界"; + + let sequence = string_to_sequence.convert(links, input); + + assert!(sequence_matcher.is_matched(links, sequence)); + assert_eq!(input, sequence_to_string.convert(links, sequence)?); + Ok(()) + }) +} + +#[test] +fn 
caching_converter_decorator_reuses_cached_values() -> Result<()> { + let calls = Cell::new(0); + let mut cache = CachingConverterDecorator::::new(); + + let first = cache.convert_with("Unicode".to_string(), |input| { + calls.set(calls.get() + 1); + Ok::<_, anyhow::Error>(input.encode_utf16().count()) + })?; + let second = cache.convert_with("Unicode".to_string(), |input| { + calls.set(calls.get() + 1); + Ok::<_, anyhow::Error>(input.len()) + })?; + + assert_eq!(7, first); + assert_eq!(first, second); + assert_eq!(1, calls.get()); + assert_eq!(1, cache.len()); + Ok(()) +} From 0a7b26376d5893fb68af2a93156c2ee5e1a9e101 Mon Sep 17 00:00:00 2001 From: konard Date: Thu, 30 Apr 2026 06:05:33 +0000 Subject: [PATCH 6/6] fix: remove direct clap dependency --- docs/case-studies/issue-67/README.md | 8 +- rust/Cargo.lock | 1 - rust/Cargo.toml | 1 - .../20260430_041900_rust_query_parity.md | 2 +- rust/src/cli.rs | 225 ++++++++++++++++++ rust/src/lib.rs | 1 + rust/src/main.rs | 59 +---- rust/tests/cli_arguments_tests.rs | 82 +++++++ rust/tests/dependency_basis_tests.rs | 26 +- 9 files changed, 350 insertions(+), 55 deletions(-) create mode 100644 rust/src/cli.rs create mode 100644 rust/tests/cli_arguments_tests.rs diff --git a/docs/case-studies/issue-67/README.md b/docs/case-studies/issue-67/README.md index 0245f57..fcf5353 100644 --- a/docs/case-studies/issue-67/README.md +++ b/docs/case-studies/issue-67/README.md @@ -21,7 +21,7 @@ As of 2026-04-30: | Requirement | Current status | Solution plan | | --- | --- | --- | -| Use the latest `doublets-rs`, `links-notation`, and `lino-arguments` as a Rust basis. | `rust/Cargo.toml` now declares `doublets = "0.3.0"`, `links-notation = "0.13.0"`, and `lino-arguments = "0.3.0"` with upstream source links. The Rust parser delegates to `links-notation`, CLI parsing uses `lino-arguments`, and the local link model has conversion coverage for `doublets::Link`. 
| Continue replacing local storage internals behind compatibility tests so public CLI behavior remains stable while binary storage parity is developed. | +| Use the latest `doublets-rs`, `links-notation`, and `lino-arguments` as a Rust basis. | `rust/Cargo.toml` now declares `doublets = "0.3.0"`, `links-notation = "0.13.0"`, and `lino-arguments = "0.3.0"` with upstream source links, without a direct `clap` dependency. The Rust parser delegates to `links-notation`, the CLI initializes through `lino-arguments`, and the local link model has conversion coverage for `doublets::Link`. | Continue replacing local storage internals behind compatibility tests so public CLI behavior remains stable while binary storage parity is developed. | | Reimplement sequence support in pure Rust based on `Data.Doublets.Sequences`. | Rust now has one-file-per-abstraction ports for the C# sequence pipeline under `rust/src/sequences/`: `AddressToRawNumberConverter`, `RawNumberToAddressConverter`, `BalancedVariantConverter`, `TargetMatcher`, `CharToUnicodeSymbolConverter`, `UnicodeSymbolToCharConverter`, `CachingConverterDecorator`, `DefaultStack`, `RightSequenceWalker`, `StringToUnicodeSequenceConverter`, and `UnicodeSequenceToStringConverter`. `UnicodeStringStorage` composes those components. | Continue extending this module toward full package coverage for advanced sequence indexes, compaction, and binary fixture compatibility. | | Match C# Unicode support and binary file compatibility. | Rust now round-trips empty, ASCII, multilingual, and surrogate-pair text through UTF-16 code units, matching the C# `string`/`char` model used by `Data.Doublets.Sequences`. Cross-runtime binary fixtures are not yet complete. | Add C#-generated binary fixtures and Rust-generated binary fixtures, then verify both runtimes can read each file without data loss. Include non-ASCII names and multi-codepoint text cases. | | Support the same CLI options, features, and tests as C#. 
| The repository already has C# and Rust test suites. This PR closes concrete query semantics gaps found against the C# `AdvancedMixedQueryProcessor` behavior. | Continue converting C# tests into Rust parity tests by feature area: storage, parser, query processor, CLI commands, persistence, and sequences. | @@ -41,14 +41,14 @@ This PR focuses on query processor parity gaps that were blocking Rust behavior - Returns matched changes for no-op variable substitutions, matching the C# behavior. - Reuses existing structural links for named composite substitutions before applying a new name, avoiding accidental duplicate leaf creation. - Declares and compiles the requested Rust basis crates: `doublets`, `links-notation`, and `lino-arguments`. -- Uses `links-notation` for parsing, including richer quoted Unicode identifiers, and uses `lino-arguments` as the CLI argument parser entrypoint. +- Uses `links-notation` for parsing, including richer quoted Unicode identifiers, and keeps `lino-arguments` as the CLI configuration basis without declaring `clap` directly. - Adds a Rust `UnicodeStringStorage` implementation based on the C# `Data.Doublets.Sequences` path: - `PinnedTypes` deterministic type allocation for `Type`, `UnicodeSymbol`, `UnicodeSequence`, `String`, `EmptyString`, and `Name`. - `Hybrid`-compatible external/raw number encoding for Unicode code units and external references. - Direct Rust files for the C# constructor pipeline: raw-number converters, `BalancedVariantConverter`, `TargetMatcher`, char/symbol converters, `CachingConverterDecorator`, `DefaultStack`, `RightSequenceWalker`, and string/sequence converters. - Separate `NamedLinks` facade behavior for internal links and external references, including removal. -The Rust test suite now includes focused parity tests in `rust/tests/query_processor_csharp_parity_tests.rs`, `rust/tests/unicode_string_storage_tests.rs`, and `rust/tests/unicode_sequence_converter_tests.rs`. 
+The Rust test suite now includes focused parity tests in `rust/tests/query_processor_csharp_parity_tests.rs`, `rust/tests/unicode_string_storage_tests.rs`, `rust/tests/unicode_sequence_converter_tests.rs`, `rust/tests/dependency_basis_tests.rs`, and `rust/tests/cli_arguments_tests.rs`. ## C# To Rust Tree Comparison @@ -66,7 +66,7 @@ The Rust test suite now includes focused parity tests in `rust/tests/query_proce | `LinksExtensions.cs` | `rust/src/link_storage.rs` | Represented by `ensure_created` and explicit-index update paths. | | `EnumerableExtensions.cs` | Rust destructuring is native pattern syntax; no runtime counterpart required. | Not required as a separate module. | | `ILinksUnrestricted.cs` | No direct Rust trait yet. | Placeholder C# interface only; add a Rust trait if a future storage adapter needs this abstraction. | -| `Program.cs` | `rust/src/main.rs` | Implemented with matching CLI option aliases and query flow. | +| `Program.cs` | `rust/src/main.rs`, `rust/src/cli.rs`, `rust/tests/cli_arguments_tests.rs` | Implemented with matching CLI option aliases and query flow, while relying on `lino-arguments` initialization and no direct `clap` manifest dependency. 
| ## Next Parity Work diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 05b28f9..9c2b008 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -244,7 +244,6 @@ name = "link-cli" version = "0.1.0" dependencies = [ "anyhow", - "clap", "doublets", "links-notation", "lino-arguments", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index f137675..fde3d2a 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -20,7 +20,6 @@ path = "src/main.rs" [dependencies] thiserror = "2.0" anyhow = "1.0" -clap = { version = "4.5", features = ["derive"] } # Issue 67 Rust basis crates: # Source: http://github.com/linksplatform/doublets-rs doublets = "0.3.0" diff --git a/rust/changelog.d/20260430_041900_rust_query_parity.md b/rust/changelog.d/20260430_041900_rust_query_parity.md index ba8f827..8ada425 100644 --- a/rust/changelog.d/20260430_041900_rust_query_parity.md +++ b/rust/changelog.d/20260430_041900_rust_query_parity.md @@ -2,4 +2,4 @@ bump: minor --- -Expanded Rust query processor parity with the C# implementation, including unwrapped query pairs, structural and wildcard deletes, variable-driven updates, named composite rename behavior, direct Rust basis dependencies on doublets, links-notation, and lino-arguments, and a doublet-backed Rust port of the C# Unicode string storage path with explicit Data.Doublets.Sequences-style converter abstractions. +Expanded Rust query processor parity with the C# implementation, including unwrapped query pairs, structural and wildcard deletes, variable-driven updates, named composite rename behavior, direct Rust basis dependencies on doublets, links-notation, and lino-arguments without a direct clap dependency, and a doublet-backed Rust port of the C# Unicode string storage path with explicit Data.Doublets.Sequences-style converter abstractions. diff --git a/rust/src/cli.rs b/rust/src/cli.rs new file mode 100644 index 0000000..7ab8293 --- /dev/null +++ b/rust/src/cli.rs @@ -0,0 +1,225 @@ +//! 
Command-line argument parsing for the `clink` binary. + +use anyhow::{bail, Result}; +use std::env; +use std::ffi::OsString; + +const DEFAULT_DATABASE_FILENAME: &str = "db.links"; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Cli { + pub db: String, + pub query: Option, + pub query_arg: Option, + pub trace: bool, + pub structure: Option, + pub before: bool, + pub changes: bool, + pub after: bool, +} + +impl Default for Cli { + fn default() -> Self { + Self { + db: DEFAULT_DATABASE_FILENAME.to_string(), + query: None, + query_arg: None, + trace: false, + structure: None, + before: false, + changes: false, + after: false, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CliCommand { + Run(Cli), + Help, + Version, +} + +impl Cli { + pub fn parse() -> Result { + lino_arguments::init(); + Self::parse_from(env::args_os()) + } + + pub fn parse_from(args: I) -> Result + where + I: IntoIterator, + T: Into, + { + let mut cli = Cli::default(); + let mut args = args + .into_iter() + .map(|arg| arg.into().to_string_lossy().into_owned()) + .peekable(); + + let _program = args.next(); + + while let Some(arg) = args.next() { + if let Some(value) = inline_value(&arg, &["--db", "--data-source", "--data"]) { + cli.db = value.to_string(); + continue; + } + if let Some(value) = inline_value(&arg, &["--query", "--apply", "--do"]) { + cli.query = Some(value.to_string()); + continue; + } + if let Some(value) = inline_value(&arg, &["--structure"]) { + cli.structure = Some(parse_link_id("--structure", value)?); + continue; + } + if let Some(value) = inline_value(&arg, &["--trace"]) { + cli.trace = parse_bool("--trace", value)?; + continue; + } + if let Some(value) = inline_value(&arg, &["--before"]) { + cli.before = parse_bool("--before", value)?; + continue; + } + if let Some(value) = inline_value(&arg, &["--changes"]) { + cli.changes = parse_bool("--changes", value)?; + continue; + } + if let Some(value) = inline_value(&arg, &["--after", "--links"]) { + cli.after = 
parse_bool("--after", value)?; + continue; + } + + match arg.as_str() { + "-h" | "--help" => return Ok(CliCommand::Help), + "-V" | "--version" => return Ok(CliCommand::Version), + "-d" | "--db" | "--data-source" | "--data" => { + cli.db = next_value(&mut args, &arg)?; + } + "-q" | "--query" | "--apply" | "--do" => { + cli.query = Some(next_value(&mut args, &arg)?); + } + "-t" | "--trace" => { + cli.trace = next_bool_value(&mut args, true)?; + } + "-s" | "--structure" => { + let value = next_value(&mut args, &arg)?; + cli.structure = Some(parse_link_id(&arg, &value)?); + } + "-b" | "--before" => { + cli.before = next_bool_value(&mut args, true)?; + } + "-c" | "--changes" => { + cli.changes = next_bool_value(&mut args, true)?; + } + "-a" | "--after" | "--links" => { + cli.after = next_bool_value(&mut args, true)?; + } + "--" => { + for value in args.by_ref() { + set_positional_query(&mut cli, value)?; + } + break; + } + value if value.starts_with('-') => { + bail!("unknown option '{value}'"); + } + value => { + set_positional_query(&mut cli, value.to_string())?; + } + } + } + + Ok(CliCommand::Run(cli)) + } + + pub fn print_help() { + print!("{}", Self::help_text()); + } + + pub fn help_text() -> &'static str { + concat!( + "LiNo CLI Tool for managing links data store\n\n", + "Usage: clink [OPTIONS] [QUERY]\n\n", + "Arguments:\n", + " [QUERY] LiNo query for CRUD operation\n\n", + "Options:\n", + " -d, --db , --data-source , --data \n", + " Path to the links database file [default: db.links]\n", + " -q, --query , --apply , --do \n", + " LiNo query for CRUD operation\n", + " -t, --trace\n", + " Enable trace (verbose output)\n", + " -s, --structure \n", + " ID of the link to format its structure\n", + " -b, --before\n", + " Print the state of the database before applying changes\n", + " -c, --changes\n", + " Print the changes applied by the query\n", + " -a, --after, --links\n", + " Print the state of the database after applying changes\n", + " -h, --help\n", + " Print 
help\n", + " -V, --version\n", + " Print version\n", + ) + } + + pub fn version_text() -> String { + format!("clink {}", env!("CARGO_PKG_VERSION")) + } +} + +fn inline_value<'a>(arg: &'a str, names: &[&str]) -> Option<&'a str> { + names.iter().find_map(|name| { + arg.strip_prefix(name) + .and_then(|rest| rest.strip_prefix('=')) + }) +} + +fn next_value(args: &mut I, option: &str) -> Result +where + I: Iterator, +{ + args.next() + .ok_or_else(|| anyhow::anyhow!("missing value for option '{option}'")) +} + +fn next_bool_value(args: &mut std::iter::Peekable, default: bool) -> Result +where + I: Iterator, +{ + if let Some(value) = args.peek().and_then(|value| bool_literal(value)) { + args.next(); + Ok(value) + } else { + Ok(default) + } +} + +fn parse_bool(option: &str, value: &str) -> Result { + bool_literal(value) + .ok_or_else(|| anyhow::anyhow!("invalid boolean value '{value}' for {option}")) +} + +fn bool_literal(value: &str) -> Option { + match value.to_ascii_lowercase().as_str() { + "true" | "1" | "yes" | "on" => Some(true), + "false" | "0" | "no" | "off" => Some(false), + _ => None, + } +} + +fn parse_link_id(option: &str, value: &str) -> Result { + value + .parse() + .map_err(|_| anyhow::anyhow!("invalid link id '{value}' for {option}")) +} + +fn set_positional_query(cli: &mut Cli, value: String) -> Result<()> { + if cli.query_arg.is_some() { + bail!("unexpected extra positional argument '{value}'"); + } + + cli.query_arg = Some(value); + Ok(()) +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index e504866..c1560e4 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -15,6 +15,7 @@ //! - `query_processor` - LiNo query processing mod changes_simplifier; +pub mod cli; mod error; mod hybrid_reference; mod link; diff --git a/rust/src/main.rs b/rust/src/main.rs index 9efbb9c..a49d00d 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -4,56 +4,21 @@ //! similar functionality to the C# version. 
use anyhow::Result; +use link_cli::cli::{Cli, CliCommand}; use link_cli::{LinkStorage, QueryProcessor}; -use lino_arguments::Parser; - -/// Link CLI - A CLI tool for managing links data store -#[derive(Parser, Debug)] -#[command(name = "clink")] -#[command(author = "link-foundation")] -#[command(version)] -#[command(about = "LiNo CLI Tool for managing links data store")] -struct Cli { - /// Path to the links database file - #[arg(short = 'd', long = "db", default_value = "db.links")] - #[arg(alias = "data-source")] - #[arg(alias = "data")] - db: String, - - /// LiNo query for CRUD operation - #[arg(short = 'q', long = "query")] - #[arg(alias = "apply")] - #[arg(alias = "do")] - query: Option, - - /// LiNo query for CRUD operation (positional argument) - #[arg(name = "QUERY")] - query_arg: Option, - - /// Enable trace (verbose output) - #[arg(short = 't', long = "trace", default_value = "false")] - trace: bool, - - /// ID of the link to format its structure - #[arg(short = 's', long = "structure")] - structure: Option, - - /// Print the state of the database before applying changes - #[arg(short = 'b', long = "before", default_value = "false")] - before: bool, - - /// Print the changes applied by the query - #[arg(short = 'c', long = "changes", default_value = "false")] - changes: bool, - - /// Print the state of the database after applying changes - #[arg(short = 'a', long = "after", default_value = "false")] - #[arg(alias = "links")] - after: bool, -} fn main() -> Result<()> { - let cli = Cli::parse(); + let cli = match Cli::parse()? 
{ + CliCommand::Run(cli) => cli, + CliCommand::Help => { + Cli::print_help(); + return Ok(()); + } + CliCommand::Version => { + println!("{}", Cli::version_text()); + return Ok(()); + } + }; // Create link storage let mut storage = LinkStorage::new(&cli.db, cli.trace)?; diff --git a/rust/tests/cli_arguments_tests.rs b/rust/tests/cli_arguments_tests.rs new file mode 100644 index 0000000..453223b --- /dev/null +++ b/rust/tests/cli_arguments_tests.rs @@ -0,0 +1,82 @@ +//! Tests for Rust CLI argument parity with the C# command surface. + +use link_cli::cli::{Cli, CliCommand}; + +fn parse_run(args: &[&str]) -> Cli { + match Cli::parse_from(args).expect("CLI arguments should parse") { + CliCommand::Run(cli) => cli, + other => panic!("expected run command, got {other:?}"), + } +} + +#[test] +fn parses_csharp_option_aliases_without_direct_clap_dependency() { + let cli = parse_run(&[ + "clink", + "--data-source", + "links.db", + "--apply", + "(1 2)", + "--links", + "-b", + "-c", + "-t", + "-s", + "42", + ]); + + assert_eq!(cli.db, "links.db"); + assert_eq!(cli.query.as_deref(), Some("(1 2)")); + assert!(cli.after); + assert!(cli.before); + assert!(cli.changes); + assert!(cli.trace); + assert_eq!(cli.structure, Some(42)); +} + +#[test] +fn query_option_takes_precedence_over_positional_query() { + let cli = parse_run(&["clink", "--query", "(1 2)", "(3 4)"]); + + assert_eq!(cli.query.as_deref(), Some("(1 2)")); + assert_eq!(cli.query_arg.as_deref(), Some("(3 4)")); +} + +#[test] +fn parses_inline_alias_values_and_boolean_values() { + let cli = parse_run(&[ + "clink", + "--data=db.bin", + "--do=(5 6)", + "--trace=false", + "--before=true", + "--changes=on", + "--after=0", + ]); + + assert_eq!(cli.db, "db.bin"); + assert_eq!(cli.query.as_deref(), Some("(5 6)")); + assert!(!cli.trace); + assert!(cli.before); + assert!(cli.changes); + assert!(!cli.after); +} + +#[test] +fn returns_help_and_version_commands() { + assert_eq!( + Cli::parse_from(["clink", "--help"]).expect("help 
should parse"), + CliCommand::Help + ); + assert_eq!( + Cli::parse_from(["clink", "--version"]).expect("version should parse"), + CliCommand::Version + ); +} + +#[test] +fn rejects_extra_positional_queries() { + let error = Cli::parse_from(["clink", "(1 2)", "(3 4)"]).expect_err("extra query should fail"); + + assert!(error.to_string().contains("unexpected extra positional")); +} diff --git a/rust/tests/dependency_basis_tests.rs b/rust/tests/dependency_basis_tests.rs index 89c177a..de43eb8 100644 --- a/rust/tests/dependency_basis_tests.rs +++ b/rust/tests/dependency_basis_tests.rs @@ -2,11 +2,21 @@ const CARGO_TOML: &str = include_str!("../Cargo.toml"); +fn dependencies_section() -> &'static str { + CARGO_TOML + .split("[dependencies]") + .nth(1) + .and_then(|rest| rest.split("\n[").next()) + .expect("rust/Cargo.toml should have a [dependencies] section") +} + #[test] fn rust_manifest_declares_required_basis_crates() { + let dependencies = dependencies_section(); + for dependency in ["doublets", "links-notation", "lino-arguments"] { assert!( - CARGO_TOML.contains(&format!("{dependency} =")), + dependencies.contains(&format!("{dependency} =")), "rust/Cargo.toml must declare {dependency} as a direct dependency" ); } @@ -22,3 +32,17 @@ fn rust_manifest_declares_required_basis_crates() { ); } } + +#[test] +fn rust_manifest_uses_lino_arguments_without_direct_clap_dependency() { + let dependencies = dependencies_section(); + + assert!( + dependencies.contains("lino-arguments ="), + "rust/Cargo.toml should use lino-arguments as the CLI configuration basis" + ); + assert!( + !dependencies.contains("\nclap ="), + "rust/Cargo.toml should not declare clap directly; lino-arguments owns that integration transitively" + ); +}