From a6c345d24540473f1611341152367f5e7cac79c3 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Tue, 10 Mar 2026 17:16:08 +0100 Subject: [PATCH 01/12] Normalize to NFD on write --- .../Services/MiniLcmJsInvokable.cs | 10 +- .../FwLiteWeb/Hubs/CrdtMiniLcmApiHub.cs | 9 +- .../FwLite/FwLiteWeb/Hubs/FwDataMiniLcmHub.cs | 12 +- .../FwLiteWeb/Hubs/MiniLcmApiHubBase.cs | 17 +- .../FwLite/MiniLcm.Tests/MiniLcmTestBase.cs | 5 +- .../MiniLcm.Tests/QueryEntryTestsBase.cs | 8 +- .../MiniLcm.Tests/WriteNormalizationTests.cs | 1131 +++++++++++++++++ .../MiniLcmWriteApiNormalizationWrapper.cs | 570 +++++++++ .../MiniLcm/Normalization/StringNormalizer.cs | 76 ++ .../MiniLcm/Validators/MiniLcmValidators.cs | 1 + .../MiniLcm/Wrappers/MiniLcmWrappers.cs | 8 +- 11 files changed, 1823 insertions(+), 24 deletions(-) create mode 100644 backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs create mode 100644 backend/FwLite/MiniLcm/Normalization/MiniLcmWriteApiNormalizationWrapper.cs create mode 100644 backend/FwLite/MiniLcm/Normalization/StringNormalizer.cs diff --git a/backend/FwLite/FwLiteShared/Services/MiniLcmJsInvokable.cs b/backend/FwLite/FwLiteShared/Services/MiniLcmJsInvokable.cs index 10f58081ba..db81eaa7e3 100644 --- a/backend/FwLite/FwLiteShared/Services/MiniLcmJsInvokable.cs +++ b/backend/FwLite/FwLiteShared/Services/MiniLcmJsInvokable.cs @@ -19,10 +19,16 @@ public class MiniLcmJsInvokable( ILogger logger, MiniLcmApiNotifyWrapperFactory notificationWrapperFactory, MiniLcmApiValidationWrapperFactory validationWrapperFactory, - MiniLcmApiStringNormalizationWrapperFactory normalizationWrapperFactory + MiniLcmApiStringNormalizationWrapperFactory readNormalizationWrapperFactory, + MiniLcmWriteApiNormalizationWrapperFactory writeNormalizationWrapperFactory ) : IDisposable { - private readonly IMiniLcmApi _wrappedApi = api.WrapWith([normalizationWrapperFactory, validationWrapperFactory, notificationWrapperFactory], project); + private readonly IMiniLcmApi _wrappedApi = api.WrapWith([ + readNormalizationWrapperFactory, + writeNormalizationWrapperFactory, // normalize data before validating + validationWrapperFactory, + notificationWrapperFactory, + ], project); public record MiniLcmFeatures(bool? History, bool? Write, bool? OpenWithFlex, bool? Feedback, bool? Sync, bool? Audio, bool? CustomViews); private bool SupportsSync => project.DataFormat == ProjectDataFormat.Harmony && api is CrdtMiniLcmApi; diff --git a/backend/FwLite/FwLiteWeb/Hubs/CrdtMiniLcmApiHub.cs b/backend/FwLite/FwLiteWeb/Hubs/CrdtMiniLcmApiHub.cs index f21ecd36cc..50324210c2 100644 --- a/backend/FwLite/FwLiteWeb/Hubs/CrdtMiniLcmApiHub.cs +++ b/backend/FwLite/FwLiteWeb/Hubs/CrdtMiniLcmApiHub.cs @@ -1,14 +1,13 @@ -using FwLiteShared; using FwLiteShared.Events; using FwLiteShared.Projects; using FwLiteShared.Sync; using LcmCrdt; using LcmCrdt.Data; -using FwLiteWeb.Services; using Microsoft.AspNetCore.SignalR; using Microsoft.Extensions.Caching.Memory; using MiniLcm; using MiniLcm.Models; +using MiniLcm.Normalization; using MiniLcm.Validators; using SystemTextJsonPatch; @@ -23,8 +22,10 @@ public class CrdtMiniLcmApiHub( LexboxProjectService lexboxProjectService, IMemoryCache memoryCache, IHubContext hubContext, - MiniLcmApiValidationWrapperFactory validationWrapperFactory -) : MiniLcmApiHubBase(miniLcmApi, validationWrapperFactory) + MiniLcmApiValidationWrapperFactory validationWrapperFactory, + MiniLcmWriteApiNormalizationWrapperFactory writeNormalizationWrapperFactory, + MiniLcmApiStringNormalizationWrapperFactory readNormalizationWrapperFactory +) : MiniLcmApiHubBase(miniLcmApi, validationWrapperFactory, readNormalizationWrapperFactory, writeNormalizationWrapperFactory, projectContext.Project) { public const string ProjectRouteKey = "project"; public static string ProjectGroup(string projectName) => "crdt-" + projectName; diff --git a/backend/FwLite/FwLiteWeb/Hubs/FwDataMiniLcmHub.cs b/backend/FwLite/FwLiteWeb/Hubs/FwDataMiniLcmHub.cs index c392f77017..05e6de79a5 100644 --- a/backend/FwLite/FwLiteWeb/Hubs/FwDataMiniLcmHub.cs +++ b/backend/FwLite/FwLiteWeb/Hubs/FwDataMiniLcmHub.cs @@ -1,10 +1,8 @@ using FwDataMiniLcmBridge; -using Microsoft.AspNetCore.SignalR; -using Microsoft.Extensions.Options; using MiniLcm; +using MiniLcm.Normalization; using MiniLcm.Validators; using SIL.LCModel; -using SystemTextJsonPatch; namespace FwLiteWeb.Hubs; @@ -13,8 +11,12 @@ public class FwDataMiniLcmHub( IMiniLcmApi miniLcmApi, FwDataFactory fwDataFactory, FwDataProjectContext context, - MiniLcmApiValidationWrapperFactory validationWrapperFactory -) : MiniLcmApiHubBase(miniLcmApi, validationWrapperFactory) + MiniLcmApiValidationWrapperFactory validationWrapperFactory, + MiniLcmApiStringNormalizationWrapperFactory readNormalizationWrapperFactory +) +// Note: FwData already handles string normalization internally (via LCModel), +// so we skip the write normalization wrapper for FwData APIs. +: MiniLcmApiHubBase(miniLcmApi, validationWrapperFactory, readNormalizationWrapperFactory, null, context.Project) { public const string ProjectRouteKey = "fwdata"; public override async Task OnConnectedAsync() diff --git a/backend/FwLite/FwLiteWeb/Hubs/MiniLcmApiHubBase.cs b/backend/FwLite/FwLiteWeb/Hubs/MiniLcmApiHubBase.cs index 5c3b962423..3936cea78f 100644 --- a/backend/FwLite/FwLiteWeb/Hubs/MiniLcmApiHubBase.cs +++ b/backend/FwLite/FwLiteWeb/Hubs/MiniLcmApiHubBase.cs @@ -1,16 +1,25 @@ using Microsoft.AspNetCore.SignalR; -using Microsoft.Extensions.Options; using MiniLcm; using MiniLcm.Models; +using MiniLcm.Normalization; using MiniLcm.Validators; using SystemTextJsonPatch; +using MiniLcm.Wrappers; namespace FwLiteWeb.Hubs; -public abstract class MiniLcmApiHubBase(IMiniLcmApi miniLcmApi, - MiniLcmApiValidationWrapperFactory validationWrapperFactory) : Hub +public abstract class MiniLcmApiHubBase( + IMiniLcmApi miniLcmApi, + MiniLcmApiValidationWrapperFactory validationWrapperFactory, + MiniLcmApiStringNormalizationWrapperFactory readNormalizationWrapperFactory, + MiniLcmWriteApiNormalizationWrapperFactory? writeNormalizationWrapperFactory, + IProjectIdentifier? projectIdentifier) : Hub { - private readonly IMiniLcmApi _miniLcmApi = validationWrapperFactory.Create(miniLcmApi); + private readonly IMiniLcmApi _miniLcmApi = miniLcmApi.WrapWith([ + readNormalizationWrapperFactory, + writeNormalizationWrapperFactory, // normalize data before validating + validationWrapperFactory, + ], projectIdentifier!); public async Task GetWritingSystems() { diff --git a/backend/FwLite/MiniLcm.Tests/MiniLcmTestBase.cs b/backend/FwLite/MiniLcm.Tests/MiniLcmTestBase.cs index 1b3d255600..085a1dd952 100644 --- a/backend/FwLite/MiniLcm.Tests/MiniLcmTestBase.cs +++ b/backend/FwLite/MiniLcm.Tests/MiniLcmTestBase.cs @@ -17,7 +17,10 @@ public virtual async Task InitializeAsync() { BaseApi = await NewApi(); BaseApi.Should().NotBeNull(); - Api = BaseApi.WrapWith([new MiniLcmApiStringNormalizationWrapperFactory()], null!); + Api = BaseApi.WrapWith([ + new MiniLcmApiStringNormalizationWrapperFactory(), + new MiniLcmWriteApiNormalizationWrapperFactory(), + ], null!); Api.Should().NotBeNull(); } diff --git a/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs b/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs index bc37968262..022e92eb65 100644 --- a/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs +++ b/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs @@ -410,12 +410,10 @@ public async Task SuccessfulMatches(string searchTerm, string word, bool identic { // identical is to make the test cases more readable when they only differ in their normalization (searchTerm == word).Should().Be(identical); - // remove next line in https://github.com/sillsdev/languageforge-lexbox/issues/2065 - word = word.Normalize(NormalizationForm.FormD); await Api.CreateEntry(new Entry { LexemeForm = { ["en"] = word } }); var words = await Api.SearchEntries(searchTerm).Select(e => e.LexemeForm["en"]).ToArrayAsync(); words.Should().NotBeEmpty(); - // Like LicLCM the MiniLcm API should normalize to NFD + // The API normalizes to NFD on write, so results should be NFD regardless of input words.Should().Contain(word.Normalize(NormalizationForm.FormD)); } @@ -427,12 +425,10 @@ public async Task SuccessfulMatches(string searchTerm, string word, bool identic [InlineData("É", "È")] // Different accents public async Task NegativeMatches(string searchTerm, string word) { - word = word.Normalize(NormalizationForm.FormD); - //should we be normalizing the search term internally? - searchTerm = searchTerm.Normalize(NormalizationForm.FormD); await Api.CreateEntry(new Entry { LexemeForm = { ["en"] = word } }); var words = await Api.SearchEntries(searchTerm).Select(e => e.LexemeForm["en"]).ToArrayAsync(); words.Should().NotContain(word); + words.Should().NotContain(word.Normalize(NormalizationForm.FormD)); } [Theory] diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs new file mode 100644 index 0000000000..a96e4ed92c --- /dev/null +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -0,0 +1,1131 @@ +using System.Collections; +using System.Reflection; +using System.Text; +using System.Text.Json; +using MiniLcm.Normalization; +using Moq; +using SystemTextJsonPatch.Operations; + +namespace MiniLcm.Tests; + +#pragma warning disable IDE0022 // Use block body for method + +/// +/// Tests for the MiniLcmWriteApiNormalizationWrapper. +/// These tests verify that all user-entered text is normalized to NFD on write operations. +/// +public class WriteNormalizationTests +{ + private readonly IMiniLcmApi _mockApi; + private readonly IMiniLcmApi _normalizingApi; + + public WriteNormalizationTests() + { + _mockApi = Mock.Of(); + var factory = new MiniLcmWriteApiNormalizationWrapperFactory(); + _normalizingApi = factory.Create(_mockApi); + } + + + private static void AssertNfc(object obj) => NormalizationAssert.AssertAllNfc(obj); + private static void AssertNfd(object obj) => NormalizationAssert.AssertAllNfd(obj); + private static bool IsNfd(object obj) => NormalizationAssert.IsAllNfd(obj); + + private static void AssertAllNfc(IEnumerable values) + { + foreach (var value in values) AssertNfc(value!); + } + + private static void AssertAllNfd(IEnumerable values) + { + foreach (var value in values) AssertNfd(value!); + } + + #region WritingSystem Tests + + [Fact] + public async Task CreateWritingSystem_NormalizesToNfd() + { + var ws = NfcTestData.CreateNfcWritingSystem(); + AssertNfc(ws); + + await _normalizingApi.CreateWritingSystem(ws); + + Mock.Get(_mockApi).Verify(api => api.CreateWritingSystem( + It.Is(w => IsNfd(w)), + null + )); + } + + [Fact] + public async Task UpdateWritingSystem_BeforeAfter_NormalizesToNfd() + { + var before = NfcTestData.CreateNfcWritingSystem(); + var after = NfcTestData.CreateNfcWritingSystem(); + AssertNfc(after); + + await _normalizingApi.UpdateWritingSystem(before, after); + + Mock.Get(_mockApi).Verify(api => api.UpdateWritingSystem( + It.IsAny(), + It.Is(w => IsNfd(w)), + It.IsAny() + )); + } + + #endregion + + #region PartOfSpeech Tests + + [Fact] + public async Task CreatePartOfSpeech_NormalizesToNfd() + { + var pos = NfcTestData.CreateNfcPartOfSpeech(); + AssertNfc(pos); + + await _normalizingApi.CreatePartOfSpeech(pos); + + Mock.Get(_mockApi).Verify(api => api.CreatePartOfSpeech( + It.Is(p => IsNfd(p)) + )); + } + + [Fact] + public async Task UpdatePartOfSpeech_BeforeAfter_NormalizesToNfd() + { + var before = NfcTestData.CreateNfcPartOfSpeech(); + var after = NfcTestData.CreateNfcPartOfSpeech(); + AssertNfc(after); + + await _normalizingApi.UpdatePartOfSpeech(before, after); + + Mock.Get(_mockApi).Verify(api => api.UpdatePartOfSpeech( + It.IsAny(), + It.Is(p => IsNfd(p)), + It.IsAny() + )); + } + + #endregion + + #region Publication Tests + + [Fact] + public async Task CreatePublication_NormalizesToNfd() + { + var pub = NfcTestData.CreateNfcPublication(); + AssertNfc(pub); + + await _normalizingApi.CreatePublication(pub); + + Mock.Get(_mockApi).Verify(api => api.CreatePublication( + It.Is(p => IsNfd(p)) + )); + } + + [Fact] + public async Task UpdatePublication_BeforeAfter_NormalizesToNfd() + { + var before = NfcTestData.CreateNfcPublication(); + var after = NfcTestData.CreateNfcPublication(); + AssertNfc(after); + + await _normalizingApi.UpdatePublication(before, after); + + Mock.Get(_mockApi).Verify(api => api.UpdatePublication( + It.IsAny(), + It.Is(p => IsNfd(p)), + It.IsAny() + )); + } + + #endregion + + #region SemanticDomain Tests + + [Fact] + public async Task CreateSemanticDomain_NormalizesToNfd() + { + var sd = NfcTestData.CreateNfcSemanticDomain(); + AssertNfc(sd); + + await _normalizingApi.CreateSemanticDomain(sd); + + Mock.Get(_mockApi).Verify(api => api.CreateSemanticDomain( + It.Is(s => IsNfd(s)) + )); + } + + [Fact] + public async Task UpdateSemanticDomain_BeforeAfter_NormalizesToNfd() + { + var before = NfcTestData.CreateNfcSemanticDomain(); + var after = NfcTestData.CreateNfcSemanticDomain(); + AssertNfc(after); + + await _normalizingApi.UpdateSemanticDomain(before, after); + + Mock.Get(_mockApi).Verify(api => api.UpdateSemanticDomain( + It.IsAny(), + It.Is(s => IsNfd(s)), + It.IsAny() + )); + } + + [Fact] + public async Task AddSemanticDomainToSense_NormalizesToNfd() + { + var sd = NfcTestData.CreateNfcSemanticDomain(); + AssertNfc(sd); + + await _normalizingApi.AddSemanticDomainToSense(Guid.NewGuid(), sd); + + Mock.Get(_mockApi).Verify(api => api.AddSemanticDomainToSense( + It.IsAny(), + It.Is(s => IsNfd(s)) + )); + } + + [Fact] + public async Task BulkImportSemanticDomains_NormalizesToNfd() + { + var domains = new[] { NfcTestData.CreateNfcSemanticDomain(), NfcTestData.CreateNfcSemanticDomain() }; + AssertAllNfc(domains); + + var capturedDomains = new List(); + Mock.Get(_mockApi) + .Setup(api => api.BulkImportSemanticDomains(It.IsAny>())) + .Returns(async (IAsyncEnumerable stream) => + { + await foreach (var sd in stream) capturedDomains.Add(sd); + }); + + await _normalizingApi.BulkImportSemanticDomains(domains.ToAsyncEnumerable()); + + capturedDomains.Should().HaveCount(2); + AssertAllNfd(capturedDomains); + } + + #endregion + + #region ComplexFormType Tests + + [Fact] + public async Task CreateComplexFormType_NormalizesToNfd() + { + var cft = NfcTestData.CreateNfcComplexFormType(); + AssertNfc(cft); + + await _normalizingApi.CreateComplexFormType(cft); + + Mock.Get(_mockApi).Verify(api => api.CreateComplexFormType( + It.Is(c => IsNfd(c)) + )); + } + + [Fact] + public async Task UpdateComplexFormType_BeforeAfter_NormalizesToNfd() + { + var before = NfcTestData.CreateNfcComplexFormType(); + var after = NfcTestData.CreateNfcComplexFormType(); + AssertNfc(after); + + await _normalizingApi.UpdateComplexFormType(before, after); + + Mock.Get(_mockApi).Verify(api => api.UpdateComplexFormType( + It.IsAny(), + It.Is(c => IsNfd(c)), + It.IsAny() + )); + } + + #endregion + + #region MorphTypeData Tests + + [Fact] + public async Task CreateMorphTypeData_NormalizesToNfd() + { + var mtd = NfcTestData.CreateNfcMorphTypeData(); + AssertNfc(mtd); + + await _normalizingApi.CreateMorphTypeData(mtd); + + Mock.Get(_mockApi).Verify(api => api.CreateMorphTypeData( + It.Is(m => IsNfd(m)) + )); + } + + [Fact] + public async Task UpdateMorphTypeData_BeforeAfter_NormalizesToNfd() + { + var before = NfcTestData.CreateNfcMorphTypeData(); + var after = NfcTestData.CreateNfcMorphTypeData(); + AssertNfc(after); + + await _normalizingApi.UpdateMorphTypeData(before, after); + + Mock.Get(_mockApi).Verify(api => api.UpdateMorphTypeData( + It.IsAny(), + It.Is(m => IsNfd(m)), + It.IsAny() + )); + } + + #endregion + + #region Entry Tests + + [Fact] + public async Task CreateEntry_NormalizesToNfd() + { + var entry = NfcTestData.CreateNfcEntry(); + AssertNfc(entry); + + await _normalizingApi.CreateEntry(entry); + + Mock.Get(_mockApi).Verify(api => api.CreateEntry( + It.Is(e => IsNfd(e)), + null + )); + } + + [Fact] + public async Task UpdateEntry_BeforeAfter_NormalizesToNfd() + { + var before = NfcTestData.CreateNfcEntry(); + var after = NfcTestData.CreateNfcEntry(); + AssertNfc(after); + + await _normalizingApi.UpdateEntry(before, after); + + Mock.Get(_mockApi).Verify(api => api.UpdateEntry( + It.IsAny(), + It.Is(e => IsNfd(e)), + It.IsAny() + )); + } + + [Fact] + public async Task CreateEntry_WithNestedSenses_NormalizesToNfd() + { + var entry = NfcTestData.CreateNfcEntryWithSenses(); + AssertNfc(entry); + + await _normalizingApi.CreateEntry(entry); + + Mock.Get(_mockApi).Verify(api => api.CreateEntry( + It.Is(e => IsNfd(e)), + null + )); + } + + [Fact] + public async Task CreateEntry_WithComplexFormComponents_NormalizesToNfd() + { + var entry = NfcTestData.CreateNfcEntryWithComponents(); + AssertNfc(entry); + + await _normalizingApi.CreateEntry(entry); + + Mock.Get(_mockApi).Verify(api => api.CreateEntry( + It.Is(e => IsNfd(e)), + null + )); + } + + [Fact] + public async Task BulkCreateEntries_NormalizesToNfd() + { + var entries = new[] { NfcTestData.CreateNfcEntry(), NfcTestData.CreateNfcEntryWithSenses() }; + AssertAllNfc(entries); + + var capturedEntries = new List(); + Mock.Get(_mockApi) + .Setup(api => api.BulkCreateEntries(It.IsAny>())) + .Returns(async (IAsyncEnumerable stream) => + { + await foreach (var e in stream) capturedEntries.Add(e); + }); + + await _normalizingApi.BulkCreateEntries(entries.ToAsyncEnumerable()); + + capturedEntries.Should().HaveCount(2); + AssertAllNfd(capturedEntries); + } + + #endregion + + #region JsonPatch Tests + + [Fact] + public async Task UpdateEntry_JsonPatch_NormalizesValuesToNfd() + { + var update = new UpdateObjectInput() + .Set(e => e.LexemeForm["en"], NfcTestData.Nfc) + .Set(e => e.CitationForm, NfcTestData.CreateNfcMultiString()) + .Set(e => e.Note["en"], NfcTestData.CreateNfcRichString()) + .Set(e => e.LiteralMeaning, NfcTestData.CreateNfcRichMultiString()); + + UpdateObjectInput? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdateEntry(It.IsAny(), It.IsAny>())) + .Callback>((_, patch) => captured = patch) + .ReturnsAsync(NfcTestData.CreateNfcEntry()); + + await _normalizingApi.UpdateEntry(Guid.NewGuid(), update); + + captured.Should().NotBeNull(); + captured.Should().NotBeSameAs(update); + AssertPatchValuesNfd(captured); + } + + [Fact] + public async Task UpdateEntry_JsonPatch_NormalizesJsonElementString() + { + using var document = JsonDocument.Parse($"\"{NfcTestData.Nfc}\""); + var update = new UpdateObjectInput(); + update.Patch.Operations.Add(new Operation("replace", "/LexemeForm/en", null, document.RootElement)); + + UpdateObjectInput? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdateEntry(It.IsAny(), It.IsAny>())) + .Callback>((_, patch) => captured = patch) + .ReturnsAsync(NfcTestData.CreateNfcEntry()); + + await _normalizingApi.UpdateEntry(Guid.NewGuid(), update); + + captured.Should().NotBeNull(); + var value = captured.Patch.Operations.Single().Value; + value.Should().BeOfType(); + ((string)value).Should().Be(NfcTestData.Nfd); + } + + [Fact] + public async Task UpdateWritingSystem_JsonPatch_NormalizesStringArray() + { + var update = new UpdateObjectInput() + .Set(ws => ws.Exemplars, [NfcTestData.Nfc, NfcTestData.Nfc]); + + UpdateObjectInput? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdateWritingSystem(It.IsAny(), It.IsAny(), + It.IsAny>())) + .Callback>((_, _, patch) => + captured = patch) + .ReturnsAsync(NfcTestData.CreateNfcWritingSystem()); + + await _normalizingApi.UpdateWritingSystem("en", WritingSystemType.Analysis, update); + + captured.Should().NotBeNull(); + var values = captured!.Patch.Operations.Single().Value.Should().BeOfType().Subject; + values.Should().AllSatisfy(value => value.IsNormalized(NormalizationForm.FormD).Should().BeTrue()); + } + + private static void AssertPatchValuesNfd(UpdateObjectInput update) where T : class + { + update.Patch.Operations.Should().NotBeEmpty(); + foreach (var op in update.Patch.Operations) + { + AssertPatchValueNfd(op.Value); + } + } + + private static void AssertPatchValueNfd(object? value) + { + switch (value) + { + case null: + return; + case string s: + s.IsNormalized(NormalizationForm.FormD).Should().BeTrue(); + return; + case string[] strings: + strings.Should().AllSatisfy(str => str.IsNormalized(NormalizationForm.FormD).Should().BeTrue()); + return; + case MultiString ms: + NormalizationAssert.AssertAllNfd(ms); + return; + case RichString rs: + NormalizationAssert.AssertAllNfd(rs); + return; + case RichMultiString rms: + NormalizationAssert.AssertAllNfd(rms); + return; + case JsonElement jsonElement when jsonElement.ValueKind == JsonValueKind.String: + var jsonText = jsonElement.GetString(); + jsonText.Should().NotBeNull(); + jsonText.IsNormalized(NormalizationForm.FormD).Should().BeTrue(); + return; + default: + throw new Xunit.Sdk.XunitException($"Unexpected patch value type: {value.GetType().FullName}"); + } + } + + #endregion + + #region ComplexFormComponent Tests + + [Fact] + public async Task CreateComplexFormComponent_NormalizesToNfd() + { + var cfc = NfcTestData.CreateNfcComplexFormComponent(); + AssertNfc(cfc); + + await _normalizingApi.CreateComplexFormComponent(cfc); + + Mock.Get(_mockApi).Verify(api => api.CreateComplexFormComponent( + It.Is(c => IsNfd(c)), + null + )); + } + + #endregion + + #region Sense Tests + + [Fact] + public async Task CreateSense_NormalizesToNfd() + { + var sense = NfcTestData.CreateNfcSense(); + AssertNfc(sense); + + await _normalizingApi.CreateSense(Guid.NewGuid(), sense); + + Mock.Get(_mockApi).Verify(api => api.CreateSense( + It.IsAny(), + It.Is(s => IsNfd(s)), + null + )); + } + + [Fact] + public async Task UpdateSense_BeforeAfter_NormalizesToNfd() + { + var entryId = Guid.NewGuid(); + var before = NfcTestData.CreateNfcSense(); + var after = NfcTestData.CreateNfcSense(); + AssertNfc(after); + + await _normalizingApi.UpdateSense(entryId, before, after); + + Mock.Get(_mockApi).Verify(api => api.UpdateSense( + entryId, + It.IsAny(), + It.Is(s => IsNfd(s)), + It.IsAny() + )); + } + + [Fact] + public async Task CreateSense_WithNestedExampleSentences_NormalizesToNfd() + { + var sense = NfcTestData.CreateNfcSenseWithExamples(); + AssertNfc(sense); + + await _normalizingApi.CreateSense(Guid.NewGuid(), sense); + + Mock.Get(_mockApi).Verify(api => api.CreateSense( + It.IsAny(), + It.Is(s => IsNfd(s)), + null + )); + } + + #endregion + + #region ExampleSentence Tests + + [Fact] + public async Task CreateExampleSentence_NormalizesToNfd() + { + var example = NfcTestData.CreateNfcExampleSentence(); + AssertNfc(example); + + await _normalizingApi.CreateExampleSentence(Guid.NewGuid(), Guid.NewGuid(), example); + + Mock.Get(_mockApi).Verify(api => api.CreateExampleSentence( + It.IsAny(), + It.IsAny(), + It.Is(e => IsNfd(e)), + null + )); + } + + [Fact] + public async Task UpdateExampleSentence_BeforeAfter_NormalizesToNfd() + { + var entryId = Guid.NewGuid(); + var senseId = Guid.NewGuid(); + var before = NfcTestData.CreateNfcExampleSentence(); + var after = NfcTestData.CreateNfcExampleSentence(); + AssertNfc(after); + + await _normalizingApi.UpdateExampleSentence(entryId, senseId, before, after); + + Mock.Get(_mockApi).Verify(api => api.UpdateExampleSentence( + entryId, + senseId, + It.IsAny(), + It.Is(e => IsNfd(e)), + It.IsAny() + )); + } + + [Fact] + public async Task CreateExampleSentence_WithTranslations_NormalizesToNfd() + { + var example = NfcTestData.CreateNfcExampleSentenceWithTranslations(); + AssertNfc(example); + + await _normalizingApi.CreateExampleSentence(Guid.NewGuid(), Guid.NewGuid(), example); + + Mock.Get(_mockApi).Verify(api => api.CreateExampleSentence( + It.IsAny(), + It.IsAny(), + It.Is(e => IsNfd(e)), + null + )); + } + + #endregion + + #region Translation Tests + + [Fact] + public async Task AddTranslation_NormalizesToNfd() + { + var translation = NfcTestData.CreateNfcTranslation(); + AssertNfc(translation); + + await _normalizingApi.AddTranslation(Guid.NewGuid(), Guid.NewGuid(), Guid.NewGuid(), translation); + + Mock.Get(_mockApi).Verify(api => api.AddTranslation( + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.Is(t => IsNfd(t)) + )); + } + + #endregion +} + + +/// +/// Tests for NormalizationAssert to ensure it correctly detects NFC/NFD issues. +/// These tests verify the assertion helpers don't have false negatives. +/// +public class NormalizationAssertTests +{ + [Fact] + public void AssertAllNfc_WithNfcData_DoesNotThrow() + { + var entry = NfcTestData.CreateNfcEntry(); + + // Should not throw + NormalizationAssert.AssertAllNfc(entry); + } + + [Fact] + public void AssertAllNfc_WithNfdData_Throws() + { + var entry = new Entry + { + Id = Guid.NewGuid(), + LexemeForm = new MultiString { Values = { { "en", NfcTestData.Nfd } } }, // NFD should fail + CitationForm = new MultiString { Values = { { "en", NfcTestData.Nfc } } }, + LiteralMeaning = new RichMultiString { { "en", new RichString(NfcTestData.Nfc) } }, + Note = new RichMultiString { { "en", new RichString(NfcTestData.Nfc) } } + }; + + var act = () => NormalizationAssert.AssertAllNfc(entry); + + act.Should().Throw() + .WithMessage("*NFC*"); + } + + [Fact] + public void AssertAllNfd_WithNfdData_DoesNotThrow() + { + var entry = new Entry + { + Id = Guid.NewGuid(), + LexemeForm = new MultiString { Values = { { "en", NfcTestData.Nfd } } }, + CitationForm = new MultiString { Values = { { "en", NfcTestData.Nfd } } }, + LiteralMeaning = new RichMultiString { { "en", new RichString(NfcTestData.Nfd) } }, + Note = new RichMultiString { { "en", new RichString(NfcTestData.Nfd) } } + }; + + // Should not throw + NormalizationAssert.AssertAllNfd(entry); + } + + [Fact] + public void AssertAllNfd_WithNfcData_Throws() + { + var entry = NfcTestData.CreateNfcEntry(); + + var act = () => NormalizationAssert.AssertAllNfd(entry); + + act.Should().Throw() + .WithMessage("*NFD*"); + } + + [Fact] + public void AssertAllNfc_WithEmptyMultiString_Throws() + { + var entry = new Entry + { + Id = Guid.NewGuid(), + LexemeForm = [], // Empty - should fail + CitationForm = NfcTestData.CreateNfcMultiString() + }; + + var act = () => NormalizationAssert.AssertAllNfc(entry); + + act.Should().Throw() + .WithMessage("*no values*"); + } + + [Fact] + public void AssertAllNfc_WithNestedNfdData_Throws() + { + var entry = new Entry + { + Id = Guid.NewGuid(), + LexemeForm = NfcTestData.CreateNfcMultiString(), + CitationForm = NfcTestData.CreateNfcMultiString(), + LiteralMeaning = NfcTestData.CreateNfcRichMultiString(), + Note = NfcTestData.CreateNfcRichMultiString(), + Senses = + [ + new Sense + { + Id = Guid.NewGuid(), + Gloss = new MultiString { Values = { { "en", NfcTestData.Nfd } } }, // NFD nested in sense + Definition = NfcTestData.CreateNfcRichMultiString() + } + ] + }; + + var act = () => NormalizationAssert.AssertAllNfc(entry); + + act.Should().Throw() + .WithMessage("*Senses*Gloss*"); + } + + [Fact] + public void IsAllNfd_WithNfdData_ReturnsTrue() + { + var multiString = new MultiString { Values = { { "en", NfcTestData.Nfd } } }; + + NormalizationAssert.IsAllNfd(multiString).Should().BeTrue(); + } + + [Fact] + public void IsAllNfd_WithNfcData_ReturnsFalse() + { + var multiString = NfcTestData.CreateNfcMultiString(); + + NormalizationAssert.IsAllNfd(multiString).Should().BeFalse(); + } +} + +/// +/// Provides test data with NFC-normalized strings for all entity types. +/// Each Create method returns an object with ALL normalizable properties populated with NFC strings. +/// +public static class NfcTestData +{ + /// + /// NFC string: "naïve" with U+00EF LATIN SMALL LETTER I WITH DIAERESIS (composed form) + /// + public const string Nfc = "na\u00efve"; + + /// + /// NFD string: "naïve" with U+0069 LATIN SMALL LETTER I + U+0308 COMBINING DIAERESIS (decomposed form) + /// + public const string Nfd = "na\u0069\u0308ve"; + + public static MultiString CreateNfcMultiString() => new() { Values = { { "en", Nfc }, { "fr", Nfc } } }; + + public static RichString CreateNfcRichString() => new([ + new RichSpan { Text = Nfc, Ws = "en" }, + new RichSpan { Text = Nfc, Ws = "en", Bold = RichTextToggle.On } + ]); + + public static RichMultiString CreateNfcRichMultiString() => new() + { + { "en", CreateNfcRichString() }, + { "fr", CreateNfcRichString() } + }; + + public static WritingSystem CreateNfcWritingSystem() => new() + { + Id = Guid.NewGuid(), + WsId = "en", + Type = WritingSystemType.Analysis, + Name = Nfc, + Abbreviation = Nfc, + Font = Nfc, + Exemplars = [Nfc, Nfc] + }; + + public static PartOfSpeech CreateNfcPartOfSpeech() => new() + { + Id = Guid.NewGuid(), + Name = CreateNfcMultiString() + }; + + public static Publication CreateNfcPublication() => new() + { + Id = Guid.NewGuid(), + Name = CreateNfcMultiString() + }; + + public static SemanticDomain CreateNfcSemanticDomain() => new() + { + Id = Guid.NewGuid(), + Code = "1.1.1", // Code is NOT normalized (metadata) + Name = CreateNfcMultiString() + }; + + public static ComplexFormType CreateNfcComplexFormType() => new() + { + Id = Guid.NewGuid(), + Name = CreateNfcMultiString() + }; + + public static MorphTypeData CreateNfcMorphTypeData() => new() + { + Id = Guid.NewGuid(), + Name = CreateNfcMultiString(), + Abbreviation = CreateNfcMultiString(), + Description = CreateNfcRichMultiString(), + LeadingToken = Nfc, + TrailingToken = Nfc + }; + + public static Translation CreateNfcTranslation() => new() + { + Id = Guid.NewGuid(), + Text = CreateNfcRichMultiString() + }; + + public static ExampleSentence CreateNfcExampleSentence() => new() + { + Id = Guid.NewGuid(), + Sentence = CreateNfcRichMultiString(), + Reference = CreateNfcRichString() + }; + + public static ExampleSentence CreateNfcExampleSentenceWithTranslations() => new() + { + Id = Guid.NewGuid(), + Sentence = CreateNfcRichMultiString(), + Reference = CreateNfcRichString(), + Translations = [CreateNfcTranslation(), CreateNfcTranslation()] + }; + + public static Sense CreateNfcSense() => new() + { + Id = Guid.NewGuid(), + Gloss = CreateNfcMultiString(), + Definition = CreateNfcRichMultiString() + }; + + public static Sense CreateNfcSenseWithExamples() => new() + { + Id = Guid.NewGuid(), + Gloss = CreateNfcMultiString(), + Definition = CreateNfcRichMultiString(), + SemanticDomains = [CreateNfcSemanticDomain()], + PartOfSpeech = CreateNfcPartOfSpeech(), + ExampleSentences = [CreateNfcExampleSentenceWithTranslations()] + }; + + public static ComplexFormComponent CreateNfcComplexFormComponent() => new() + { + Id = Guid.NewGuid(), + ComplexFormEntryId = Guid.NewGuid(), + ComponentEntryId = Guid.NewGuid(), + ComplexFormHeadword = Nfc, + ComponentHeadword = Nfc + }; + + public static Entry CreateNfcEntry() => new() + { + Id = Guid.NewGuid(), + LexemeForm = CreateNfcMultiString(), + CitationForm = CreateNfcMultiString(), + LiteralMeaning = CreateNfcRichMultiString(), + Note = CreateNfcRichMultiString() + }; + + public static Entry CreateNfcEntryWithSenses() => new() + { + Id = Guid.NewGuid(), + LexemeForm = CreateNfcMultiString(), + CitationForm = CreateNfcMultiString(), + LiteralMeaning = CreateNfcRichMultiString(), + Note = CreateNfcRichMultiString(), + Senses = [CreateNfcSenseWithExamples()] + }; + + public static Entry CreateNfcEntryWithComponents() => new() + { + Id = Guid.NewGuid(), + LexemeForm = CreateNfcMultiString(), + CitationForm = CreateNfcMultiString(), + LiteralMeaning = CreateNfcRichMultiString(), + Note = CreateNfcRichMultiString(), + Components = [CreateNfcComplexFormComponent()], + ComplexForms = [CreateNfcComplexFormComponent()] + }; +} + +/// +/// Assertion helpers for verifying NFC/NFD normalization state of objects. +/// Uses reflection to walk the object graph and check all normalizable properties. +/// +public static class NormalizationAssert +{ + private static readonly HashSet NormalizableTypes = + [ + typeof(string), + typeof(string[]), + typeof(MultiString), + typeof(RichString), + typeof(RichMultiString) + ]; + + /// + /// Properties that should NOT be normalized (metadata, not user text) + /// + private static readonly HashSet ExcludedProperties = + [ + "Code", // SemanticDomain.Code is metadata + "WsId", // WritingSystemId is metadata + "Ws" // RichSpan.Ws is a writing system ID + ]; + + /// + /// Asserts that all normalizable properties in the object contain NFC strings. + /// Throws if any property is null, empty, or contains NFD strings. + /// + public static void AssertAllNfc(object obj) + { + var issues = FindNormalizationIssues(obj, expectNfc: true); + if (issues.Count > 0) + { + throw new Xunit.Sdk.XunitException( + $"Expected all normalizable properties to contain NFC strings, but found issues:\n" + + string.Join("\n", issues.Select(i => $" - {i}")) + ); + } + } + + /// + /// Asserts that all normalizable properties in the object contain NFD strings. + /// Throws if any property contains NFC strings. + /// + public static void AssertAllNfd(object obj) + { + var issues = FindNormalizationIssues(obj, expectNfc: false); + if (issues.Count > 0) + { + throw new Xunit.Sdk.XunitException( + $"Expected all normalizable properties to contain NFD strings, but found issues:\n" + + string.Join("\n", issues.Select(i => $" - {i}")) + ); + } + } + + /// + /// Returns true if all normalizable properties contain NFD strings. + /// For use in Moq It.Is() expressions. + /// + public static bool IsAllNfd(object obj) + { + var issues = FindNormalizationIssues(obj, expectNfc: false); + return issues.Count == 0; + } + + private static List FindNormalizationIssues(object obj, bool expectNfc, string path = "") + { + var issues = new List(); + if (obj == null) return issues; + + var type = obj.GetType(); + + // Handle string directly + if (obj is string str) + { + CheckString(str, path, expectNfc, issues); + return issues; + } + + // Handle string array + if (obj is string[] strArray) + { + for (var i = 0; i < strArray.Length; i++) + { + CheckString(strArray[i], $"{path}[{i}]", expectNfc, issues); + } + return issues; + } + + // Handle MultiString + if (obj is MultiString ms) + { + if (ms.Values.Count == 0) + { + issues.Add($"{path}: MultiString has no values (must have at least one for testing)"); + } + foreach (var (key, value) in ms.Values) + { + CheckString(value, $"{path}.Values[{key}]", expectNfc, issues); + } + return issues; + } + + // Handle RichString + if (obj is RichString rs) + { + if (rs.Spans.Count == 0) + { + issues.Add($"{path}: RichString has no spans (must have at least one for testing)"); + } + for (var i = 0; i < rs.Spans.Count; i++) + { + CheckString(rs.Spans[i].Text, $"{path}.Spans[{i}].Text", expectNfc, issues); + } + return issues; + } + + // Handle RichMultiString + if (obj is RichMultiString rms) + { + if (rms.Count == 0) + { + issues.Add($"{path}: RichMultiString has no values (must have at least one for testing)"); + } + foreach (var (key, value) in rms) + { + issues.AddRange(FindNormalizationIssues(value, expectNfc, $"{path}[{key}]")); + } + return issues; + } + + // Walk object properties + var properties = type.GetProperties(BindingFlags.Public | BindingFlags.Instance); + foreach (var prop in properties) + { + if (!prop.CanRead) continue; + if (ExcludedProperties.Contains(prop.Name)) continue; + + var value = prop.GetValue(obj); + if (value == null) continue; + + var propPath = string.IsNullOrEmpty(path) ? prop.Name : $"{path}.{prop.Name}"; + var propType = prop.PropertyType; + + // Check if this is a normalizable type + if (NormalizableTypes.Contains(propType) || + propType == typeof(string[]) || + (propType.IsGenericType && propType.GetGenericTypeDefinition() == typeof(Nullable<>))) + { + issues.AddRange(FindNormalizationIssues(value, expectNfc, propPath)); + } + else if (propType.IsPrimitive || propType.IsEnum || propType == typeof(Guid) || propType == typeof(DateTime) || propType == typeof(DateTimeOffset) || propType == typeof(decimal)) + { + continue; + } + // Check collections of model objects + else if (value is IEnumerable enumerable) + { + var index = 0; + foreach (var item in enumerable) + { + if (item != null && IsModelType(item.GetType())) + { + issues.AddRange(FindNormalizationIssues(item, expectNfc, $"{propPath}[{index}]")); + } + index++; + } + } + // Check nested model objects + else if (IsModelType(propType)) + { + issues.AddRange(FindNormalizationIssues(value, expectNfc, propPath)); + } + else + { + throw new Xunit.Sdk.XunitException($"Unexpected property type: {propType.FullName} at {propPath}"); + } + } + + return issues; + } + + private static void CheckString(string? value, string path, bool expectNfc, List issues) + { + if (string.IsNullOrEmpty(value)) + { + issues.Add($"{path}: string is null or empty (must have a value for testing)"); + return; + } + + var isNfc = value.IsNormalized(NormalizationForm.FormC); + var isNfd = value.IsNormalized(NormalizationForm.FormD); + + if (expectNfc) + { + // When expecting NFC, the string should be NFC but NOT NFD (unless it has no decomposable chars) + // For our test string "naïve", NFC != NFD, so we check it's in NFC form + if (!isNfc) + { + issues.Add($"{path}: expected NFC but string is not NFC-normalized"); + } + // Also verify it's not already NFD (to ensure our test data is meaningful) + if (isNfd && !isNfc) + { + // This is fine - some strings are both NFC and NFD (e.g., ASCII) + } + else if (isNfd && isNfc) + { + // String is both NFC and NFD - this means it has no decomposable characters + // For testing purposes, we need strings that ARE different in NFC vs NFD + // But we'll allow it since it's still valid + } + } + else + { + // When expecting NFD, the string should be NFD + if (!isNfd) + { + issues.Add($"{path}: expected NFD but string is not NFD-normalized (value contains NFC)"); + } + } + } + + private static bool IsModelType(Type type) + { + // Model types are in the MiniLcm.Models namespace + return type.Namespace?.StartsWith("MiniLcm.Models") == true || + type == typeof(Entry) || + type == typeof(Sense) || + type == typeof(ExampleSentence) || + type == typeof(Translation) || + type == typeof(WritingSystem) || + type == typeof(PartOfSpeech) || + type == typeof(SemanticDomain) || + type == typeof(ComplexFormType) || + type == typeof(MorphTypeData) || + type == typeof(Publication) || + type == typeof(ComplexFormComponent); + } +} + +#pragma warning restore IDE0022 // Use block body for method diff --git a/backend/FwLite/MiniLcm/Normalization/MiniLcmWriteApiNormalizationWrapper.cs b/backend/FwLite/MiniLcm/Normalization/MiniLcmWriteApiNormalizationWrapper.cs new file mode 100644 index 0000000000..047fc07aee --- /dev/null +++ b/backend/FwLite/MiniLcm/Normalization/MiniLcmWriteApiNormalizationWrapper.cs @@ -0,0 +1,570 @@ +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using MiniLcm.Media; +using MiniLcm.Models; +using MiniLcm.SyncHelpers; +using MiniLcm.Wrappers; + +namespace MiniLcm.Normalization; + +public class MiniLcmWriteApiNormalizationWrapperFactory : IMiniLcmWrapperFactory +{ + public IMiniLcmApi Create(IMiniLcmApi api, IProjectIdentifier _unused) + { + return Create(api); + } + + + public IMiniLcmApi Create(IMiniLcmApi api) + { + return new MiniLcmWriteApiNormalizationWrapper(api); + } +} + +/// +/// Normalizes all user-entered text to NFD on write operations. +/// +/// Design notes: +/// - Read operations are forwarded automatically via BeaKona.AutoInterface. +/// - Write operations are manually implemented here (compile-time enforced by IMiniLcmApi). +/// - JsonPatch overloads normalize string-ish values best-effort (string, RichString, MultiString, RichMultiString). +/// JsonElement values are only normalized when they are simple strings; complex JSON values are left as-is +/// to avoid guessing the target type. +/// +public partial class MiniLcmWriteApiNormalizationWrapper(IMiniLcmApi api) : IMiniLcmApi +{ + private readonly IMiniLcmApi _api = api; + + // BeaKona.AutoInterface only forwards IMiniLcmReadApi methods. + // IMiniLcmWriteApi methods are NOT auto-forwarded, ensuring compile-time + // enforcement that all write methods are manually implemented below. + [BeaKona.AutoInterface] + private IMiniLcmReadApi ReadApi => _api; + + // ********** IMiniLcmWriteApi Manual Implementations ********** + // All write methods are implemented manually to ensure NFD normalization + // and guarantee compile-time coverage of all write operations. + + #region WritingSystem + + public async Task CreateWritingSystem(WritingSystem writingSystem, BetweenPosition? between = null) + { + return await _api.CreateWritingSystem(NormalizeWritingSystem(writingSystem), between); + } + + public Task UpdateWritingSystem(WritingSystemId id, WritingSystemType type, UpdateObjectInput update) + { + return _api.UpdateWritingSystem(id, type, NormalizePatch(update)); + } + + + public async Task UpdateWritingSystem(WritingSystem before, WritingSystem after, IMiniLcmApi? api = null) + { + return await _api.UpdateWritingSystem(before, NormalizeWritingSystem(after), api ?? this); + } + + public Task MoveWritingSystem(WritingSystemId id, WritingSystemType type, BetweenPosition between) + { + return _api.MoveWritingSystem(id, type, between); + } + + private static WritingSystem NormalizeWritingSystem(WritingSystem ws) + { + return ws with + { + Name = StringNormalizer.Normalize(ws.Name), + Abbreviation = StringNormalizer.Normalize(ws.Abbreviation), + Font = StringNormalizer.Normalize(ws.Font), + Exemplars = StringNormalizer.Normalize(ws.Exemplars) + }; + } + + #endregion + + #region PartOfSpeech + + public async Task CreatePartOfSpeech(PartOfSpeech partOfSpeech) + {; + return await _api.CreatePartOfSpeech(NormalizePartOfSpeech(partOfSpeech)); + } + + public Task UpdatePartOfSpeech(Guid id, UpdateObjectInput update) + { + return _api.UpdatePartOfSpeech(id, NormalizePatch(update)); + } + + + public async Task UpdatePartOfSpeech(PartOfSpeech before, PartOfSpeech after, IMiniLcmApi? api = null) + { + return await _api.UpdatePartOfSpeech(before, NormalizePartOfSpeech(after), api ?? this); + } + + public Task DeletePartOfSpeech(Guid id) + { + return _api.DeletePartOfSpeech(id); + } + + private static PartOfSpeech NormalizePartOfSpeech(PartOfSpeech pos) + { + return new PartOfSpeech + { + Id = pos.Id, + Name = StringNormalizer.Normalize(pos.Name), + DeletedAt = pos.DeletedAt, + Predefined = pos.Predefined + }; + } + + #endregion + + #region Publication + + public async Task CreatePublication(Publication pub) + { + return await _api.CreatePublication(NormalizePublication(pub)); + } + + public Task UpdatePublication(Guid id, UpdateObjectInput update) + { + return _api.UpdatePublication(id, NormalizePatch(update)); + } + + + public async Task UpdatePublication(Publication before, Publication after, IMiniLcmApi? api = null) + { + return await _api.UpdatePublication(before, NormalizePublication(after), api ?? this); + } + + public Task DeletePublication(Guid id) + { + return _api.DeletePublication(id); + } + + private static Publication NormalizePublication(Publication pub) + { + return new Publication + { + Id = pub.Id, + Name = StringNormalizer.Normalize(pub.Name), + DeletedAt = pub.DeletedAt + }; + } + + #endregion + + #region SemanticDomain + + public async Task CreateSemanticDomain(SemanticDomain semanticDomain) + { + return await _api.CreateSemanticDomain(NormalizeSemanticDomain(semanticDomain)); + } + + public Task UpdateSemanticDomain(Guid id, UpdateObjectInput update) + { + return _api.UpdateSemanticDomain(id, NormalizePatch(update)); + } + + + public async Task UpdateSemanticDomain(SemanticDomain before, SemanticDomain after, IMiniLcmApi? api = null) + { + return await _api.UpdateSemanticDomain(before, NormalizeSemanticDomain(after), api ?? this); + } + + public Task DeleteSemanticDomain(Guid id) + { + return _api.DeleteSemanticDomain(id); + } + + private static SemanticDomain NormalizeSemanticDomain(SemanticDomain sd) + { + return new SemanticDomain + { + Id = sd.Id, + Name = StringNormalizer.Normalize(sd.Name), + Code = sd.Code, // Code is metadata, not user text + DeletedAt = sd.DeletedAt, + Predefined = sd.Predefined + }; + } + + #endregion + + #region ComplexFormType + + public async Task CreateComplexFormType(ComplexFormType complexFormType) + { + return await _api.CreateComplexFormType(NormalizeComplexFormType(complexFormType)); + } + + public Task UpdateComplexFormType(Guid id, UpdateObjectInput update) + { + return _api.UpdateComplexFormType(id, NormalizePatch(update)); + } + + + public async Task UpdateComplexFormType(ComplexFormType before, ComplexFormType after, IMiniLcmApi? api = null) + { + return await _api.UpdateComplexFormType(before, NormalizeComplexFormType(after), api ?? this); + } + + public Task DeleteComplexFormType(Guid id) + { + return _api.DeleteComplexFormType(id); + } + + private static ComplexFormType NormalizeComplexFormType(ComplexFormType cft) + { + return cft with + { + Name = StringNormalizer.Normalize(cft.Name) + }; + } + + #endregion + + #region MorphType + + public async Task CreateMorphTypeData(MorphTypeData morphType) + { + return await _api.CreateMorphTypeData(NormalizeMorphTypeData(morphType)); + } + + public Task UpdateMorphTypeData(Guid id, UpdateObjectInput update) + { + return _api.UpdateMorphTypeData(id, NormalizePatch(update)); + } + + + public async Task UpdateMorphTypeData(MorphTypeData before, MorphTypeData after, IMiniLcmApi? api = null) + { + return await _api.UpdateMorphTypeData(before, NormalizeMorphTypeData(after), api ?? this); + } + + public Task DeleteMorphTypeData(Guid id) + { + return _api.DeleteMorphTypeData(id); + } + + private static MorphTypeData NormalizeMorphTypeData(MorphTypeData mtd) + { + return new MorphTypeData + { + Id = mtd.Id, + MorphType = mtd.MorphType, + Name = StringNormalizer.Normalize(mtd.Name), + Abbreviation = StringNormalizer.Normalize(mtd.Abbreviation), + Description = StringNormalizer.Normalize(mtd.Description), + LeadingToken = StringNormalizer.Normalize(mtd.LeadingToken), + TrailingToken = StringNormalizer.Normalize(mtd.TrailingToken), + SecondaryOrder = mtd.SecondaryOrder, + DeletedAt = mtd.DeletedAt + }; + } + + #endregion + + #region Entry + + public async Task CreateEntry(Entry entry, CreateEntryOptions? options = null) + { + return await _api.CreateEntry(NormalizeEntry(entry), options); + } + + public Task UpdateEntry(Guid id, UpdateObjectInput update) + { + return _api.UpdateEntry(id, NormalizePatch(update)); + } + + + public async Task UpdateEntry(Entry before, Entry after, IMiniLcmApi? api = null) + { + return await _api.UpdateEntry(before, NormalizeEntry(after), api ?? this); + } + + public Task DeleteEntry(Guid id) + { + return _api.DeleteEntry(id); + } + + public async Task CreateComplexFormComponent(ComplexFormComponent complexFormComponent, BetweenPosition? position = null) + { + return await _api.CreateComplexFormComponent(NormalizeComplexFormComponent(complexFormComponent), position); + } + + public Task MoveComplexFormComponent(ComplexFormComponent complexFormComponent, BetweenPosition between) + { + return _api.MoveComplexFormComponent(complexFormComponent, between); + } + + public Task DeleteComplexFormComponent(ComplexFormComponent complexFormComponent) + { + return _api.DeleteComplexFormComponent(complexFormComponent); + } + + public Task AddComplexFormType(Guid entryId, Guid complexFormTypeId) + { + return _api.AddComplexFormType(entryId, complexFormTypeId); + } + + public Task RemoveComplexFormType(Guid entryId, Guid complexFormTypeId) + { + return _api.RemoveComplexFormType(entryId, complexFormTypeId); + } + + public Task AddPublication(Guid entryId, Guid publicationId) + { + return _api.AddPublication(entryId, publicationId); + } + + public Task RemovePublication(Guid entryId, Guid publicationId) + { + return _api.RemovePublication(entryId, publicationId); + } + + private static Entry NormalizeEntry(Entry entry) + { + return new Entry + { + Id = entry.Id, + DeletedAt = entry.DeletedAt, + LexemeForm = StringNormalizer.Normalize(entry.LexemeForm), + CitationForm = StringNormalizer.Normalize(entry.CitationForm), + LiteralMeaning = StringNormalizer.Normalize(entry.LiteralMeaning), + Note = StringNormalizer.Normalize(entry.Note), + MorphType = entry.MorphType, + Senses = [.. entry.Senses.Select(NormalizeSense)], + Components = [.. entry.Components.Select(NormalizeComplexFormComponent)], + ComplexForms = [.. entry.ComplexForms.Select(NormalizeComplexFormComponent)], + ComplexFormTypes = entry.ComplexFormTypes, + PublishIn = entry.PublishIn + }; + } + + private static ComplexFormComponent NormalizeComplexFormComponent(ComplexFormComponent cfc) + { + return cfc with + { + ComplexFormHeadword = StringNormalizer.Normalize(cfc.ComplexFormHeadword), + ComponentHeadword = StringNormalizer.Normalize(cfc.ComponentHeadword) + }; + } + + #endregion + + #region Sense + + public async Task CreateSense(Guid entryId, Sense sense, BetweenPosition? position = null) + { + return await _api.CreateSense(entryId, NormalizeSense(sense), position); + } + + public Task UpdateSense(Guid entryId, Guid senseId, UpdateObjectInput update) + { + return _api.UpdateSense(entryId, senseId, NormalizePatch(update)); + } + + + public async Task UpdateSense(Guid entryId, Sense before, Sense after, IMiniLcmApi? api = null) + { + return await _api.UpdateSense(entryId, before, NormalizeSense(after), api ?? this); + } + + public Task MoveSense(Guid entryId, Guid senseId, BetweenPosition position) + { + return _api.MoveSense(entryId, senseId, position); + } + + public Task DeleteSense(Guid entryId, Guid senseId) + { + return _api.DeleteSense(entryId, senseId); + } + + public Task AddSemanticDomainToSense(Guid senseId, SemanticDomain semanticDomain) + { + return _api.AddSemanticDomainToSense(senseId, NormalizeSemanticDomain(semanticDomain)); + } + + public Task RemoveSemanticDomainFromSense(Guid senseId, Guid semanticDomainId) + { + return _api.RemoveSemanticDomainFromSense(senseId, semanticDomainId); + } + + public Task SetSensePartOfSpeech(Guid senseId, Guid? partOfSpeechId) + { + return _api.SetSensePartOfSpeech(senseId, partOfSpeechId); + } + + private static Sense NormalizeSense(Sense sense) + { + return new Sense + { + Id = sense.Id, + Order = sense.Order, + DeletedAt = sense.DeletedAt, + EntryId = sense.EntryId, + Definition = StringNormalizer.Normalize(sense.Definition), + Gloss = StringNormalizer.Normalize(sense.Gloss), + PartOfSpeech = sense.PartOfSpeech is not null ? NormalizePartOfSpeech(sense.PartOfSpeech) : null, + PartOfSpeechId = sense.PartOfSpeechId, + SemanticDomains = [.. sense.SemanticDomains.Select(NormalizeSemanticDomain)], + ExampleSentences = [.. sense.ExampleSentences.Select(NormalizeExampleSentence)] + }; + } + + #endregion + + #region ExampleSentence + + public async Task CreateExampleSentence(Guid entryId, Guid senseId, ExampleSentence exampleSentence, BetweenPosition? position = null) + { + return await _api.CreateExampleSentence(entryId, senseId, NormalizeExampleSentence(exampleSentence), position); + } + + public Task UpdateExampleSentence(Guid entryId, Guid senseId, Guid exampleSentenceId, UpdateObjectInput update) + { + return _api.UpdateExampleSentence(entryId, senseId, exampleSentenceId, NormalizePatch(update)); + } + + + public async Task UpdateExampleSentence(Guid entryId, Guid senseId, ExampleSentence before, ExampleSentence after, IMiniLcmApi? api = null) + { + return await _api.UpdateExampleSentence(entryId, senseId, before, NormalizeExampleSentence(after), api ?? this); + } + + public Task MoveExampleSentence(Guid entryId, Guid senseId, Guid exampleSentenceId, BetweenPosition position) + { + return _api.MoveExampleSentence(entryId, senseId, exampleSentenceId, position); + } + + public Task DeleteExampleSentence(Guid entryId, Guid senseId, Guid exampleSentenceId) + { + return _api.DeleteExampleSentence(entryId, senseId, exampleSentenceId); + } + + public Task AddTranslation(Guid entryId, Guid senseId, Guid exampleSentenceId, Translation translation) + { + return _api.AddTranslation(entryId, senseId, exampleSentenceId, NormalizeTranslation(translation)); + } + + public Task RemoveTranslation(Guid entryId, Guid senseId, Guid exampleSentenceId, Guid translationId) + { + return _api.RemoveTranslation(entryId, senseId, exampleSentenceId, translationId); + } + + public Task UpdateTranslation(Guid entryId, Guid senseId, Guid exampleSentenceId, Guid translationId, UpdateObjectInput update) + { + return _api.UpdateTranslation(entryId, senseId, exampleSentenceId, translationId, NormalizePatch(update)); + } + + + private static ExampleSentence NormalizeExampleSentence(ExampleSentence example) + { + return new ExampleSentence + { + Id = example.Id, + Order = example.Order, + DeletedAt = example.DeletedAt, + SenseId = example.SenseId, + Sentence = StringNormalizer.Normalize(example.Sentence), + Translations = [.. example.Translations.Select(NormalizeTranslation)], + Reference = StringNormalizer.Normalize(example.Reference) + }; + } + + private static Translation NormalizeTranslation(Translation translation) + { + return new Translation + { + Id = translation.Id, + Text = StringNormalizer.Normalize(translation.Text) + }; + } + + #endregion + + #region Bulk Import + + public Task BulkImportSemanticDomains(IAsyncEnumerable semanticDomains) + { + return _api.BulkImportSemanticDomains(NormalizeStream(semanticDomains, NormalizeSemanticDomain)); + } + + public Task BulkCreateEntries(IAsyncEnumerable entries) + { + return _api.BulkCreateEntries(NormalizeStream(entries, NormalizeEntry)); + } + + private static async IAsyncEnumerable NormalizeStream( + IAsyncEnumerable source, + Func normalize) + { + await foreach (var item in source) + { + yield return normalize(item); + } + } + + #endregion + + #region File Operations + + public Task SaveFile(Stream stream, LcmFileMetadata metadata) + { + // File metadata is not user-entered text, so don't normalize + return _api.SaveFile(stream, metadata); + } + + #endregion + + #region Patch Normalization + + private static UpdateObjectInput NormalizePatch(UpdateObjectInput update) where T : class + { + if (update.Patch.Operations.Count == 0) return update; + + var normalizedPatch = new SystemTextJsonPatch.JsonPatchDocument(); + foreach (var op in update.Patch.Operations) + { + var normalizedValue = NormalizePatchValue(op.Value); + var normalizedOp = new SystemTextJsonPatch.Operations.Operation + { + Op = op.Op, + Path = op.Path, + From = op.From, + Value = normalizedValue, + }; + normalizedPatch.Operations.Add(normalizedOp); + } + return new UpdateObjectInput(normalizedPatch); + } + + [return: NotNullIfNotNull(nameof(value))] + private static object? NormalizePatchValue(object? value) + { + return value switch + { + null => null, + string s => s.Normalize(StringNormalizer.Form), + RichString richString => StringNormalizer.Normalize(richString), + MultiString multiString => StringNormalizer.Normalize(multiString), + RichMultiString richMultiString => StringNormalizer.Normalize(richMultiString), + string[] strings => StringNormalizer.Normalize(strings), + JsonElement jsonElement => NormalizeJsonElement(jsonElement), + _ => value + }; + } + + private static object? NormalizeJsonElement(JsonElement element) + { + if (element.ValueKind != JsonValueKind.String) return element; + var value = element.GetString(); + return value?.Normalize(StringNormalizer.Form); + } + + #endregion + + void IDisposable.Dispose() + { + // No resources to dispose + } +} diff --git a/backend/FwLite/MiniLcm/Normalization/StringNormalizer.cs b/backend/FwLite/MiniLcm/Normalization/StringNormalizer.cs new file mode 100644 index 0000000000..f12cd96a12 --- /dev/null +++ b/backend/FwLite/MiniLcm/Normalization/StringNormalizer.cs @@ -0,0 +1,76 @@ +using System.Diagnostics.CodeAnalysis; +using System.Text; +using MiniLcm.Models; + +namespace MiniLcm.Normalization; + +/// +/// Helper class for normalizing strings to NFD (Unicode Normalization Form D - Canonical Decomposition) +/// +public static class StringNormalizer +{ + public const NormalizationForm Form = NormalizationForm.FormD; + + /// + /// Normalizes a string to NFD. Returns null if input is null. + /// + [return: NotNullIfNotNull(nameof(value))] + public static string? Normalize(string? value) + { + return value?.Normalize(Form); + } + + /// + /// Normalizes all values in a MultiString to NFD + /// + public static MultiString Normalize(MultiString multiString) + { + var normalized = new MultiString(multiString.Values.Count); + foreach (var (key, value) in multiString.Values) + { + // Preserve all keys, even if the value is empty or null + normalized.Values[key] = string.IsNullOrEmpty(value) ? value : value.Normalize(Form); + } + return normalized; + } + + /// + /// Normalizes all text spans in a RichString to NFD + /// + [return: NotNullIfNotNull(nameof(richString))] + public static RichString? Normalize(RichString? richString) + { + if (richString is null) return null; + + var normalizedSpans = richString.Spans.Select(span => + span with { Text = span.Text.Normalize(Form) } + ).ToList(); + + return new RichString(normalizedSpans); + } + + /// + /// Normalizes all values in a RichMultiString to NFD + /// + public static RichMultiString Normalize(RichMultiString richMultiString) + { + var normalized = new RichMultiString(richMultiString.Count); + foreach (var (key, value) in richMultiString) + { + var normalizedRichString = Normalize(value); + if (normalizedRichString is not null) + { + normalized[key] = normalizedRichString; + } + } + return normalized; + } + + /// + /// Normalizes an array of strings to NFD + /// + public static string[] Normalize(string[] values) + { + return [.. values.Select(v => v?.Normalize(Form) ?? string.Empty)]; + } +} diff --git a/backend/FwLite/MiniLcm/Validators/MiniLcmValidators.cs b/backend/FwLite/MiniLcm/Validators/MiniLcmValidators.cs index 1a325290f3..d9e3ce2fb4 100644 --- a/backend/FwLite/MiniLcm/Validators/MiniLcmValidators.cs +++ b/backend/FwLite/MiniLcm/Validators/MiniLcmValidators.cs @@ -92,6 +92,7 @@ public static IServiceCollection AddMiniLcmValidators(this IServiceCollection se services.AddTransient>, MorphTypeUpdateValidator>(); services.AddTransient>, WritingSystemUpdateValidator>(); services.AddTransient(); + services.AddTransient(); return services; } } diff --git a/backend/FwLite/MiniLcm/Wrappers/MiniLcmWrappers.cs b/backend/FwLite/MiniLcm/Wrappers/MiniLcmWrappers.cs index 578fbf1820..a24d7b4cc4 100644 --- a/backend/FwLite/MiniLcm/Wrappers/MiniLcmWrappers.cs +++ b/backend/FwLite/MiniLcm/Wrappers/MiniLcmWrappers.cs @@ -9,10 +9,14 @@ public interface IMiniLcmWrapperFactory public static class MiniLcmWrapperExtensions { - public static IMiniLcmApi WrapWith(this IMiniLcmApi api, IEnumerable factories, IProjectIdentifier project) + /// + /// Wraps with in runtime call order + /// (outermost to innermost). The first factory becomes the outermost wrapper. + /// + public static IMiniLcmApi WrapWith(this IMiniLcmApi api, IEnumerable factories, IProjectIdentifier project) { var wrappedApi = api; - foreach (var factory in factories.Reverse()) + foreach (var factory in factories.Reverse().Where(f => f != null).Cast()) { wrappedApi = factory.Create(wrappedApi, project); } From d32c78e26178a07a1e377992e7496399fa719019 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Wed, 6 May 2026 17:42:35 +0200 Subject: [PATCH 02/12] Adapt tests to now non-mutated api input --- .../MiniLcmTests/UpdateEntryTests.cs | 5 ++++- .../FwLite/MiniLcm.Tests/CreateEntryTestsBase.cs | 3 ++- .../MiniLcm.Tests/ExampleSentenceTestsBase.cs | 16 +++++++++++++--- .../FwLite/MiniLcm.Tests/UpdateEntryTestsBase.cs | 11 ++++++++--- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/UpdateEntryTests.cs b/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/UpdateEntryTests.cs index 1e184836b1..bf8132a921 100644 --- a/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/UpdateEntryTests.cs +++ b/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/UpdateEntryTests.cs @@ -81,7 +81,10 @@ public async Task UpdateEntry_CanUpdateExampleSentenceTranslations_WhenNoTransla var before = entry.Copy(); var exampleSentence = entry.Senses[0].ExampleSentences[0]; - exampleSentence.Translations = [new() { Text = { { "en", new RichString("updated") } } }]; + exampleSentence.Translations = + [ + new() { Id = Guid.NewGuid(), Text = { { "en", new RichString("updated") } } } + ]; // Act var updatedEntry = await Api.UpdateEntry(before, entry); diff --git a/backend/FwLite/MiniLcm.Tests/CreateEntryTestsBase.cs b/backend/FwLite/MiniLcm.Tests/CreateEntryTestsBase.cs index 8ba898e353..8e309871d5 100644 --- a/backend/FwLite/MiniLcm.Tests/CreateEntryTestsBase.cs +++ b/backend/FwLite/MiniLcm.Tests/CreateEntryTestsBase.cs @@ -20,7 +20,8 @@ public async Task CanCreateEntry_AutoFaker() var createdEntry = await Api.CreateEntry(entry); createdEntry.Should().BeEquivalentTo(entry, options => options .For(e => e.Components).Exclude(e => e.Id) - .For(e => e.ComplexForms).Exclude(e => e.Id)); + .For(e => e.ComplexForms).Exclude(e => e.Id) + .Excluding(member => member.Name == nameof(IOrderable.Order))); } [Fact] diff --git a/backend/FwLite/MiniLcm.Tests/ExampleSentenceTestsBase.cs b/backend/FwLite/MiniLcm.Tests/ExampleSentenceTestsBase.cs index 8c0c50e87d..f48e787dd0 100644 --- a/backend/FwLite/MiniLcm.Tests/ExampleSentenceTestsBase.cs +++ b/backend/FwLite/MiniLcm.Tests/ExampleSentenceTestsBase.cs @@ -49,13 +49,21 @@ public async Task CanCreateExampleSentence() { var expectedExampleSentence = new ExampleSentence() { + Id = Guid.NewGuid(), SenseId = _senseId, Reference = new RichString("This is a reference", "en"), Sentence = { { "en", new RichString("test", "en") } }, - Translations = { new Translation() { Text = { { "en", new RichString("test", "en") } } } } + Translations = { + new Translation() + { + Id = Guid.NewGuid(), + Text = { { "en", new RichString("test", "en") } }, + } + } }; var actualSentence = await Api.CreateExampleSentence(_entryId, _senseId, expectedExampleSentence); - actualSentence.Should().BeEquivalentTo(expectedExampleSentence); + actualSentence.Should().BeEquivalentTo(expectedExampleSentence, + options => options.Excluding(s => s.Order)); } [Fact] @@ -63,10 +71,12 @@ public async Task CanCreateEmptyExampleSentence() { var expectedExampleSentence = new ExampleSentence() { + Id = Guid.NewGuid(), SenseId = _senseId }; var actualSentence = await Api.CreateExampleSentence(_entryId, _senseId, expectedExampleSentence); - actualSentence.Should().BeEquivalentTo(expectedExampleSentence); + actualSentence.Should().BeEquivalentTo(expectedExampleSentence, + options => options.Excluding(s => s.Order)); } [Fact] diff --git a/backend/FwLite/MiniLcm.Tests/UpdateEntryTestsBase.cs b/backend/FwLite/MiniLcm.Tests/UpdateEntryTestsBase.cs index de2f12728f..f7d8663f63 100644 --- a/backend/FwLite/MiniLcm.Tests/UpdateEntryTestsBase.cs +++ b/backend/FwLite/MiniLcm.Tests/UpdateEntryTestsBase.cs @@ -169,7 +169,9 @@ public async Task UpdateEntry_CanReorderSenses(string before, string after, stri afterEntry.Senses = afterSenses; // sanity checks - beforeEntry.Senses.Should().BeEquivalentTo(beforeSenses, options => options.WithStrictOrdering()); + beforeEntry.Senses.Should().BeEquivalentTo(beforeSenses, options => options + .WithStrictOrdering() + .Excluding(s => s.Order)); if (!ApiUsesImplicitOrdering) { beforeEntry.Senses.Select(s => s.Order).Should() @@ -231,7 +233,9 @@ public async Task UpdateEntry_CanReorderExampleSentence(string before, string af afterSense.ExampleSentences = afterExamples; // sanity checks - beforeSense.ExampleSentences.Should().BeEquivalentTo(beforeExamples, options => options.WithStrictOrdering()); + beforeSense.ExampleSentences.Should().BeEquivalentTo(beforeExamples, options => options + .WithStrictOrdering() + .Excluding(s => s.Order)); if (!ApiUsesImplicitOrdering) { beforeSense.ExampleSentences.Select(s => s.Order).Should() @@ -309,7 +313,8 @@ public async Task UpdateEntry_CanReorderComponents(string before, string after, // sanity checks beforeEntry.Components.Should().BeEquivalentTo(beforeComponents, options => options .WithStrictOrdering() - .Excluding(c => c.Id)); + .Excluding(c => c.Id) + .Excluding(c => c.Order)); if (!ApiUsesImplicitOrdering) { beforeEntry.Components.Select(s => s.Order).Should() From 5ed1c0001f61e84a87f8bb0a5b669052bad662cd Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Thu, 7 May 2026 14:01:16 +0200 Subject: [PATCH 03/12] Rename normalization wrappers and consolidate wrapper stack (#2259) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add CustomView forwarding to normalization wrapper; apply write normalization to all user-facing entry points - MiniLcmWriteApiNormalizationWrapper: add #region CustomView with pass-through CreateCustomView/UpdateCustomView/DeleteCustomView (no normalization needed; Name is view configuration metadata, not linguistic user text) - MiniLcmApiHubBase: make MiniLcmWriteApiNormalizationWrapperFactory non-nullable - FwDataMiniLcmHub: inject and pass write normalization wrapper (FwData now uses it too, for consistent forwarding-new-objects behaviour) - MiniLcmRoutes: apply read + write normalization wrappers via WrapWith, matching MiniLcmJsInvokable / MiniLcmApiHubBase pattern - CrdtFwdataProjectSyncService: add comment documenting that write normalization is intentionally not applied during sync https://claude.ai/code/session_01LN7JMAeHPcoXp7r7xVR8FH * Introduce MiniLcmApiUserFacingWrappers to centralise wrapper ordering and rationale - New MiniLcm/Wrappers/MiniLcmApiUserFacingWrappers.cs groups the three standard factories (read normalisation → write normalisation → validation) with a doc comment explaining why write normalisation is applied to both CRDT and FwData backends, and an Apply(api, project, params innerWrappers) method for callers that need extras (e.g. MiniLcmJsInvokable adds the notification wrapper) - Registered via AddMiniLcmValidators() - All user-facing entry points simplified to inject/call MiniLcmApiUserFacingWrappers: MiniLcmJsInvokable, MiniLcmApiHubBase, CrdtMiniLcmApiHub, FwDataMiniLcmHub, MiniLcmRoutes - CrdtFwdataProjectSyncService: remove read normalisation wrapper (data is already normalised on both sides); simplify comment - CustomView wrapper comment broadened from Name-specific to the whole object https://claude.ai/code/session_01LN7JMAeHPcoXp7r7xVR8FH * Fix MiniLcmApiUserFacingWrappers comment: explain transitive normalisation, drop api??this jargon https://claude.ai/code/session_01LN7JMAeHPcoXp7r7xVR8FH * Correct MiniLcmApiUserFacingWrappers comment: upfront normalisation covers all sub-objects The "transitive coverage" claim was wrong: NormalizeEntry normalises the full object tree before forwarding, so sub-objects from the sync logic are already normalised. The real reason for uniform FwData normalisation is a simpler invariant, not coverage. https://claude.ai/code/session_01LN7JMAeHPcoXp7r7xVR8FH * Don't pass 'this' in normalization wrapper; note new-instance behaviour in comment Upfront normalisation covers the full object tree, so sub-operations triggered by Update* sync logic use data that is already normalised. Passing 'this' back into the recursion caused a redundant second normalisation pass with no benefit. Now the wrapper forwards 'api' as-is; the inner validation wrapper still inserts itself for sub-calls. Also adds a note to MiniLcmApiUserFacingWrappers that the wrapper produces new instances, so the caller's original is never mutated. https://claude.ai/code/session_01LN7JMAeHPcoXp7r7xVR8FH * Remove ! on projectIdentifier; null-guard Normalize* helpers against bad JSON input Item 1: MiniLcmApiHubBase now takes non-nullable IProjectIdentifier; the null-forgiving operator is gone. FwDataMiniLcmHub throws a clear InvalidOperationException at connection time if context.Project is null rather than silently propagating null through !. Item 3: NormalizeEntry/NormalizeSense/NormalizeExampleSentence now use ?? new() for MultiString/RichMultiString fields and ?? [] for collection fields. JSON deserialization from external input can produce null values at runtime despite non-nullable annotations and = new() initializers; the guards prevent a 500 NRE and let the validation layer downstream report the problem properly. https://claude.ai/code/session_01LN7JMAeHPcoXp7r7xVR8FH * Move write normalisation after validation; revert null guards Validation now runs before write normalisation (read norm → validation → write norm). Bad input is rejected with a 400 before the normaliser sees it; the normaliser can therefore assume structurally valid data and needs no null guards of its own. Reverts the ?? new() / ?? [] guards added in the previous commit. https://claude.ai/code/session_01LN7JMAeHPcoXp7r7xVR8FH * Add inline comment explaining validation-before-write-normalisation ordering https://claude.ai/code/session_01LN7JMAeHPcoXp7r7xVR8FH * Rename normalization wrappers for consistent naming MiniLcmApiStringNormalizationWrapper → MiniLcmApiQueryNormalizationWrapper MiniLcmWriteApiNormalizationWrapper → MiniLcmApiWriteNormalizationWrapper Both now follow the MiniLcmApi* prefix convention used across all wrapper factories. "Query" better describes the role of the read-side wrapper (normalises search/exemplar query parameters, not all strings). https://claude.ai/code/session_01LN7JMAeHPcoXp7r7xVR8FH --------- Co-authored-by: Claude --- .../CrdtFwdataProjectSyncService.cs | 10 ++--- .../Services/MiniLcmJsInvokable.cs | 13 +----- .../FwLiteWeb/Hubs/CrdtMiniLcmApiHub.cs | 9 ++-- .../FwLite/FwLiteWeb/Hubs/FwDataMiniLcmHub.cs | 10 ++--- .../FwLiteWeb/Hubs/MiniLcmApiHubBase.cs | 16 ++----- .../FwLite/FwLiteWeb/Routes/MiniLcmRoutes.cs | 11 +++-- .../FwLite/MiniLcm.Tests/MiniLcmTestBase.cs | 4 +- .../MiniLcm.Tests/NormalizationTests.cs | 12 ++--- .../MiniLcm.Tests/WriteNormalizationTests.cs | 4 +- ...=> MiniLcmApiQueryNormalizationWrapper.cs} | 6 +-- ...=> MiniLcmApiWriteNormalizationWrapper.cs} | 44 ++++++++++++++----- .../MiniLcm/Validators/MiniLcmValidators.cs | 6 ++- .../Wrappers/MiniLcmApiUserFacingWrappers.cs | 34 ++++++++++++++ 13 files changed, 105 insertions(+), 74 deletions(-) rename backend/FwLite/MiniLcm/Normalization/{MiniLcmApiStringNormalizationWrapper.cs => MiniLcmApiQueryNormalizationWrapper.cs} (86%) rename backend/FwLite/MiniLcm/Normalization/{MiniLcmWriteApiNormalizationWrapper.cs => MiniLcmApiWriteNormalizationWrapper.cs} (95%) create mode 100644 backend/FwLite/MiniLcm/Wrappers/MiniLcmApiUserFacingWrappers.cs diff --git a/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs b/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs index c760dffb27..813422346e 100644 --- a/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs +++ b/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs @@ -4,7 +4,6 @@ using LexCore.Sync; using Microsoft.Extensions.Logging; using MiniLcm; -using MiniLcm.Normalization; using MiniLcm.SyncHelpers; using MiniLcm.Validators; @@ -12,8 +11,7 @@ namespace FwLiteProjectSync; public class CrdtFwdataProjectSyncService(MiniLcmImport miniLcmImport, ILogger logger, - MiniLcmApiValidationWrapperFactory validationWrapperFactory, - MiniLcmApiStringNormalizationWrapperFactory normalizationWrapperFactory) + MiniLcmApiValidationWrapperFactory validationWrapperFactory) { public record DryRunSyncResult( int CrdtChanges, @@ -63,8 +61,10 @@ private async Task SyncOrImportInternal(IMiniLcmApi crdtApi, IMiniLc throw new InvalidOperationException("Project sync state does not match presence of snapshot."); } - crdtApi = normalizationWrapperFactory.Create(validationWrapperFactory.Create(crdtApi)); - fwdataApi = normalizationWrapperFactory.Create(validationWrapperFactory.Create(fwdataApi)); + // Write normalisation is not applied here: data is already normalised on both sides + // (FwData internally by LibLCM; CRDT at the user-facing entry points before persistence). + crdtApi = validationWrapperFactory.Create(crdtApi); + fwdataApi = validationWrapperFactory.Create(fwdataApi); if (dryRun) { diff --git a/backend/FwLite/FwLiteShared/Services/MiniLcmJsInvokable.cs b/backend/FwLite/FwLiteShared/Services/MiniLcmJsInvokable.cs index db81eaa7e3..a339270bbb 100644 --- a/backend/FwLite/FwLiteShared/Services/MiniLcmJsInvokable.cs +++ b/backend/FwLite/FwLiteShared/Services/MiniLcmJsInvokable.cs @@ -5,8 +5,6 @@ using MiniLcm; using MiniLcm.Media; using MiniLcm.Models; -using MiniLcm.Normalization; -using MiniLcm.Validators; using MiniLcm.Wrappers; using Reinforced.Typings.Attributes; @@ -18,17 +16,10 @@ public class MiniLcmJsInvokable( IProjectIdentifier project, ILogger logger, MiniLcmApiNotifyWrapperFactory notificationWrapperFactory, - MiniLcmApiValidationWrapperFactory validationWrapperFactory, - MiniLcmApiStringNormalizationWrapperFactory readNormalizationWrapperFactory, - MiniLcmWriteApiNormalizationWrapperFactory writeNormalizationWrapperFactory + MiniLcmApiUserFacingWrappers userFacingWrappers ) : IDisposable { - private readonly IMiniLcmApi _wrappedApi = api.WrapWith([ - readNormalizationWrapperFactory, - writeNormalizationWrapperFactory, // normalize data before validating - validationWrapperFactory, - notificationWrapperFactory, - ], project); + private readonly IMiniLcmApi _wrappedApi = userFacingWrappers.Apply(api, project, notificationWrapperFactory); public record MiniLcmFeatures(bool? History, bool? Write, bool? OpenWithFlex, bool? Feedback, bool? Sync, bool? Audio, bool? CustomViews); private bool SupportsSync => project.DataFormat == ProjectDataFormat.Harmony && api is CrdtMiniLcmApi; diff --git a/backend/FwLite/FwLiteWeb/Hubs/CrdtMiniLcmApiHub.cs b/backend/FwLite/FwLiteWeb/Hubs/CrdtMiniLcmApiHub.cs index 50324210c2..f8559f6ee0 100644 --- a/backend/FwLite/FwLiteWeb/Hubs/CrdtMiniLcmApiHub.cs +++ b/backend/FwLite/FwLiteWeb/Hubs/CrdtMiniLcmApiHub.cs @@ -7,8 +7,7 @@ using Microsoft.Extensions.Caching.Memory; using MiniLcm; using MiniLcm.Models; -using MiniLcm.Normalization; -using MiniLcm.Validators; +using MiniLcm.Wrappers; using SystemTextJsonPatch; namespace FwLiteWeb.Hubs; @@ -22,10 +21,8 @@ public class CrdtMiniLcmApiHub( LexboxProjectService lexboxProjectService, IMemoryCache memoryCache, IHubContext hubContext, - MiniLcmApiValidationWrapperFactory validationWrapperFactory, - MiniLcmWriteApiNormalizationWrapperFactory writeNormalizationWrapperFactory, - MiniLcmApiStringNormalizationWrapperFactory readNormalizationWrapperFactory -) : MiniLcmApiHubBase(miniLcmApi, validationWrapperFactory, readNormalizationWrapperFactory, writeNormalizationWrapperFactory, projectContext.Project) + MiniLcmApiUserFacingWrappers userFacingWrappers +) : MiniLcmApiHubBase(miniLcmApi, userFacingWrappers, projectContext.Project) { public const string ProjectRouteKey = "project"; public static string ProjectGroup(string projectName) => "crdt-" + projectName; diff --git a/backend/FwLite/FwLiteWeb/Hubs/FwDataMiniLcmHub.cs b/backend/FwLite/FwLiteWeb/Hubs/FwDataMiniLcmHub.cs index 05e6de79a5..d71735f7fd 100644 --- a/backend/FwLite/FwLiteWeb/Hubs/FwDataMiniLcmHub.cs +++ b/backend/FwLite/FwLiteWeb/Hubs/FwDataMiniLcmHub.cs @@ -1,7 +1,6 @@ using FwDataMiniLcmBridge; using MiniLcm; -using MiniLcm.Normalization; -using MiniLcm.Validators; +using MiniLcm.Wrappers; using SIL.LCModel; namespace FwLiteWeb.Hubs; @@ -11,12 +10,9 @@ public class FwDataMiniLcmHub( IMiniLcmApi miniLcmApi, FwDataFactory fwDataFactory, FwDataProjectContext context, - MiniLcmApiValidationWrapperFactory validationWrapperFactory, - MiniLcmApiStringNormalizationWrapperFactory readNormalizationWrapperFactory + MiniLcmApiUserFacingWrappers userFacingWrappers ) -// Note: FwData already handles string normalization internally (via LCModel), -// so we skip the write normalization wrapper for FwData APIs. -: MiniLcmApiHubBase(miniLcmApi, validationWrapperFactory, readNormalizationWrapperFactory, null, context.Project) +: MiniLcmApiHubBase(miniLcmApi, userFacingWrappers, context.Project ?? throw new InvalidOperationException("No project is set in the context.")) { public const string ProjectRouteKey = "fwdata"; public override async Task OnConnectedAsync() diff --git a/backend/FwLite/FwLiteWeb/Hubs/MiniLcmApiHubBase.cs b/backend/FwLite/FwLiteWeb/Hubs/MiniLcmApiHubBase.cs index 3936cea78f..071a1fc338 100644 --- a/backend/FwLite/FwLiteWeb/Hubs/MiniLcmApiHubBase.cs +++ b/backend/FwLite/FwLiteWeb/Hubs/MiniLcmApiHubBase.cs @@ -1,25 +1,17 @@ using Microsoft.AspNetCore.SignalR; using MiniLcm; using MiniLcm.Models; -using MiniLcm.Normalization; -using MiniLcm.Validators; -using SystemTextJsonPatch; using MiniLcm.Wrappers; +using SystemTextJsonPatch; namespace FwLiteWeb.Hubs; public abstract class MiniLcmApiHubBase( IMiniLcmApi miniLcmApi, - MiniLcmApiValidationWrapperFactory validationWrapperFactory, - MiniLcmApiStringNormalizationWrapperFactory readNormalizationWrapperFactory, - MiniLcmWriteApiNormalizationWrapperFactory? writeNormalizationWrapperFactory, - IProjectIdentifier? projectIdentifier) : Hub + MiniLcmApiUserFacingWrappers userFacingWrappers, + IProjectIdentifier projectIdentifier) : Hub { - private readonly IMiniLcmApi _miniLcmApi = miniLcmApi.WrapWith([ - readNormalizationWrapperFactory, - writeNormalizationWrapperFactory, // normalize data before validating - validationWrapperFactory, - ], projectIdentifier!); + private readonly IMiniLcmApi _miniLcmApi = userFacingWrappers.Apply(miniLcmApi, projectIdentifier); public async Task GetWritingSystems() { diff --git a/backend/FwLite/FwLiteWeb/Routes/MiniLcmRoutes.cs b/backend/FwLite/FwLiteWeb/Routes/MiniLcmRoutes.cs index 8e84c19d4f..6161cc51bc 100644 --- a/backend/FwLite/FwLiteWeb/Routes/MiniLcmRoutes.cs +++ b/backend/FwLite/FwLiteWeb/Routes/MiniLcmRoutes.cs @@ -7,7 +7,7 @@ using MiniLcm.Filtering; using MiniLcm.Models; using MiniLcm.Project; -using MiniLcm.Validators; +using MiniLcm.Wrappers; namespace FwLiteWeb.Routes; @@ -86,12 +86,11 @@ public static IEndpointConventionBuilder MapMiniLcmRoutes(this IEndpointRouteBui return Results.Problem($"Project {projectCode} not found"); } - var validationWrapperFactory = context.HttpContext.RequestServices - .GetRequiredService(); + var services = context.HttpContext.RequestServices; + var userFacingWrappers = services.GetRequiredService(); - miniLcmHolder.MiniLcmApi = validationWrapperFactory.Create( - await projectProvider.OpenProject(project, context.HttpContext.RequestServices) - ); + miniLcmHolder.MiniLcmApi = userFacingWrappers.Apply( + await projectProvider.OpenProject(project, services), project); return await next(context); }); diff --git a/backend/FwLite/MiniLcm.Tests/MiniLcmTestBase.cs b/backend/FwLite/MiniLcm.Tests/MiniLcmTestBase.cs index 085a1dd952..09b0dd50c9 100644 --- a/backend/FwLite/MiniLcm.Tests/MiniLcmTestBase.cs +++ b/backend/FwLite/MiniLcm.Tests/MiniLcmTestBase.cs @@ -18,8 +18,8 @@ public virtual async Task InitializeAsync() BaseApi = await NewApi(); BaseApi.Should().NotBeNull(); Api = BaseApi.WrapWith([ - new MiniLcmApiStringNormalizationWrapperFactory(), - new MiniLcmWriteApiNormalizationWrapperFactory(), + new MiniLcmApiQueryNormalizationWrapperFactory(), + new MiniLcmApiWriteNormalizationWrapperFactory(), ], null!); Api.Should().NotBeNull(); } diff --git a/backend/FwLite/MiniLcm.Tests/NormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/NormalizationTests.cs index 4feff444c8..8038352c71 100644 --- a/backend/FwLite/MiniLcm.Tests/NormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/NormalizationTests.cs @@ -33,14 +33,14 @@ public NormalizationTests() { MockApi = Mock.Of(); // Mock.Get(MockApi).Setup(api => api.SearchEntries(It.IsAny(), null)).Returns(new List().ToAsyncEnumerable()); - var factory = new MiniLcmApiStringNormalizationWrapperFactory(); + var factory = new MiniLcmApiQueryNormalizationWrapperFactory(); NormalizingApi = factory.Create(MockApi); } [Fact] public void SearchEntriesIsNormalized() { - NormalizingApi.Should().BeOfType(); + NormalizingApi.Should().BeOfType(); var results = NormalizingApi.SearchEntries(NFCString, null); Mock.Get(MockApi).Verify(api => api.SearchEntries(NFDString, null)); } @@ -48,7 +48,7 @@ public void SearchEntriesIsNormalized() [Fact] public void SearchEntriesWithQueryOptionsAreNormalized() { - NormalizingApi.Should().BeOfType(); + NormalizingApi.Should().BeOfType(); var results = NormalizingApi.SearchEntries(NFCString, NFCQueryOptions); Mock.Get(MockApi).Verify(api => api.SearchEntries(NFDString, It.Is( opt => opt.Exemplar!.Value == NFDOptions.Exemplar!.Value && @@ -58,7 +58,7 @@ public void SearchEntriesWithQueryOptionsAreNormalized() [Fact] public void CountEntriesIsNormalized() { - NormalizingApi.Should().BeOfType(); + NormalizingApi.Should().BeOfType(); var results = NormalizingApi.CountEntries(NFCString, null); Mock.Get(MockApi).Verify(api => api.CountEntries(NFDString, null)); } @@ -66,7 +66,7 @@ public void CountEntriesIsNormalized() [Fact] public void CountEntriesWithFilterQueryOptionsIsNormalized() { - NormalizingApi.Should().BeOfType(); + NormalizingApi.Should().BeOfType(); var results = NormalizingApi.CountEntries(NFCString, NFCOptions); Mock.Get(MockApi).Verify(api => api.CountEntries(NFDString, It.Is( opt => opt.Exemplar!.Value == NFDOptions.Exemplar!.Value && @@ -76,7 +76,7 @@ public void CountEntriesWithFilterQueryOptionsIsNormalized() [Fact] public void GetEntriesIsNormalized() { - NormalizingApi.Should().BeOfType(); + NormalizingApi.Should().BeOfType(); var results = NormalizingApi.GetEntries(NFCQueryOptions); Mock.Get(MockApi).Verify(api => api.GetEntries(It.Is( opt => opt.Exemplar!.Value == NFDOptions.Exemplar!.Value && diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs index a96e4ed92c..3e7bf20c22 100644 --- a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -11,7 +11,7 @@ namespace MiniLcm.Tests; #pragma warning disable IDE0022 // Use block body for method /// -/// Tests for the MiniLcmWriteApiNormalizationWrapper. +/// Tests for the MiniLcmApiWriteNormalizationWrapper. /// These tests verify that all user-entered text is normalized to NFD on write operations. /// public class WriteNormalizationTests @@ -22,7 +22,7 @@ public class WriteNormalizationTests public WriteNormalizationTests() { _mockApi = Mock.Of(); - var factory = new MiniLcmWriteApiNormalizationWrapperFactory(); + var factory = new MiniLcmApiWriteNormalizationWrapperFactory(); _normalizingApi = factory.Create(_mockApi); } diff --git a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiStringNormalizationWrapper.cs b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiQueryNormalizationWrapper.cs similarity index 86% rename from backend/FwLite/MiniLcm/Normalization/MiniLcmApiStringNormalizationWrapper.cs rename to backend/FwLite/MiniLcm/Normalization/MiniLcmApiQueryNormalizationWrapper.cs index ea756ef69d..a0bd5ec2fb 100644 --- a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiStringNormalizationWrapper.cs +++ b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiQueryNormalizationWrapper.cs @@ -4,17 +4,17 @@ namespace MiniLcm.Normalization; -public class MiniLcmApiStringNormalizationWrapperFactory() : IMiniLcmWrapperFactory +public class MiniLcmApiQueryNormalizationWrapperFactory() : IMiniLcmWrapperFactory { public IMiniLcmApi Create(IMiniLcmApi api, IProjectIdentifier _unused) => Create(api); public IMiniLcmApi Create(IMiniLcmApi api) { - return new MiniLcmApiStringNormalizationWrapper(api); + return new MiniLcmApiQueryNormalizationWrapper(api); } } -public partial class MiniLcmApiStringNormalizationWrapper( +public partial class MiniLcmApiQueryNormalizationWrapper( IMiniLcmApi api) : IMiniLcmApi { public const NormalizationForm Form = NormalizationForm.FormD; diff --git a/backend/FwLite/MiniLcm/Normalization/MiniLcmWriteApiNormalizationWrapper.cs b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs similarity index 95% rename from backend/FwLite/MiniLcm/Normalization/MiniLcmWriteApiNormalizationWrapper.cs rename to backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs index 047fc07aee..ba5f3c8b43 100644 --- a/backend/FwLite/MiniLcm/Normalization/MiniLcmWriteApiNormalizationWrapper.cs +++ b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs @@ -7,7 +7,7 @@ namespace MiniLcm.Normalization; -public class MiniLcmWriteApiNormalizationWrapperFactory : IMiniLcmWrapperFactory +public class MiniLcmApiWriteNormalizationWrapperFactory : IMiniLcmWrapperFactory { public IMiniLcmApi Create(IMiniLcmApi api, IProjectIdentifier _unused) { @@ -17,7 +17,7 @@ public IMiniLcmApi Create(IMiniLcmApi api, IProjectIdentifier _unused) public IMiniLcmApi Create(IMiniLcmApi api) { - return new MiniLcmWriteApiNormalizationWrapper(api); + return new MiniLcmApiWriteNormalizationWrapper(api); } } @@ -31,7 +31,7 @@ public IMiniLcmApi Create(IMiniLcmApi api) /// JsonElement values are only normalized when they are simple strings; complex JSON values are left as-is /// to avoid guessing the target type. /// -public partial class MiniLcmWriteApiNormalizationWrapper(IMiniLcmApi api) : IMiniLcmApi +public partial class MiniLcmApiWriteNormalizationWrapper(IMiniLcmApi api) : IMiniLcmApi { private readonly IMiniLcmApi _api = api; @@ -60,7 +60,7 @@ public Task UpdateWritingSystem(WritingSystemId id, WritingSystem public async Task UpdateWritingSystem(WritingSystem before, WritingSystem after, IMiniLcmApi? api = null) { - return await _api.UpdateWritingSystem(before, NormalizeWritingSystem(after), api ?? this); + return await _api.UpdateWritingSystem(before, NormalizeWritingSystem(after), api); } public Task MoveWritingSystem(WritingSystemId id, WritingSystemType type, BetweenPosition between) @@ -96,7 +96,7 @@ public Task UpdatePartOfSpeech(Guid id, UpdateObjectInput UpdatePartOfSpeech(PartOfSpeech before, PartOfSpeech after, IMiniLcmApi? api = null) { - return await _api.UpdatePartOfSpeech(before, NormalizePartOfSpeech(after), api ?? this); + return await _api.UpdatePartOfSpeech(before, NormalizePartOfSpeech(after), api); } public Task DeletePartOfSpeech(Guid id) @@ -132,7 +132,7 @@ public Task UpdatePublication(Guid id, UpdateObjectInput UpdatePublication(Publication before, Publication after, IMiniLcmApi? api = null) { - return await _api.UpdatePublication(before, NormalizePublication(after), api ?? this); + return await _api.UpdatePublication(before, NormalizePublication(after), api); } public Task DeletePublication(Guid id) @@ -167,7 +167,7 @@ public Task UpdateSemanticDomain(Guid id, UpdateObjectInput UpdateSemanticDomain(SemanticDomain before, SemanticDomain after, IMiniLcmApi? api = null) { - return await _api.UpdateSemanticDomain(before, NormalizeSemanticDomain(after), api ?? this); + return await _api.UpdateSemanticDomain(before, NormalizeSemanticDomain(after), api); } public Task DeleteSemanticDomain(Guid id) @@ -204,7 +204,7 @@ public Task UpdateComplexFormType(Guid id, UpdateObjectInput UpdateComplexFormType(ComplexFormType before, ComplexFormType after, IMiniLcmApi? api = null) { - return await _api.UpdateComplexFormType(before, NormalizeComplexFormType(after), api ?? this); + return await _api.UpdateComplexFormType(before, NormalizeComplexFormType(after), api); } public Task DeleteComplexFormType(Guid id) @@ -237,7 +237,7 @@ public Task UpdateMorphTypeData(Guid id, UpdateObjectInput UpdateMorphTypeData(MorphTypeData before, MorphTypeData after, IMiniLcmApi? api = null) { - return await _api.UpdateMorphTypeData(before, NormalizeMorphTypeData(after), api ?? this); + return await _api.UpdateMorphTypeData(before, NormalizeMorphTypeData(after), api); } public Task DeleteMorphTypeData(Guid id) @@ -278,7 +278,7 @@ public Task UpdateEntry(Guid id, UpdateObjectInput update) public async Task UpdateEntry(Entry before, Entry after, IMiniLcmApi? api = null) { - return await _api.UpdateEntry(before, NormalizeEntry(after), api ?? this); + return await _api.UpdateEntry(before, NormalizeEntry(after), api); } public Task DeleteEntry(Guid id) @@ -366,7 +366,7 @@ public Task UpdateSense(Guid entryId, Guid senseId, UpdateObjectInput UpdateSense(Guid entryId, Sense before, Sense after, IMiniLcmApi? api = null) { - return await _api.UpdateSense(entryId, before, NormalizeSense(after), api ?? this); + return await _api.UpdateSense(entryId, before, NormalizeSense(after), api); } public Task MoveSense(Guid entryId, Guid senseId, BetweenPosition position) @@ -428,7 +428,7 @@ public Task UpdateExampleSentence(Guid entryId, Guid senseId, G public async Task UpdateExampleSentence(Guid entryId, Guid senseId, ExampleSentence before, ExampleSentence after, IMiniLcmApi? api = null) { - return await _api.UpdateExampleSentence(entryId, senseId, before, NormalizeExampleSentence(after), api ?? this); + return await _api.UpdateExampleSentence(entryId, senseId, before, NormalizeExampleSentence(after), api); } public Task MoveExampleSentence(Guid entryId, Guid senseId, Guid exampleSentenceId, BetweenPosition position) @@ -506,6 +506,26 @@ private static async IAsyncEnumerable NormalizeStream( #endregion + #region CustomView + + // CustomView data is view configuration, not user-entered linguistic text, so no normalization is applied. + public async Task CreateCustomView(CustomView customView) + { + return await _api.CreateCustomView(customView); + } + + public async Task UpdateCustomView(CustomView customView) + { + return await _api.UpdateCustomView(customView); + } + + public Task DeleteCustomView(Guid id) + { + return _api.DeleteCustomView(id); + } + + #endregion + #region File Operations public Task SaveFile(Stream stream, LcmFileMetadata metadata) diff --git a/backend/FwLite/MiniLcm/Validators/MiniLcmValidators.cs b/backend/FwLite/MiniLcm/Validators/MiniLcmValidators.cs index d9e3ce2fb4..ae5468250a 100644 --- a/backend/FwLite/MiniLcm/Validators/MiniLcmValidators.cs +++ b/backend/FwLite/MiniLcm/Validators/MiniLcmValidators.cs @@ -2,6 +2,7 @@ using Microsoft.Extensions.DependencyInjection; using MiniLcm.Models; using MiniLcm.Normalization; +using MiniLcm.Wrappers; namespace MiniLcm.Validators; @@ -91,8 +92,9 @@ public static IServiceCollection AddMiniLcmValidators(this IServiceCollection se services.AddTransient, PublicationValidator>(); services.AddTransient>, MorphTypeUpdateValidator>(); services.AddTransient>, WritingSystemUpdateValidator>(); - services.AddTransient(); - services.AddTransient(); + services.AddTransient(); + services.AddTransient(); + services.AddTransient(); return services; } } diff --git a/backend/FwLite/MiniLcm/Wrappers/MiniLcmApiUserFacingWrappers.cs b/backend/FwLite/MiniLcm/Wrappers/MiniLcmApiUserFacingWrappers.cs new file mode 100644 index 0000000000..50dcb63998 --- /dev/null +++ b/backend/FwLite/MiniLcm/Wrappers/MiniLcmApiUserFacingWrappers.cs @@ -0,0 +1,34 @@ +using MiniLcm.Normalization; +using MiniLcm.Validators; + +namespace MiniLcm.Wrappers; + +/// +/// The standard wrapper stack applied at every user-facing API entry point: +/// MiniLcmJsInvokable, MiniLcmApiHubBase, and MiniLcmRoutes. +/// +/// Call order: read normalisation → validation → write normalisation → raw API. +/// Validation runs before write normalisation so that bad input is rejected with a +/// meaningful error before the normaliser sees it; the normaliser can therefore assume +/// structurally valid data. Write normalisation is applied uniformly to both CRDT and +/// FwData: for CRDT it ensures NFD storage; for FwData, LibLCM also normalises +/// internally, but the uniform approach avoids backend-specific reasoning. FwData is +/// desktop-only, so the redundant pass is inconsequential. +/// The wrapper creates new instances of every normalised object, so the caller's +/// original is never mutated and the inner API always receives a clean copy. +/// +public class MiniLcmApiUserFacingWrappers( + MiniLcmApiQueryNormalizationWrapperFactory readNormalization, + MiniLcmApiWriteNormalizationWrapperFactory writeNormalization, + MiniLcmApiValidationWrapperFactory validation) +{ + /// + /// Additional factories appended after write normalisation (i.e. applied innermost, closest to the raw API). + /// + public IMiniLcmApi Apply(IMiniLcmApi api, IProjectIdentifier project, params IMiniLcmWrapperFactory?[] innerWrappers) + { + // Validation before write normalisation: bad input is rejected with a clear error + // before the normaliser sees it, so the normaliser can assume structurally valid data. + return api.WrapWith([readNormalization, validation, writeNormalization, ..innerWrappers], project); + } +} From 3577c20462c608a92ae467ec45605ee1561fdddf Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Thu, 7 May 2026 14:58:11 +0200 Subject: [PATCH 04/12] Refactor and fix build --- .../CrdtFwdataProjectSyncService.cs | 4 ++-- .../Wrappers/MiniLcmApiUserFacingWrappers.cs | 24 +++++++------------ .../MiniLcm/Wrappers/MiniLcmWrappers.cs | 4 ++-- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs b/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs index 813422346e..588803caba 100644 --- a/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs +++ b/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs @@ -61,8 +61,8 @@ private async Task SyncOrImportInternal(IMiniLcmApi crdtApi, IMiniLc throw new InvalidOperationException("Project sync state does not match presence of snapshot."); } - // Write normalisation is not applied here: data is already normalised on both sides - // (FwData internally by LibLCM; CRDT at the user-facing entry points before persistence). + // No write normalization: Data is already normalised on both sides. + // No query normalization: The sync doesn't do any querying. crdtApi = validationWrapperFactory.Create(crdtApi); fwdataApi = validationWrapperFactory.Create(fwdataApi); diff --git a/backend/FwLite/MiniLcm/Wrappers/MiniLcmApiUserFacingWrappers.cs b/backend/FwLite/MiniLcm/Wrappers/MiniLcmApiUserFacingWrappers.cs index 50dcb63998..43ddef6913 100644 --- a/backend/FwLite/MiniLcm/Wrappers/MiniLcmApiUserFacingWrappers.cs +++ b/backend/FwLite/MiniLcm/Wrappers/MiniLcmApiUserFacingWrappers.cs @@ -1,3 +1,4 @@ +using MiniLcm.Models; using MiniLcm.Normalization; using MiniLcm.Validators; @@ -6,29 +7,20 @@ namespace MiniLcm.Wrappers; /// /// The standard wrapper stack applied at every user-facing API entry point: /// MiniLcmJsInvokable, MiniLcmApiHubBase, and MiniLcmRoutes. -/// -/// Call order: read normalisation → validation → write normalisation → raw API. -/// Validation runs before write normalisation so that bad input is rejected with a -/// meaningful error before the normaliser sees it; the normaliser can therefore assume -/// structurally valid data. Write normalisation is applied uniformly to both CRDT and -/// FwData: for CRDT it ensures NFD storage; for FwData, LibLCM also normalises -/// internally, but the uniform approach avoids backend-specific reasoning. FwData is -/// desktop-only, so the redundant pass is inconsequential. -/// The wrapper creates new instances of every normalised object, so the caller's -/// original is never mutated and the inner API always receives a clean copy. /// public class MiniLcmApiUserFacingWrappers( MiniLcmApiQueryNormalizationWrapperFactory readNormalization, MiniLcmApiWriteNormalizationWrapperFactory writeNormalization, MiniLcmApiValidationWrapperFactory validation) { - /// - /// Additional factories appended after write normalisation (i.e. applied innermost, closest to the raw API). - /// - public IMiniLcmApi Apply(IMiniLcmApi api, IProjectIdentifier project, params IMiniLcmWrapperFactory?[] innerWrappers) + public IMiniLcmApi Apply(IMiniLcmApi api, IProjectIdentifier project, params IMiniLcmWrapperFactory[] innerWrappers) { // Validation before write normalisation: bad input is rejected with a clear error - // before the normaliser sees it, so the normaliser can assume structurally valid data. - return api.WrapWith([readNormalization, validation, writeNormalization, ..innerWrappers], project); + // before the normaliser sees it, so the normaliser can assume structurally valid data.Write normalisation is applied uniformly to both CRDT and FwData: + // It's redundant for FwData (because LibLCM also normalises internally), + // but the uniformity is simpler and the normaliser creates new instances when it normalises. + // We want that behaviour to always be in place for simplicity. + // FwData is desktop-only, so the redundant pass is inconsequential cost-wise. + return api.WrapWith([readNormalization, validation, writeNormalization, .. innerWrappers], project); } } diff --git a/backend/FwLite/MiniLcm/Wrappers/MiniLcmWrappers.cs b/backend/FwLite/MiniLcm/Wrappers/MiniLcmWrappers.cs index a24d7b4cc4..d2e33b52b1 100644 --- a/backend/FwLite/MiniLcm/Wrappers/MiniLcmWrappers.cs +++ b/backend/FwLite/MiniLcm/Wrappers/MiniLcmWrappers.cs @@ -13,10 +13,10 @@ public static class MiniLcmWrapperExtensions /// Wraps with in runtime call order /// (outermost to innermost). The first factory becomes the outermost wrapper. /// - public static IMiniLcmApi WrapWith(this IMiniLcmApi api, IEnumerable factories, IProjectIdentifier project) + public static IMiniLcmApi WrapWith(this IMiniLcmApi api, IEnumerable factories, IProjectIdentifier project) { var wrappedApi = api; - foreach (var factory in factories.Reverse().Where(f => f != null).Cast()) + foreach (var factory in factories.Reverse()) { wrappedApi = factory.Create(wrappedApi, project); } From 938c72402d86718e96a28e26f3e8976d46e48c5b Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Thu, 7 May 2026 15:24:18 +0200 Subject: [PATCH 05/12] Exclude some things from normalization --- .../MiniLcm.Tests/WriteNormalizationTests.cs | 142 ++++++++++++++---- .../MiniLcmApiWriteNormalizationWrapper.cs | 44 +++--- 2 files changed, 131 insertions(+), 55 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs index 3e7bf20c22..9d52b3803d 100644 --- a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -43,34 +43,51 @@ private static void AssertAllNfd(IEnumerable values) #region WritingSystem Tests + // WritingSystem.{Name, Abbreviation, Font, Exemplars} are plain strings; in liblcm they are + // LDML-managed by WritingSystemManager rather than stored as TsString, so the wrapper passes + // them through unchanged (no NFD normalization). + [Fact] - public async Task CreateWritingSystem_NormalizesToNfd() + public async Task CreateWritingSystem_DoesNotNormalize() { var ws = NfcTestData.CreateNfcWritingSystem(); AssertNfc(ws); + WritingSystem? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateWritingSystem(It.IsAny(), It.IsAny?>())) + .Callback?>((w, _) => captured = w) + .ReturnsAsync(ws); + await _normalizingApi.CreateWritingSystem(ws); - Mock.Get(_mockApi).Verify(api => api.CreateWritingSystem( - It.Is(w => IsNfd(w)), - null - )); + captured.Should().NotBeNull(); + captured.Name.Should().Be(NfcTestData.Nfc); + captured.Abbreviation.Should().Be(NfcTestData.Nfc); + captured.Font.Should().Be(NfcTestData.Nfc); + captured.Exemplars.Should().Equal(NfcTestData.Nfc, NfcTestData.Nfc); } [Fact] - public async Task UpdateWritingSystem_BeforeAfter_NormalizesToNfd() + public async Task UpdateWritingSystem_BeforeAfter_DoesNotNormalize() { var before = NfcTestData.CreateNfcWritingSystem(); var after = NfcTestData.CreateNfcWritingSystem(); AssertNfc(after); + WritingSystem? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdateWritingSystem(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, a, _) => captured = a) + .ReturnsAsync(after); + await _normalizingApi.UpdateWritingSystem(before, after); - Mock.Get(_mockApi).Verify(api => api.UpdateWritingSystem( - It.IsAny(), - It.Is(w => IsNfd(w)), - It.IsAny() - )); + captured.Should().NotBeNull(); + captured.Name.Should().Be(NfcTestData.Nfc); + captured.Abbreviation.Should().Be(NfcTestData.Nfc); + captured.Font.Should().Be(NfcTestData.Nfc); + captured.Exemplars.Should().Equal(NfcTestData.Nfc, NfcTestData.Nfc); } #endregion @@ -243,33 +260,72 @@ public async Task UpdateComplexFormType_BeforeAfter_NormalizesToNfd() #region MorphTypeData Tests + // MorphTypeData's MultiString/RichMultiString fields (Name, Abbreviation, Description) are normalized + // to NFD (matching liblcm); LeadingToken/TrailingToken are punctuation markers and pass through unchanged. + [Fact] - public async Task CreateMorphTypeData_NormalizesToNfd() + public async Task CreateMorphTypeData_NormalizesMultiStringsToNfd_AndPassesTokensThrough() { var mtd = NfcTestData.CreateNfcMorphTypeData(); AssertNfc(mtd); + MorphTypeData? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateMorphTypeData(It.IsAny())) + .Callback(m => captured = m) + .ReturnsAsync(mtd); + await _normalizingApi.CreateMorphTypeData(mtd); - Mock.Get(_mockApi).Verify(api => api.CreateMorphTypeData( - It.Is(m => IsNfd(m)) - )); + captured.Should().NotBeNull(); + IsNfd(captured).Should().BeTrue(); + captured.LeadingToken.Should().Be(NfcTestData.Nfc); + captured.TrailingToken.Should().Be(NfcTestData.Nfc); } [Fact] - public async Task UpdateMorphTypeData_BeforeAfter_NormalizesToNfd() + public async Task UpdateMorphTypeData_BeforeAfter_NormalizesMultiStringsToNfd_AndPassesTokensThrough() { var before = NfcTestData.CreateNfcMorphTypeData(); var after = NfcTestData.CreateNfcMorphTypeData(); AssertNfc(after); + MorphTypeData? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdateMorphTypeData(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, a, _) => captured = a) + .ReturnsAsync(after); + await _normalizingApi.UpdateMorphTypeData(before, after); - Mock.Get(_mockApi).Verify(api => api.UpdateMorphTypeData( - It.IsAny(), - It.Is(m => IsNfd(m)), - It.IsAny() - )); + captured.Should().NotBeNull(); + IsNfd(captured).Should().BeTrue(); + captured.LeadingToken.Should().Be(NfcTestData.Nfc); + captured.TrailingToken.Should().Be(NfcTestData.Nfc); + } + + [Fact] + public async Task UpdateMorphTypeData_JsonPatch_NormalizesMultiString_AndPassesTokensThrough() + { + var update = new UpdateObjectInput() + .Set(m => m.Name, NfcTestData.CreateNfcMultiString()) + .Set(m => m.LeadingToken, NfcTestData.Nfc) + .Set(m => m.TrailingToken, NfcTestData.Nfc); + + UpdateObjectInput? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdateMorphTypeData(It.IsAny(), It.IsAny>())) + .Callback>((_, patch) => captured = patch) + .ReturnsAsync(NfcTestData.CreateNfcMorphTypeData()); + + await _normalizingApi.UpdateMorphTypeData(Guid.NewGuid(), update); + + captured.Should().NotBeNull(); + var ops = captured.Patch.Operations; + var nameValue = ops.Single(o => o.Path == "/Name").Value.Should().BeOfType().Subject; + NormalizationAssert.AssertAllNfd(nameValue); + ops.Single(o => o.Path == "/LeadingToken").Value.Should().Be(NfcTestData.Nfc); + ops.Single(o => o.Path == "/TrailingToken").Value.Should().Be(NfcTestData.Nfc); } #endregion @@ -402,9 +458,10 @@ public async Task UpdateEntry_JsonPatch_NormalizesJsonElementString() } [Fact] - public async Task UpdateWritingSystem_JsonPatch_NormalizesStringArray() + public async Task UpdateWritingSystem_JsonPatch_DoesNotNormalize() { var update = new UpdateObjectInput() + .Set(ws => ws.Name, NfcTestData.Nfc) .Set(ws => ws.Exemplars, [NfcTestData.Nfc, NfcTestData.Nfc]); UpdateObjectInput? captured = null; @@ -417,9 +474,12 @@ public async Task UpdateWritingSystem_JsonPatch_NormalizesStringArray() await _normalizingApi.UpdateWritingSystem("en", WritingSystemType.Analysis, update); - captured.Should().NotBeNull(); - var values = captured!.Patch.Operations.Single().Value.Should().BeOfType().Subject; - values.Should().AllSatisfy(value => value.IsNormalized(NormalizationForm.FormD).Should().BeTrue()); + captured.Should().BeSameAs(update); // pass-through, not rebuilt + var ops = captured.Patch.Operations; + ops.Should().HaveCount(2); + ops.Single(o => o.Path == "/Name").Value.Should().Be(NfcTestData.Nfc); + var exemplars = ops.Single(o => o.Path == "/Exemplars").Value.Should().BeOfType().Subject; + exemplars.Should().Equal(NfcTestData.Nfc, NfcTestData.Nfc); } private static void AssertPatchValuesNfd(UpdateObjectInput update) where T : class @@ -901,14 +961,29 @@ public static class NormalizationAssert ]; /// - /// Properties that should NOT be normalized (metadata, not user text) + /// Properties not normalized by the write wrapper, scoped per type. Includes both metadata + /// (e.g. SemanticDomain.Code, RichSpan.Ws) and fields liblcm itself doesn't NFD-normalize + /// (WritingSystem fields are LDML-managed plain strings; MorphType leading/trailing tokens are + /// punctuation markers, not linguistic text). /// - private static readonly HashSet ExcludedProperties = - [ - "Code", // SemanticDomain.Code is metadata - "WsId", // WritingSystemId is metadata - "Ws" // RichSpan.Ws is a writing system ID - ]; + private static readonly Dictionary> NotNormalizedPerType = new() + { + [typeof(WritingSystem)] = + [ + nameof(WritingSystem.WsId), + nameof(WritingSystem.Name), + nameof(WritingSystem.Abbreviation), + nameof(WritingSystem.Font), + nameof(WritingSystem.Exemplars), + ], + [typeof(MorphTypeData)] = + [ + nameof(MorphTypeData.LeadingToken), + nameof(MorphTypeData.TrailingToken), + ], + [typeof(SemanticDomain)] = [nameof(SemanticDomain.Code)], + [typeof(RichSpan)] = [nameof(RichSpan.Ws)], + }; /// /// Asserts that all normalizable properties in the object contain NFC strings. @@ -1020,10 +1095,11 @@ private static List FindNormalizationIssues(object obj, bool expectNfc, // Walk object properties var properties = type.GetProperties(BindingFlags.Public | BindingFlags.Instance); + var perTypeSkip = NotNormalizedPerType.GetValueOrDefault(type); foreach (var prop in properties) { if (!prop.CanRead) continue; - if (ExcludedProperties.Contains(prop.Name)) continue; + if (perTypeSkip is not null && perTypeSkip.Contains(prop.Name)) continue; var value = prop.GetValue(obj); if (value == null) continue; diff --git a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs index ba5f3c8b43..74e3acb071 100644 --- a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs +++ b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs @@ -22,11 +22,16 @@ public IMiniLcmApi Create(IMiniLcmApi api) } /// -/// Normalizes all user-entered text to NFD on write operations. +/// Normalizes user-entered linguistic text to NFD on write operations. /// /// Design notes: /// - Read operations are forwarded automatically via BeaKona.AutoInterface. /// - Write operations are manually implemented here (compile-time enforced by IMiniLcmApi). +/// - Scope mirrors liblcm: TsString-equivalents (MultiString, RichString, RichMultiString) are normalized +/// on every write, matching liblcm's generated property setters which call TsStringUtils.NormalizeNfd. +/// - Plain-string properties that liblcm does not NFD-normalize are passed through unchanged. This currently +/// covers WritingSystem.{Name, Abbreviation, Font, Exemplars} (LDML-managed) and +/// MorphTypeData.{LeadingToken, TrailingToken} (punctuation markers). /// - JsonPatch overloads normalize string-ish values best-effort (string, RichString, MultiString, RichMultiString). /// JsonElement values are only normalized when they are simple strings; complex JSON values are left as-is /// to avoid guessing the target type. @@ -47,20 +52,22 @@ public partial class MiniLcmApiWriteNormalizationWrapper(IMiniLcmApi api) : IMin #region WritingSystem - public async Task CreateWritingSystem(WritingSystem writingSystem, BetweenPosition? between = null) + // WritingSystem fields (Name, Abbreviation, Font, Exemplars) are plain strings; in liblcm they are + // LDML-managed by WritingSystemManager rather than stored as TsString, so they aren't NFD-normalized. + public Task CreateWritingSystem(WritingSystem writingSystem, BetweenPosition? between = null) { - return await _api.CreateWritingSystem(NormalizeWritingSystem(writingSystem), between); + return _api.CreateWritingSystem(writingSystem, between); } public Task UpdateWritingSystem(WritingSystemId id, WritingSystemType type, UpdateObjectInput update) { - return _api.UpdateWritingSystem(id, type, NormalizePatch(update)); + return _api.UpdateWritingSystem(id, type, update); } - public async Task UpdateWritingSystem(WritingSystem before, WritingSystem after, IMiniLcmApi? api = null) + public Task UpdateWritingSystem(WritingSystem before, WritingSystem after, IMiniLcmApi? api = null) { - return await _api.UpdateWritingSystem(before, NormalizeWritingSystem(after), api); + return _api.UpdateWritingSystem(before, after, api); } public Task MoveWritingSystem(WritingSystemId id, WritingSystemType type, BetweenPosition between) @@ -68,17 +75,6 @@ public Task MoveWritingSystem(WritingSystemId id, WritingSystemType type, Betwee return _api.MoveWritingSystem(id, type, between); } - private static WritingSystem NormalizeWritingSystem(WritingSystem ws) - { - return ws with - { - Name = StringNormalizer.Normalize(ws.Name), - Abbreviation = StringNormalizer.Normalize(ws.Abbreviation), - Font = StringNormalizer.Normalize(ws.Font), - Exemplars = StringNormalizer.Normalize(ws.Exemplars) - }; - } - #endregion #region PartOfSpeech @@ -231,7 +227,7 @@ public async Task CreateMorphTypeData(MorphTypeData morphType) public Task UpdateMorphTypeData(Guid id, UpdateObjectInput update) { - return _api.UpdateMorphTypeData(id, NormalizePatch(update)); + return _api.UpdateMorphTypeData(id, NormalizePatch(update, MorphTypeDataPlainStringPaths)); } @@ -245,6 +241,9 @@ public Task DeleteMorphTypeData(Guid id) return _api.DeleteMorphTypeData(id); } + // LeadingToken/TrailingToken are punctuation markers (e.g. "-", "="), not user-entered linguistic text. + private static readonly HashSet MorphTypeDataPlainStringPaths = ["/LeadingToken", "/TrailingToken"]; + private static MorphTypeData NormalizeMorphTypeData(MorphTypeData mtd) { return new MorphTypeData @@ -254,8 +253,8 @@ private static MorphTypeData NormalizeMorphTypeData(MorphTypeData mtd) Name = StringNormalizer.Normalize(mtd.Name), Abbreviation = StringNormalizer.Normalize(mtd.Abbreviation), Description = StringNormalizer.Normalize(mtd.Description), - LeadingToken = StringNormalizer.Normalize(mtd.LeadingToken), - TrailingToken = StringNormalizer.Normalize(mtd.TrailingToken), + LeadingToken = mtd.LeadingToken, + TrailingToken = mtd.TrailingToken, SecondaryOrder = mtd.SecondaryOrder, DeletedAt = mtd.DeletedAt }; @@ -538,14 +537,15 @@ public Task SaveFile(Stream stream, LcmFileMetadata metadata #region Patch Normalization - private static UpdateObjectInput NormalizePatch(UpdateObjectInput update) where T : class + private static UpdateObjectInput NormalizePatch(UpdateObjectInput update, HashSet? skipPaths = null) where T : class { if (update.Patch.Operations.Count == 0) return update; var normalizedPatch = new SystemTextJsonPatch.JsonPatchDocument(); foreach (var op in update.Patch.Operations) { - var normalizedValue = NormalizePatchValue(op.Value); + var skip = skipPaths is not null && op.Path is not null && skipPaths.Contains(op.Path); + var normalizedValue = skip ? op.Value : NormalizePatchValue(op.Value); var normalizedOp = new SystemTextJsonPatch.Operations.Operation { Op = op.Op, From c087e3b6803cf30f96782af9f778e8679f91e928 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Thu, 7 May 2026 15:51:46 +0200 Subject: [PATCH 06/12] Refactor --- .../MiniLcm.Tests/Helpers/NfcTestData.cs | 208 ++++ .../Helpers/NormalizationAssert.cs | 237 +++++ .../MiniLcm.Tests/WriteNormalizationTests.cs | 893 +++++------------- 3 files changed, 693 insertions(+), 645 deletions(-) create mode 100644 backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs create mode 100644 backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs new file mode 100644 index 0000000000..3a55322106 --- /dev/null +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs @@ -0,0 +1,208 @@ +namespace MiniLcm.Tests.Helpers; + +/// +/// Provides test data with NFC-normalized strings for all entity types. +/// Each Create method returns an object with ALL normalizable properties populated with NFC strings. +/// +public static class NfcTestData +{ + /// + /// NFC string: "naïve" with U+00EF LATIN SMALL LETTER I WITH DIAERESIS (composed form) + /// + public const string Nfc = "na\u00efve"; + + /// + /// NFD string: "naïve" with U+0069 LATIN SMALL LETTER I + U+0308 COMBINING DIAERESIS (decomposed form) + /// + public const string Nfd = "na\u0069\u0308ve"; + + public static MultiString CreateNfcMultiString() + { + return new() { Values = { { "en", Nfc }, { "fr", Nfc } } }; + } + + public static RichString CreateNfcRichString() + { + return new([ + new RichSpan { Text = Nfc, Ws = "en" }, + new RichSpan { Text = Nfc, Ws = "en", Bold = RichTextToggle.On } + ]); + } + + public static RichMultiString CreateNfcRichMultiString() + { + return new() + { + { "en", CreateNfcRichString() }, + { "fr", CreateNfcRichString() } + }; + } + + public static WritingSystem CreateNfcWritingSystem() + { + return new() + { + Id = Guid.NewGuid(), + WsId = "en", + Type = WritingSystemType.Analysis, + Name = Nfc, + Abbreviation = Nfc, + Font = Nfc, + Exemplars = [Nfc, Nfc] + }; + } + + public static PartOfSpeech CreateNfcPartOfSpeech() + { + return new() + { + Id = Guid.NewGuid(), + Name = CreateNfcMultiString() + }; + } + + public static Publication CreateNfcPublication() + { + return new() + { + Id = Guid.NewGuid(), + Name = CreateNfcMultiString() + }; + } + + public static SemanticDomain CreateNfcSemanticDomain() + { + return new() + { + Id = Guid.NewGuid(), + Code = "1.1.1", // Code is NOT normalized (metadata) + Name = CreateNfcMultiString() + }; + } + + public static ComplexFormType CreateNfcComplexFormType() + { + return new() + { + Id = Guid.NewGuid(), + Name = CreateNfcMultiString() + }; + } + + public static MorphTypeData CreateNfcMorphTypeData() + { + return new() + { + Id = Guid.NewGuid(), + Name = CreateNfcMultiString(), + Abbreviation = CreateNfcMultiString(), + Description = CreateNfcRichMultiString(), + LeadingToken = Nfc, + TrailingToken = Nfc + }; + } + + public static Translation CreateNfcTranslation() + { + return new() + { + Id = Guid.NewGuid(), + Text = CreateNfcRichMultiString() + }; + } + + public static ExampleSentence CreateNfcExampleSentence() + { + return new() + { + Id = Guid.NewGuid(), + Sentence = CreateNfcRichMultiString(), + Reference = CreateNfcRichString() + }; + } + + public static ExampleSentence CreateNfcExampleSentenceWithTranslations() + { + return new() + { + Id = Guid.NewGuid(), + Sentence = CreateNfcRichMultiString(), + Reference = CreateNfcRichString(), + Translations = [CreateNfcTranslation(), CreateNfcTranslation()] + }; + } + + public static Sense CreateNfcSense() + { + return new() + { + Id = Guid.NewGuid(), + Gloss = CreateNfcMultiString(), + Definition = CreateNfcRichMultiString() + }; + } + + public static Sense CreateNfcSenseWithExamples() + { + return new() + { + Id = Guid.NewGuid(), + Gloss = CreateNfcMultiString(), + Definition = CreateNfcRichMultiString(), + SemanticDomains = [CreateNfcSemanticDomain()], + PartOfSpeech = CreateNfcPartOfSpeech(), + ExampleSentences = [CreateNfcExampleSentenceWithTranslations()] + }; + } + + public static ComplexFormComponent CreateNfcComplexFormComponent() + { + return new() + { + Id = Guid.NewGuid(), + ComplexFormEntryId = Guid.NewGuid(), + ComponentEntryId = Guid.NewGuid(), + ComplexFormHeadword = Nfc, + ComponentHeadword = Nfc + }; + } + + public static Entry CreateNfcEntry() + { + return new() + { + Id = Guid.NewGuid(), + LexemeForm = CreateNfcMultiString(), + CitationForm = CreateNfcMultiString(), + LiteralMeaning = CreateNfcRichMultiString(), + Note = CreateNfcRichMultiString() + }; + } + + public static Entry CreateNfcEntryWithSenses() + { + return new() + { + Id = Guid.NewGuid(), + LexemeForm = CreateNfcMultiString(), + CitationForm = CreateNfcMultiString(), + LiteralMeaning = CreateNfcRichMultiString(), + Note = CreateNfcRichMultiString(), + Senses = [CreateNfcSenseWithExamples()] + }; + } + + public static Entry CreateNfcEntryWithComponents() + { + return new() + { + Id = Guid.NewGuid(), + LexemeForm = CreateNfcMultiString(), + CitationForm = CreateNfcMultiString(), + LiteralMeaning = CreateNfcRichMultiString(), + Note = CreateNfcRichMultiString(), + Components = [CreateNfcComplexFormComponent()], + ComplexForms = [CreateNfcComplexFormComponent()] + }; + } +} diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs new file mode 100644 index 0000000000..be0a7974a7 --- /dev/null +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs @@ -0,0 +1,237 @@ +using System.Collections; +using System.Reflection; +using System.Text; + +namespace MiniLcm.Tests.Helpers; + +/// +/// Assertion helpers for verifying NFC/NFD normalization state of objects. +/// Uses reflection to walk the object graph and check all normalizable properties. +/// On failure, error messages include the full property path (e.g., Senses[0].Gloss.Values[en]). +/// +public static class NormalizationAssert +{ + private static readonly HashSet NormalizableTypes = + [ + typeof(string), + typeof(string[]), + typeof(MultiString), + typeof(RichString), + typeof(RichMultiString) + ]; + + /// + /// Properties not normalized by the write wrapper, scoped per type. Includes both metadata + /// (e.g. SemanticDomain.Code, RichSpan.Ws) and fields liblcm itself doesn't NFD-normalize + /// (WritingSystem fields are LDML-managed plain strings; MorphType leading/trailing tokens are + /// punctuation markers, not linguistic text). + /// + private static readonly Dictionary> NotNormalizedPerType = new() + { + [typeof(WritingSystem)] = + [ + nameof(WritingSystem.WsId), + nameof(WritingSystem.Name), + nameof(WritingSystem.Abbreviation), + nameof(WritingSystem.Font), + nameof(WritingSystem.Exemplars), + ], + [typeof(MorphTypeData)] = + [ + nameof(MorphTypeData.LeadingToken), + nameof(MorphTypeData.TrailingToken), + ], + [typeof(SemanticDomain)] = [nameof(SemanticDomain.Code)], + [typeof(RichSpan)] = [nameof(RichSpan.Ws)], + }; + + /// + /// Asserts that all normalizable properties in the object contain NFC strings. + /// Throws if any property is null, empty, or contains NFD strings. + /// + public static void AssertAllNfc(object obj) + { + AssertAll(obj, expectNfc: true); + } + + /// + /// Asserts that all normalizable properties in the object contain NFD strings. + /// Throws if any property contains NFC strings. + /// + public static void AssertAllNfd(object obj) + { + AssertAll(obj, expectNfc: false); + } + + /// + /// Returns true if all normalizable properties contain NFD strings. + /// + public static bool IsAllNfd(object obj) + { + return FindNormalizationIssues(obj, expectNfc: false).Count == 0; + } + + private static void AssertAll(object obj, bool expectNfc) + { + var issues = FindNormalizationIssues(obj, expectNfc); + if (issues.Count == 0) return; + var form = expectNfc ? "NFC" : "NFD"; + throw new Xunit.Sdk.XunitException( + $"Expected all normalizable properties to contain {form} strings, but found issues:\n" + + string.Join("\n", issues.Select(i => $" - {i}")) + ); + } + + private static List FindNormalizationIssues(object obj, bool expectNfc, string path = "") + { + var issues = new List(); + if (obj == null) return issues; + + if (obj is string str) + { + CheckString(str, path, expectNfc, issues); + return issues; + } + + if (obj is string[] strArray) + { + for (var i = 0; i < strArray.Length; i++) + { + CheckString(strArray[i], $"{path}[{i}]", expectNfc, issues); + } + return issues; + } + + if (obj is MultiString ms) + { + if (ms.Values.Count == 0) + { + issues.Add($"{path}: MultiString has no values (must have at least one for testing)"); + } + foreach (var (key, value) in ms.Values) + { + CheckString(value, $"{path}.Values[{key}]", expectNfc, issues); + } + return issues; + } + + if (obj is RichString rs) + { + if (rs.Spans.Count == 0) + { + issues.Add($"{path}: RichString has no spans (must have at least one for testing)"); + } + for (var i = 0; i < rs.Spans.Count; i++) + { + CheckString(rs.Spans[i].Text, $"{path}.Spans[{i}].Text", expectNfc, issues); + } + return issues; + } + + if (obj is RichMultiString rms) + { + if (rms.Count == 0) + { + issues.Add($"{path}: RichMultiString has no values (must have at least one for testing)"); + } + foreach (var (key, value) in rms) + { + issues.AddRange(FindNormalizationIssues(value, expectNfc, $"{path}[{key}]")); + } + return issues; + } + + // Top-level collection of items (e.g. a captured List) — walk each item. + // Property-level collections are handled below in the property loop, where IsModelType filtering applies. + if (obj is IEnumerable topLevelCollection) + { + var index = 0; + foreach (var item in topLevelCollection) + { + if (item != null) issues.AddRange(FindNormalizationIssues(item, expectNfc, $"{path}[{index}]")); + index++; + } + return issues; + } + + var type = obj.GetType(); + var perTypeSkip = NotNormalizedPerType.GetValueOrDefault(type); + foreach (var prop in type.GetProperties(BindingFlags.Public | BindingFlags.Instance)) + { + if (!prop.CanRead) continue; + if (perTypeSkip is not null && perTypeSkip.Contains(prop.Name)) continue; + + var value = prop.GetValue(obj); + if (value == null) continue; + + var propPath = string.IsNullOrEmpty(path) ? prop.Name : $"{path}.{prop.Name}"; + var propType = prop.PropertyType; + + if (NormalizableTypes.Contains(propType) || + propType == typeof(string[]) || + (propType.IsGenericType && propType.GetGenericTypeDefinition() == typeof(Nullable<>))) + { + issues.AddRange(FindNormalizationIssues(value, expectNfc, propPath)); + } + else if (propType.IsPrimitive || propType.IsEnum || propType == typeof(Guid) || propType == typeof(DateTime) || propType == typeof(DateTimeOffset) || propType == typeof(decimal)) + { + continue; + } + else if (value is IEnumerable enumerable) + { + var index = 0; + foreach (var item in enumerable) + { + if (item != null && IsModelType(item.GetType())) + { + issues.AddRange(FindNormalizationIssues(item, expectNfc, $"{propPath}[{index}]")); + } + index++; + } + } + else if (IsModelType(propType)) + { + issues.AddRange(FindNormalizationIssues(value, expectNfc, propPath)); + } + else + { + throw new Xunit.Sdk.XunitException($"Unexpected property type: {propType.FullName} at {propPath}"); + } + } + + return issues; + } + + private static void CheckString(string? value, string path, bool expectNfc, List issues) + { + if (string.IsNullOrEmpty(value)) + { + issues.Add($"{path}: string is null or empty (must have a value for testing)"); + return; + } + + var expected = expectNfc ? NormalizationForm.FormC : NormalizationForm.FormD; + if (!value.IsNormalized(expected)) + { + var formName = expectNfc ? "NFC" : "NFD"; + issues.Add($"{path}: expected {formName} but \"{value}\" is not {formName}-normalized"); + } + } + + private static bool IsModelType(Type type) + { + return + type.Namespace?.StartsWith("MiniLcm.Models") == true || + type == typeof(Entry) || + type == typeof(Sense) || + type == typeof(ExampleSentence) || + type == typeof(Translation) || + type == typeof(WritingSystem) || + type == typeof(PartOfSpeech) || + type == typeof(SemanticDomain) || + type == typeof(ComplexFormType) || + type == typeof(MorphTypeData) || + type == typeof(Publication) || + type == typeof(ComplexFormComponent); + } +} diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs index 9d52b3803d..ae6919dc7e 100644 --- a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -1,18 +1,18 @@ -using System.Collections; -using System.Reflection; -using System.Text; using System.Text.Json; using MiniLcm.Normalization; +using MiniLcm.SyncHelpers; using Moq; using SystemTextJsonPatch.Operations; +using MiniLcm.Tests.Helpers; +using static MiniLcm.Tests.Helpers.NormalizationAssert; namespace MiniLcm.Tests; -#pragma warning disable IDE0022 // Use block body for method - /// /// Tests for the MiniLcmApiWriteNormalizationWrapper. /// These tests verify that all user-entered text is normalized to NFD on write operations. +/// Each test captures the value passed to the underlying API and asserts via +/// , which reports the failing property path on mismatch. /// public class WriteNormalizationTests { @@ -26,21 +26,6 @@ public WriteNormalizationTests() _normalizingApi = factory.Create(_mockApi); } - - private static void AssertNfc(object obj) => NormalizationAssert.AssertAllNfc(obj); - private static void AssertNfd(object obj) => NormalizationAssert.AssertAllNfd(obj); - private static bool IsNfd(object obj) => NormalizationAssert.IsAllNfd(obj); - - private static void AssertAllNfc(IEnumerable values) - { - foreach (var value in values) AssertNfc(value!); - } - - private static void AssertAllNfd(IEnumerable values) - { - foreach (var value in values) AssertNfd(value!); - } - #region WritingSystem Tests // WritingSystem.{Name, Abbreviation, Font, Exemplars} are plain strings; in liblcm they are @@ -51,12 +36,11 @@ private static void AssertAllNfd(IEnumerable values) public async Task CreateWritingSystem_DoesNotNormalize() { var ws = NfcTestData.CreateNfcWritingSystem(); - AssertNfc(ws); WritingSystem? captured = null; Mock.Get(_mockApi) - .Setup(api => api.CreateWritingSystem(It.IsAny(), It.IsAny?>())) - .Callback?>((w, _) => captured = w) + .Setup(api => api.CreateWritingSystem(It.IsAny(), It.IsAny?>())) + .Callback?>((w, _) => captured = w) .ReturnsAsync(ws); await _normalizingApi.CreateWritingSystem(ws); @@ -73,7 +57,6 @@ public async Task UpdateWritingSystem_BeforeAfter_DoesNotNormalize() { var before = NfcTestData.CreateNfcWritingSystem(); var after = NfcTestData.CreateNfcWritingSystem(); - AssertNfc(after); WritingSystem? captured = null; Mock.Get(_mockApi) @@ -98,13 +81,17 @@ public async Task UpdateWritingSystem_BeforeAfter_DoesNotNormalize() public async Task CreatePartOfSpeech_NormalizesToNfd() { var pos = NfcTestData.CreateNfcPartOfSpeech(); - AssertNfc(pos); + + PartOfSpeech? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreatePartOfSpeech(It.IsAny())) + .Callback(p => captured = p) + .ReturnsAsync(pos); await _normalizingApi.CreatePartOfSpeech(pos); - Mock.Get(_mockApi).Verify(api => api.CreatePartOfSpeech( - It.Is(p => IsNfd(p)) - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] @@ -112,15 +99,17 @@ public async Task UpdatePartOfSpeech_BeforeAfter_NormalizesToNfd() { var before = NfcTestData.CreateNfcPartOfSpeech(); var after = NfcTestData.CreateNfcPartOfSpeech(); - AssertNfc(after); + + PartOfSpeech? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdatePartOfSpeech(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, a, _) => captured = a) + .ReturnsAsync(after); await _normalizingApi.UpdatePartOfSpeech(before, after); - Mock.Get(_mockApi).Verify(api => api.UpdatePartOfSpeech( - It.IsAny(), - It.Is(p => IsNfd(p)), - It.IsAny() - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } #endregion @@ -131,13 +120,17 @@ public async Task UpdatePartOfSpeech_BeforeAfter_NormalizesToNfd() public async Task CreatePublication_NormalizesToNfd() { var pub = NfcTestData.CreateNfcPublication(); - AssertNfc(pub); + + Publication? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreatePublication(It.IsAny())) + .Callback(p => captured = p) + .ReturnsAsync(pub); await _normalizingApi.CreatePublication(pub); - Mock.Get(_mockApi).Verify(api => api.CreatePublication( - It.Is(p => IsNfd(p)) - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] @@ -145,15 +138,17 @@ public async Task UpdatePublication_BeforeAfter_NormalizesToNfd() { var before = NfcTestData.CreateNfcPublication(); var after = NfcTestData.CreateNfcPublication(); - AssertNfc(after); + + Publication? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdatePublication(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, a, _) => captured = a) + .ReturnsAsync(after); await _normalizingApi.UpdatePublication(before, after); - Mock.Get(_mockApi).Verify(api => api.UpdatePublication( - It.IsAny(), - It.Is(p => IsNfd(p)), - It.IsAny() - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } #endregion @@ -164,13 +159,17 @@ public async Task UpdatePublication_BeforeAfter_NormalizesToNfd() public async Task CreateSemanticDomain_NormalizesToNfd() { var sd = NfcTestData.CreateNfcSemanticDomain(); - AssertNfc(sd); + + SemanticDomain? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateSemanticDomain(It.IsAny())) + .Callback(s => captured = s) + .ReturnsAsync(sd); await _normalizingApi.CreateSemanticDomain(sd); - Mock.Get(_mockApi).Verify(api => api.CreateSemanticDomain( - It.Is(s => IsNfd(s)) - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] @@ -178,49 +177,53 @@ public async Task UpdateSemanticDomain_BeforeAfter_NormalizesToNfd() { var before = NfcTestData.CreateNfcSemanticDomain(); var after = NfcTestData.CreateNfcSemanticDomain(); - AssertNfc(after); + + SemanticDomain? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdateSemanticDomain(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, a, _) => captured = a) + .ReturnsAsync(after); await _normalizingApi.UpdateSemanticDomain(before, after); - Mock.Get(_mockApi).Verify(api => api.UpdateSemanticDomain( - It.IsAny(), - It.Is(s => IsNfd(s)), - It.IsAny() - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] public async Task AddSemanticDomainToSense_NormalizesToNfd() { var sd = NfcTestData.CreateNfcSemanticDomain(); - AssertNfc(sd); + + SemanticDomain? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.AddSemanticDomainToSense(It.IsAny(), It.IsAny())) + .Callback((_, s) => captured = s) + .Returns(Task.CompletedTask); await _normalizingApi.AddSemanticDomainToSense(Guid.NewGuid(), sd); - Mock.Get(_mockApi).Verify(api => api.AddSemanticDomainToSense( - It.IsAny(), - It.Is(s => IsNfd(s)) - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] public async Task BulkImportSemanticDomains_NormalizesToNfd() { var domains = new[] { NfcTestData.CreateNfcSemanticDomain(), NfcTestData.CreateNfcSemanticDomain() }; - AssertAllNfc(domains); - var capturedDomains = new List(); + var captured = new List(); Mock.Get(_mockApi) .Setup(api => api.BulkImportSemanticDomains(It.IsAny>())) .Returns(async (IAsyncEnumerable stream) => { - await foreach (var sd in stream) capturedDomains.Add(sd); + await foreach (var sd in stream) captured.Add(sd); }); await _normalizingApi.BulkImportSemanticDomains(domains.ToAsyncEnumerable()); - capturedDomains.Should().HaveCount(2); - AssertAllNfd(capturedDomains); + captured.Should().HaveCount(2); + AssertAllNfd(captured); } #endregion @@ -231,13 +234,17 @@ public async Task BulkImportSemanticDomains_NormalizesToNfd() public async Task CreateComplexFormType_NormalizesToNfd() { var cft = NfcTestData.CreateNfcComplexFormType(); - AssertNfc(cft); + + ComplexFormType? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateComplexFormType(It.IsAny())) + .Callback(c => captured = c) + .ReturnsAsync(cft); await _normalizingApi.CreateComplexFormType(cft); - Mock.Get(_mockApi).Verify(api => api.CreateComplexFormType( - It.Is(c => IsNfd(c)) - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] @@ -245,15 +252,17 @@ public async Task UpdateComplexFormType_BeforeAfter_NormalizesToNfd() { var before = NfcTestData.CreateNfcComplexFormType(); var after = NfcTestData.CreateNfcComplexFormType(); - AssertNfc(after); + + ComplexFormType? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdateComplexFormType(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, a, _) => captured = a) + .ReturnsAsync(after); await _normalizingApi.UpdateComplexFormType(before, after); - Mock.Get(_mockApi).Verify(api => api.UpdateComplexFormType( - It.IsAny(), - It.Is(c => IsNfd(c)), - It.IsAny() - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } #endregion @@ -267,7 +276,6 @@ public async Task UpdateComplexFormType_BeforeAfter_NormalizesToNfd() public async Task CreateMorphTypeData_NormalizesMultiStringsToNfd_AndPassesTokensThrough() { var mtd = NfcTestData.CreateNfcMorphTypeData(); - AssertNfc(mtd); MorphTypeData? captured = null; Mock.Get(_mockApi) @@ -278,7 +286,7 @@ public async Task CreateMorphTypeData_NormalizesMultiStringsToNfd_AndPassesToken await _normalizingApi.CreateMorphTypeData(mtd); captured.Should().NotBeNull(); - IsNfd(captured).Should().BeTrue(); + AssertAllNfd(captured); captured.LeadingToken.Should().Be(NfcTestData.Nfc); captured.TrailingToken.Should().Be(NfcTestData.Nfc); } @@ -288,7 +296,6 @@ public async Task UpdateMorphTypeData_BeforeAfter_NormalizesMultiStringsToNfd_An { var before = NfcTestData.CreateNfcMorphTypeData(); var after = NfcTestData.CreateNfcMorphTypeData(); - AssertNfc(after); MorphTypeData? captured = null; Mock.Get(_mockApi) @@ -299,7 +306,7 @@ public async Task UpdateMorphTypeData_BeforeAfter_NormalizesMultiStringsToNfd_An await _normalizingApi.UpdateMorphTypeData(before, after); captured.Should().NotBeNull(); - IsNfd(captured).Should().BeTrue(); + AssertAllNfd(captured); captured.LeadingToken.Should().Be(NfcTestData.Nfc); captured.TrailingToken.Should().Be(NfcTestData.Nfc); } @@ -321,11 +328,10 @@ public async Task UpdateMorphTypeData_JsonPatch_NormalizesMultiString_AndPassesT await _normalizingApi.UpdateMorphTypeData(Guid.NewGuid(), update); captured.Should().NotBeNull(); - var ops = captured.Patch.Operations; - var nameValue = ops.Single(o => o.Path == "/Name").Value.Should().BeOfType().Subject; - NormalizationAssert.AssertAllNfd(nameValue); - ops.Single(o => o.Path == "/LeadingToken").Value.Should().Be(NfcTestData.Nfc); - ops.Single(o => o.Path == "/TrailingToken").Value.Should().Be(NfcTestData.Nfc); + var byPath = captured.Patch.Operations.ToDictionary(o => o.Path!, o => o.Value); + AssertAllNfd(byPath["/Name"].Should().BeOfType().Subject); + byPath["/LeadingToken"].Should().Be(NfcTestData.Nfc); + byPath["/TrailingToken"].Should().Be(NfcTestData.Nfc); } #endregion @@ -336,14 +342,17 @@ public async Task UpdateMorphTypeData_JsonPatch_NormalizesMultiString_AndPassesT public async Task CreateEntry_NormalizesToNfd() { var entry = NfcTestData.CreateNfcEntry(); - AssertNfc(entry); + + Entry? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateEntry(It.IsAny(), It.IsAny())) + .Callback((e, _) => captured = e) + .ReturnsAsync(entry); await _normalizingApi.CreateEntry(entry); - Mock.Get(_mockApi).Verify(api => api.CreateEntry( - It.Is(e => IsNfd(e)), - null - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] @@ -351,63 +360,70 @@ public async Task UpdateEntry_BeforeAfter_NormalizesToNfd() { var before = NfcTestData.CreateNfcEntry(); var after = NfcTestData.CreateNfcEntry(); - AssertNfc(after); + + Entry? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdateEntry(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, a, _) => captured = a) + .ReturnsAsync(after); await _normalizingApi.UpdateEntry(before, after); - Mock.Get(_mockApi).Verify(api => api.UpdateEntry( - It.IsAny(), - It.Is(e => IsNfd(e)), - It.IsAny() - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] public async Task CreateEntry_WithNestedSenses_NormalizesToNfd() { var entry = NfcTestData.CreateNfcEntryWithSenses(); - AssertNfc(entry); + + Entry? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateEntry(It.IsAny(), It.IsAny())) + .Callback((e, _) => captured = e) + .ReturnsAsync(entry); await _normalizingApi.CreateEntry(entry); - Mock.Get(_mockApi).Verify(api => api.CreateEntry( - It.Is(e => IsNfd(e)), - null - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] public async Task CreateEntry_WithComplexFormComponents_NormalizesToNfd() { var entry = NfcTestData.CreateNfcEntryWithComponents(); - AssertNfc(entry); + + Entry? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateEntry(It.IsAny(), It.IsAny())) + .Callback((e, _) => captured = e) + .ReturnsAsync(entry); await _normalizingApi.CreateEntry(entry); - Mock.Get(_mockApi).Verify(api => api.CreateEntry( - It.Is(e => IsNfd(e)), - null - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] public async Task BulkCreateEntries_NormalizesToNfd() { var entries = new[] { NfcTestData.CreateNfcEntry(), NfcTestData.CreateNfcEntryWithSenses() }; - AssertAllNfc(entries); - var capturedEntries = new List(); + var captured = new List(); Mock.Get(_mockApi) .Setup(api => api.BulkCreateEntries(It.IsAny>())) .Returns(async (IAsyncEnumerable stream) => { - await foreach (var e in stream) capturedEntries.Add(e); + await foreach (var e in stream) captured.Add(e); }); await _normalizingApi.BulkCreateEntries(entries.ToAsyncEnumerable()); - capturedEntries.Should().HaveCount(2); - AssertAllNfd(capturedEntries); + captured.Should().HaveCount(2); + AssertAllNfd(captured); } #endregion @@ -432,8 +448,8 @@ public async Task UpdateEntry_JsonPatch_NormalizesValuesToNfd() await _normalizingApi.UpdateEntry(Guid.NewGuid(), update); captured.Should().NotBeNull(); - captured.Should().NotBeSameAs(update); - AssertPatchValuesNfd(captured); + captured.Should().NotBeSameAs(update); // rebuilt because Entry path normalizes + AssertAllPatchValuesNfd(captured); } [Fact] @@ -452,9 +468,9 @@ public async Task UpdateEntry_JsonPatch_NormalizesJsonElementString() await _normalizingApi.UpdateEntry(Guid.NewGuid(), update); captured.Should().NotBeNull(); - var value = captured.Patch.Operations.Single().Value; - value.Should().BeOfType(); - ((string)value).Should().Be(NfcTestData.Nfd); + captured.Patch.Operations.Single().Value + .Should().BeOfType() + .Which.Should().Be(NfcTestData.Nfd); } [Fact] @@ -475,50 +491,24 @@ public async Task UpdateWritingSystem_JsonPatch_DoesNotNormalize() await _normalizingApi.UpdateWritingSystem("en", WritingSystemType.Analysis, update); captured.Should().BeSameAs(update); // pass-through, not rebuilt - var ops = captured.Patch.Operations; - ops.Should().HaveCount(2); - ops.Single(o => o.Path == "/Name").Value.Should().Be(NfcTestData.Nfc); - var exemplars = ops.Single(o => o.Path == "/Exemplars").Value.Should().BeOfType().Subject; - exemplars.Should().Equal(NfcTestData.Nfc, NfcTestData.Nfc); + var byPath = captured.Patch.Operations.ToDictionary(o => o.Path!, o => o.Value); + byPath.Should().HaveCount(2); + byPath["/Name"].Should().Be(NfcTestData.Nfc); + byPath["/Exemplars"].Should().BeOfType() + .Which.Should().Equal(NfcTestData.Nfc, NfcTestData.Nfc); } - private static void AssertPatchValuesNfd(UpdateObjectInput update) where T : class + /// + /// Asserts every non-null operation value in the patch is NFD. Delegates to + /// , which already understands string, + /// string[], MultiString, RichString, and RichMultiString — so failures report a property path. + /// + private static void AssertAllPatchValuesNfd(UpdateObjectInput update) where T : class { update.Patch.Operations.Should().NotBeEmpty(); foreach (var op in update.Patch.Operations) { - AssertPatchValueNfd(op.Value); - } - } - - private static void AssertPatchValueNfd(object? value) - { - switch (value) - { - case null: - return; - case string s: - s.IsNormalized(NormalizationForm.FormD).Should().BeTrue(); - return; - case string[] strings: - strings.Should().AllSatisfy(str => str.IsNormalized(NormalizationForm.FormD).Should().BeTrue()); - return; - case MultiString ms: - NormalizationAssert.AssertAllNfd(ms); - return; - case RichString rs: - NormalizationAssert.AssertAllNfd(rs); - return; - case RichMultiString rms: - NormalizationAssert.AssertAllNfd(rms); - return; - case JsonElement jsonElement when jsonElement.ValueKind == JsonValueKind.String: - var jsonText = jsonElement.GetString(); - jsonText.Should().NotBeNull(); - jsonText.IsNormalized(NormalizationForm.FormD).Should().BeTrue(); - return; - default: - throw new Xunit.Sdk.XunitException($"Unexpected patch value type: {value.GetType().FullName}"); + if (op.Value is not null) AssertAllNfd(op.Value); } } @@ -530,14 +520,18 @@ private static void AssertPatchValueNfd(object? value) public async Task CreateComplexFormComponent_NormalizesToNfd() { var cfc = NfcTestData.CreateNfcComplexFormComponent(); - AssertNfc(cfc); + + ComplexFormComponent? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateComplexFormComponent( + It.IsAny(), It.IsAny?>())) + .Callback?>((c, _) => captured = c) + .ReturnsAsync(cfc); await _normalizingApi.CreateComplexFormComponent(cfc); - Mock.Get(_mockApi).Verify(api => api.CreateComplexFormComponent( - It.Is(c => IsNfd(c)), - null - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } #endregion @@ -548,15 +542,17 @@ public async Task CreateComplexFormComponent_NormalizesToNfd() public async Task CreateSense_NormalizesToNfd() { var sense = NfcTestData.CreateNfcSense(); - AssertNfc(sense); + + Sense? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateSense(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, s, _) => captured = s) + .ReturnsAsync(sense); await _normalizingApi.CreateSense(Guid.NewGuid(), sense); - Mock.Get(_mockApi).Verify(api => api.CreateSense( - It.IsAny(), - It.Is(s => IsNfd(s)), - null - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] @@ -565,31 +561,34 @@ public async Task UpdateSense_BeforeAfter_NormalizesToNfd() var entryId = Guid.NewGuid(); var before = NfcTestData.CreateNfcSense(); var after = NfcTestData.CreateNfcSense(); - AssertNfc(after); + + Sense? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdateSense(It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, _, a, _) => captured = a) + .ReturnsAsync(after); await _normalizingApi.UpdateSense(entryId, before, after); - Mock.Get(_mockApi).Verify(api => api.UpdateSense( - entryId, - It.IsAny(), - It.Is(s => IsNfd(s)), - It.IsAny() - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] public async Task CreateSense_WithNestedExampleSentences_NormalizesToNfd() { var sense = NfcTestData.CreateNfcSenseWithExamples(); - AssertNfc(sense); + + Sense? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateSense(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, s, _) => captured = s) + .ReturnsAsync(sense); await _normalizingApi.CreateSense(Guid.NewGuid(), sense); - Mock.Get(_mockApi).Verify(api => api.CreateSense( - It.IsAny(), - It.Is(s => IsNfd(s)), - null - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } #endregion @@ -600,16 +599,18 @@ public async Task CreateSense_WithNestedExampleSentences_NormalizesToNfd() public async Task CreateExampleSentence_NormalizesToNfd() { var example = NfcTestData.CreateNfcExampleSentence(); - AssertNfc(example); + + ExampleSentence? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateExampleSentence( + It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, _, e, _) => captured = e) + .ReturnsAsync(example); await _normalizingApi.CreateExampleSentence(Guid.NewGuid(), Guid.NewGuid(), example); - Mock.Get(_mockApi).Verify(api => api.CreateExampleSentence( - It.IsAny(), - It.IsAny(), - It.Is(e => IsNfd(e)), - null - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] @@ -619,33 +620,38 @@ public async Task UpdateExampleSentence_BeforeAfter_NormalizesToNfd() var senseId = Guid.NewGuid(); var before = NfcTestData.CreateNfcExampleSentence(); var after = NfcTestData.CreateNfcExampleSentence(); - AssertNfc(after); + + ExampleSentence? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.UpdateExampleSentence( + It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), + It.IsAny())) + .Callback((_, _, _, a, _) => captured = a) + .ReturnsAsync(after); await _normalizingApi.UpdateExampleSentence(entryId, senseId, before, after); - Mock.Get(_mockApi).Verify(api => api.UpdateExampleSentence( - entryId, - senseId, - It.IsAny(), - It.Is(e => IsNfd(e)), - It.IsAny() - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } [Fact] public async Task CreateExampleSentence_WithTranslations_NormalizesToNfd() { var example = NfcTestData.CreateNfcExampleSentenceWithTranslations(); - AssertNfc(example); + + ExampleSentence? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.CreateExampleSentence( + It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, _, e, _) => captured = e) + .ReturnsAsync(example); await _normalizingApi.CreateExampleSentence(Guid.NewGuid(), Guid.NewGuid(), example); - Mock.Get(_mockApi).Verify(api => api.CreateExampleSentence( - It.IsAny(), - It.IsAny(), - It.Is(e => IsNfd(e)), - null - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } #endregion @@ -656,16 +662,18 @@ public async Task CreateExampleSentence_WithTranslations_NormalizesToNfd() public async Task AddTranslation_NormalizesToNfd() { var translation = NfcTestData.CreateNfcTranslation(); - AssertNfc(translation); + + Translation? captured = null; + Mock.Get(_mockApi) + .Setup(api => api.AddTranslation( + It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, _, _, t) => captured = t) + .Returns(Task.CompletedTask); await _normalizingApi.AddTranslation(Guid.NewGuid(), Guid.NewGuid(), Guid.NewGuid(), translation); - Mock.Get(_mockApi).Verify(api => api.AddTranslation( - It.IsAny(), - It.IsAny(), - It.IsAny(), - It.Is(t => IsNfd(t)) - )); + captured.Should().NotBeNull(); + AssertAllNfd(captured); } #endregion @@ -673,18 +681,36 @@ public async Task AddTranslation_NormalizesToNfd() /// -/// Tests for NormalizationAssert to ensure it correctly detects NFC/NFD issues. -/// These tests verify the assertion helpers don't have false negatives. +/// Tests for NormalizationAssert and the NfcTestData factories that feed it. /// public class NormalizationAssertTests { [Fact] - public void AssertAllNfc_WithNfcData_DoesNotThrow() + public void AllNfcFactories_ProduceNfcData() { - var entry = NfcTestData.CreateNfcEntry(); + // Single belt-and-braces test that every factory returns NFC data, replacing the + // per-test AssertNfc(input) preconditions that used to be sprinkled across WriteNormalizationTests. + AssertAllNfc(NfcTestData.CreateNfcWritingSystem()); + AssertAllNfc(NfcTestData.CreateNfcPartOfSpeech()); + AssertAllNfc(NfcTestData.CreateNfcPublication()); + AssertAllNfc(NfcTestData.CreateNfcSemanticDomain()); + AssertAllNfc(NfcTestData.CreateNfcComplexFormType()); + AssertAllNfc(NfcTestData.CreateNfcMorphTypeData()); + AssertAllNfc(NfcTestData.CreateNfcTranslation()); + AssertAllNfc(NfcTestData.CreateNfcExampleSentence()); + AssertAllNfc(NfcTestData.CreateNfcExampleSentenceWithTranslations()); + AssertAllNfc(NfcTestData.CreateNfcSense()); + AssertAllNfc(NfcTestData.CreateNfcSenseWithExamples()); + AssertAllNfc(NfcTestData.CreateNfcComplexFormComponent()); + AssertAllNfc(NfcTestData.CreateNfcEntry()); + AssertAllNfc(NfcTestData.CreateNfcEntryWithSenses()); + AssertAllNfc(NfcTestData.CreateNfcEntryWithComponents()); + } - // Should not throw - NormalizationAssert.AssertAllNfc(entry); + [Fact] + public void AssertAllNfc_WithNfcData_DoesNotThrow() + { + AssertAllNfc(NfcTestData.CreateNfcEntry()); } [Fact] @@ -699,10 +725,9 @@ public void AssertAllNfc_WithNfdData_Throws() Note = new RichMultiString { { "en", new RichString(NfcTestData.Nfc) } } }; - var act = () => NormalizationAssert.AssertAllNfc(entry); + var act = () => AssertAllNfc(entry); - act.Should().Throw() - .WithMessage("*NFC*"); + act.Should().Throw().WithMessage("*NFC*"); } [Fact] @@ -717,19 +742,15 @@ public void AssertAllNfd_WithNfdData_DoesNotThrow() Note = new RichMultiString { { "en", new RichString(NfcTestData.Nfd) } } }; - // Should not throw - NormalizationAssert.AssertAllNfd(entry); + AssertAllNfd(entry); } [Fact] public void AssertAllNfd_WithNfcData_Throws() { - var entry = NfcTestData.CreateNfcEntry(); - - var act = () => NormalizationAssert.AssertAllNfd(entry); + var act = () => AssertAllNfd(NfcTestData.CreateNfcEntry()); - act.Should().Throw() - .WithMessage("*NFD*"); + act.Should().Throw().WithMessage("*NFD*"); } [Fact] @@ -742,10 +763,9 @@ public void AssertAllNfc_WithEmptyMultiString_Throws() CitationForm = NfcTestData.CreateNfcMultiString() }; - var act = () => NormalizationAssert.AssertAllNfc(entry); + var act = () => AssertAllNfc(entry); - act.Should().Throw() - .WithMessage("*no values*"); + act.Should().Throw().WithMessage("*no values*"); } [Fact] @@ -769,10 +789,9 @@ public void AssertAllNfc_WithNestedNfdData_Throws() ] }; - var act = () => NormalizationAssert.AssertAllNfc(entry); + var act = () => AssertAllNfc(entry); - act.Should().Throw() - .WithMessage("*Senses*Gloss*"); + act.Should().Throw().WithMessage("*Senses*Gloss*"); } [Fact] @@ -780,428 +799,12 @@ public void IsAllNfd_WithNfdData_ReturnsTrue() { var multiString = new MultiString { Values = { { "en", NfcTestData.Nfd } } }; - NormalizationAssert.IsAllNfd(multiString).Should().BeTrue(); + IsAllNfd(multiString).Should().BeTrue(); } [Fact] public void IsAllNfd_WithNfcData_ReturnsFalse() { - var multiString = NfcTestData.CreateNfcMultiString(); - - NormalizationAssert.IsAllNfd(multiString).Should().BeFalse(); - } -} - -/// -/// Provides test data with NFC-normalized strings for all entity types. -/// Each Create method returns an object with ALL normalizable properties populated with NFC strings. -/// -public static class NfcTestData -{ - /// - /// NFC string: "naïve" with U+00EF LATIN SMALL LETTER I WITH DIAERESIS (composed form) - /// - public const string Nfc = "na\u00efve"; - - /// - /// NFD string: "naïve" with U+0069 LATIN SMALL LETTER I + U+0308 COMBINING DIAERESIS (decomposed form) - /// - public const string Nfd = "na\u0069\u0308ve"; - - public static MultiString CreateNfcMultiString() => new() { Values = { { "en", Nfc }, { "fr", Nfc } } }; - - public static RichString CreateNfcRichString() => new([ - new RichSpan { Text = Nfc, Ws = "en" }, - new RichSpan { Text = Nfc, Ws = "en", Bold = RichTextToggle.On } - ]); - - public static RichMultiString CreateNfcRichMultiString() => new() - { - { "en", CreateNfcRichString() }, - { "fr", CreateNfcRichString() } - }; - - public static WritingSystem CreateNfcWritingSystem() => new() - { - Id = Guid.NewGuid(), - WsId = "en", - Type = WritingSystemType.Analysis, - Name = Nfc, - Abbreviation = Nfc, - Font = Nfc, - Exemplars = [Nfc, Nfc] - }; - - public static PartOfSpeech CreateNfcPartOfSpeech() => new() - { - Id = Guid.NewGuid(), - Name = CreateNfcMultiString() - }; - - public static Publication CreateNfcPublication() => new() - { - Id = Guid.NewGuid(), - Name = CreateNfcMultiString() - }; - - public static SemanticDomain CreateNfcSemanticDomain() => new() - { - Id = Guid.NewGuid(), - Code = "1.1.1", // Code is NOT normalized (metadata) - Name = CreateNfcMultiString() - }; - - public static ComplexFormType CreateNfcComplexFormType() => new() - { - Id = Guid.NewGuid(), - Name = CreateNfcMultiString() - }; - - public static MorphTypeData CreateNfcMorphTypeData() => new() - { - Id = Guid.NewGuid(), - Name = CreateNfcMultiString(), - Abbreviation = CreateNfcMultiString(), - Description = CreateNfcRichMultiString(), - LeadingToken = Nfc, - TrailingToken = Nfc - }; - - public static Translation CreateNfcTranslation() => new() - { - Id = Guid.NewGuid(), - Text = CreateNfcRichMultiString() - }; - - public static ExampleSentence CreateNfcExampleSentence() => new() - { - Id = Guid.NewGuid(), - Sentence = CreateNfcRichMultiString(), - Reference = CreateNfcRichString() - }; - - public static ExampleSentence CreateNfcExampleSentenceWithTranslations() => new() - { - Id = Guid.NewGuid(), - Sentence = CreateNfcRichMultiString(), - Reference = CreateNfcRichString(), - Translations = [CreateNfcTranslation(), CreateNfcTranslation()] - }; - - public static Sense CreateNfcSense() => new() - { - Id = Guid.NewGuid(), - Gloss = CreateNfcMultiString(), - Definition = CreateNfcRichMultiString() - }; - - public static Sense CreateNfcSenseWithExamples() => new() - { - Id = Guid.NewGuid(), - Gloss = CreateNfcMultiString(), - Definition = CreateNfcRichMultiString(), - SemanticDomains = [CreateNfcSemanticDomain()], - PartOfSpeech = CreateNfcPartOfSpeech(), - ExampleSentences = [CreateNfcExampleSentenceWithTranslations()] - }; - - public static ComplexFormComponent CreateNfcComplexFormComponent() => new() - { - Id = Guid.NewGuid(), - ComplexFormEntryId = Guid.NewGuid(), - ComponentEntryId = Guid.NewGuid(), - ComplexFormHeadword = Nfc, - ComponentHeadword = Nfc - }; - - public static Entry CreateNfcEntry() => new() - { - Id = Guid.NewGuid(), - LexemeForm = CreateNfcMultiString(), - CitationForm = CreateNfcMultiString(), - LiteralMeaning = CreateNfcRichMultiString(), - Note = CreateNfcRichMultiString() - }; - - public static Entry CreateNfcEntryWithSenses() => new() - { - Id = Guid.NewGuid(), - LexemeForm = CreateNfcMultiString(), - CitationForm = CreateNfcMultiString(), - LiteralMeaning = CreateNfcRichMultiString(), - Note = CreateNfcRichMultiString(), - Senses = [CreateNfcSenseWithExamples()] - }; - - public static Entry CreateNfcEntryWithComponents() => new() - { - Id = Guid.NewGuid(), - LexemeForm = CreateNfcMultiString(), - CitationForm = CreateNfcMultiString(), - LiteralMeaning = CreateNfcRichMultiString(), - Note = CreateNfcRichMultiString(), - Components = [CreateNfcComplexFormComponent()], - ComplexForms = [CreateNfcComplexFormComponent()] - }; -} - -/// -/// Assertion helpers for verifying NFC/NFD normalization state of objects. -/// Uses reflection to walk the object graph and check all normalizable properties. -/// -public static class NormalizationAssert -{ - private static readonly HashSet NormalizableTypes = - [ - typeof(string), - typeof(string[]), - typeof(MultiString), - typeof(RichString), - typeof(RichMultiString) - ]; - - /// - /// Properties not normalized by the write wrapper, scoped per type. Includes both metadata - /// (e.g. SemanticDomain.Code, RichSpan.Ws) and fields liblcm itself doesn't NFD-normalize - /// (WritingSystem fields are LDML-managed plain strings; MorphType leading/trailing tokens are - /// punctuation markers, not linguistic text). - /// - private static readonly Dictionary> NotNormalizedPerType = new() - { - [typeof(WritingSystem)] = - [ - nameof(WritingSystem.WsId), - nameof(WritingSystem.Name), - nameof(WritingSystem.Abbreviation), - nameof(WritingSystem.Font), - nameof(WritingSystem.Exemplars), - ], - [typeof(MorphTypeData)] = - [ - nameof(MorphTypeData.LeadingToken), - nameof(MorphTypeData.TrailingToken), - ], - [typeof(SemanticDomain)] = [nameof(SemanticDomain.Code)], - [typeof(RichSpan)] = [nameof(RichSpan.Ws)], - }; - - /// - /// Asserts that all normalizable properties in the object contain NFC strings. - /// Throws if any property is null, empty, or contains NFD strings. - /// - public static void AssertAllNfc(object obj) - { - var issues = FindNormalizationIssues(obj, expectNfc: true); - if (issues.Count > 0) - { - throw new Xunit.Sdk.XunitException( - $"Expected all normalizable properties to contain NFC strings, but found issues:\n" + - string.Join("\n", issues.Select(i => $" - {i}")) - ); - } - } - - /// - /// Asserts that all normalizable properties in the object contain NFD strings. - /// Throws if any property contains NFC strings. - /// - public static void AssertAllNfd(object obj) - { - var issues = FindNormalizationIssues(obj, expectNfc: false); - if (issues.Count > 0) - { - throw new Xunit.Sdk.XunitException( - $"Expected all normalizable properties to contain NFD strings, but found issues:\n" + - string.Join("\n", issues.Select(i => $" - {i}")) - ); - } - } - - /// - /// Returns true if all normalizable properties contain NFD strings. - /// For use in Moq It.Is() expressions. - /// - public static bool IsAllNfd(object obj) - { - var issues = FindNormalizationIssues(obj, expectNfc: false); - return issues.Count == 0; - } - - private static List FindNormalizationIssues(object obj, bool expectNfc, string path = "") - { - var issues = new List(); - if (obj == null) return issues; - - var type = obj.GetType(); - - // Handle string directly - if (obj is string str) - { - CheckString(str, path, expectNfc, issues); - return issues; - } - - // Handle string array - if (obj is string[] strArray) - { - for (var i = 0; i < strArray.Length; i++) - { - CheckString(strArray[i], $"{path}[{i}]", expectNfc, issues); - } - return issues; - } - - // Handle MultiString - if (obj is MultiString ms) - { - if (ms.Values.Count == 0) - { - issues.Add($"{path}: MultiString has no values (must have at least one for testing)"); - } - foreach (var (key, value) in ms.Values) - { - CheckString(value, $"{path}.Values[{key}]", expectNfc, issues); - } - return issues; - } - - // Handle RichString - if (obj is RichString rs) - { - if (rs.Spans.Count == 0) - { - issues.Add($"{path}: RichString has no spans (must have at least one for testing)"); - } - for (var i = 0; i < rs.Spans.Count; i++) - { - CheckString(rs.Spans[i].Text, $"{path}.Spans[{i}].Text", expectNfc, issues); - } - return issues; - } - - // Handle RichMultiString - if (obj is RichMultiString rms) - { - if (rms.Count == 0) - { - issues.Add($"{path}: RichMultiString has no values (must have at least one for testing)"); - } - foreach (var (key, value) in rms) - { - issues.AddRange(FindNormalizationIssues(value, expectNfc, $"{path}[{key}]")); - } - return issues; - } - - // Walk object properties - var properties = type.GetProperties(BindingFlags.Public | BindingFlags.Instance); - var perTypeSkip = NotNormalizedPerType.GetValueOrDefault(type); - foreach (var prop in properties) - { - if (!prop.CanRead) continue; - if (perTypeSkip is not null && perTypeSkip.Contains(prop.Name)) continue; - - var value = prop.GetValue(obj); - if (value == null) continue; - - var propPath = string.IsNullOrEmpty(path) ? prop.Name : $"{path}.{prop.Name}"; - var propType = prop.PropertyType; - - // Check if this is a normalizable type - if (NormalizableTypes.Contains(propType) || - propType == typeof(string[]) || - (propType.IsGenericType && propType.GetGenericTypeDefinition() == typeof(Nullable<>))) - { - issues.AddRange(FindNormalizationIssues(value, expectNfc, propPath)); - } - else if (propType.IsPrimitive || propType.IsEnum || propType == typeof(Guid) || propType == typeof(DateTime) || propType == typeof(DateTimeOffset) || propType == typeof(decimal)) - { - continue; - } - // Check collections of model objects - else if (value is IEnumerable enumerable) - { - var index = 0; - foreach (var item in enumerable) - { - if (item != null && IsModelType(item.GetType())) - { - issues.AddRange(FindNormalizationIssues(item, expectNfc, $"{propPath}[{index}]")); - } - index++; - } - } - // Check nested model objects - else if (IsModelType(propType)) - { - issues.AddRange(FindNormalizationIssues(value, expectNfc, propPath)); - } - else - { - throw new Xunit.Sdk.XunitException($"Unexpected property type: {propType.FullName} at {propPath}"); - } - } - - return issues; - } - - private static void CheckString(string? value, string path, bool expectNfc, List issues) - { - if (string.IsNullOrEmpty(value)) - { - issues.Add($"{path}: string is null or empty (must have a value for testing)"); - return; - } - - var isNfc = value.IsNormalized(NormalizationForm.FormC); - var isNfd = value.IsNormalized(NormalizationForm.FormD); - - if (expectNfc) - { - // When expecting NFC, the string should be NFC but NOT NFD (unless it has no decomposable chars) - // For our test string "naïve", NFC != NFD, so we check it's in NFC form - if (!isNfc) - { - issues.Add($"{path}: expected NFC but string is not NFC-normalized"); - } - // Also verify it's not already NFD (to ensure our test data is meaningful) - if (isNfd && !isNfc) - { - // This is fine - some strings are both NFC and NFD (e.g., ASCII) - } - else if (isNfd && isNfc) - { - // String is both NFC and NFD - this means it has no decomposable characters - // For testing purposes, we need strings that ARE different in NFC vs NFD - // But we'll allow it since it's still valid - } - } - else - { - // When expecting NFD, the string should be NFD - if (!isNfd) - { - issues.Add($"{path}: expected NFD but string is not NFD-normalized (value contains NFC)"); - } - } - } - - private static bool IsModelType(Type type) - { - // Model types are in the MiniLcm.Models namespace - return type.Namespace?.StartsWith("MiniLcm.Models") == true || - type == typeof(Entry) || - type == typeof(Sense) || - type == typeof(ExampleSentence) || - type == typeof(Translation) || - type == typeof(WritingSystem) || - type == typeof(PartOfSpeech) || - type == typeof(SemanticDomain) || - type == typeof(ComplexFormType) || - type == typeof(MorphTypeData) || - type == typeof(Publication) || - type == typeof(ComplexFormComponent); + IsAllNfd(NfcTestData.CreateNfcMultiString()).Should().BeFalse(); } } - -#pragma warning restore IDE0022 // Use block body for method From c953aa16305e8429a8080f24ff450d294e042e0a Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Tue, 12 May 2026 10:17:31 +0200 Subject: [PATCH 07/12] Polish up --- .../MiniLcm.Tests/Helpers/NfcTestData.cs | 19 +- .../Helpers/NormalizationAssert.cs | 229 +++++------------- .../MiniLcm.Tests/WriteNormalizationTests.cs | 11 - .../MiniLcmApiWriteNormalizationWrapper.cs | 10 +- .../MiniLcm/Normalization/StringNormalizer.cs | 40 +-- 5 files changed, 85 insertions(+), 224 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs index 3a55322106..b560f1fceb 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs @@ -1,20 +1,13 @@ namespace MiniLcm.Tests.Helpers; -/// -/// Provides test data with NFC-normalized strings for all entity types. -/// Each Create method returns an object with ALL normalizable properties populated with NFC strings. -/// public static class NfcTestData { - /// - /// NFC string: "naïve" with U+00EF LATIN SMALL LETTER I WITH DIAERESIS (composed form) - /// - public const string Nfc = "na\u00efve"; + // U+00EF LATIN SMALL LETTER I WITH DIAERESIS (composed) + public const string Nfc = "naïve"; + // U+0069 LATIN SMALL LETTER I + U+0308 COMBINING DIAERESIS (decomposed) + public const string Nfd = "naïve"; - /// - /// NFD string: "naïve" with U+0069 LATIN SMALL LETTER I + U+0308 COMBINING DIAERESIS (decomposed form) - /// - public const string Nfd = "na\u0069\u0308ve"; + // D: ï C: ï public static MultiString CreateNfcMultiString() { @@ -75,7 +68,7 @@ public static SemanticDomain CreateNfcSemanticDomain() return new() { Id = Guid.NewGuid(), - Code = "1.1.1", // Code is NOT normalized (metadata) + Code = Nfc, Name = CreateNfcMultiString() }; } diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs index be0a7974a7..6582b67245 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs @@ -5,29 +5,13 @@ namespace MiniLcm.Tests.Helpers; /// -/// Assertion helpers for verifying NFC/NFD normalization state of objects. -/// Uses reflection to walk the object graph and check all normalizable properties. -/// On failure, error messages include the full property path (e.g., Senses[0].Gloss.Values[en]). +/// For verifying that every string-bearing property of an object is normalized (NFC or NFD). /// public static class NormalizationAssert { - private static readonly HashSet NormalizableTypes = - [ - typeof(string), - typeof(string[]), - typeof(MultiString), - typeof(RichString), - typeof(RichMultiString) - ]; - - /// - /// Properties not normalized by the write wrapper, scoped per type. Includes both metadata - /// (e.g. SemanticDomain.Code, RichSpan.Ws) and fields liblcm itself doesn't NFD-normalize - /// (WritingSystem fields are LDML-managed plain strings; MorphType leading/trailing tokens are - /// punctuation markers, not linguistic text). - /// - private static readonly Dictionary> NotNormalizedPerType = new() + private static readonly Dictionary> SkippedProperties = new() { + // WritingSystem fields are generally LDML-managed [typeof(WritingSystem)] = [ nameof(WritingSystem.WsId), @@ -36,202 +20,123 @@ public static class NormalizationAssert nameof(WritingSystem.Font), nameof(WritingSystem.Exemplars), ], + // MorphTypeData leading/trailing tokens are punctuation markers [typeof(MorphTypeData)] = [ nameof(MorphTypeData.LeadingToken), nameof(MorphTypeData.TrailingToken), ], - [typeof(SemanticDomain)] = [nameof(SemanticDomain.Code)], - [typeof(RichSpan)] = [nameof(RichSpan.Ws)], }; - /// - /// Asserts that all normalizable properties in the object contain NFC strings. - /// Throws if any property is null, empty, or contains NFD strings. - /// public static void AssertAllNfc(object obj) { - AssertAll(obj, expectNfc: true); + Assert(obj, NormalizationForm.FormC); } - /// - /// Asserts that all normalizable properties in the object contain NFD strings. - /// Throws if any property contains NFC strings. - /// public static void AssertAllNfd(object obj) { - AssertAll(obj, expectNfc: false); + Assert(obj, NormalizationForm.FormD); } - /// - /// Returns true if all normalizable properties contain NFD strings. - /// public static bool IsAllNfd(object obj) { - return FindNormalizationIssues(obj, expectNfc: false).Count == 0; + return FindIssues(obj, NormalizationForm.FormD).Count == 0; } - private static void AssertAll(object obj, bool expectNfc) + private static void Assert(object obj, NormalizationForm form) { - var issues = FindNormalizationIssues(obj, expectNfc); + var issues = FindIssues(obj, form); if (issues.Count == 0) return; - var form = expectNfc ? "NFC" : "NFD"; + var name = FormName(form); throw new Xunit.Sdk.XunitException( - $"Expected all normalizable properties to contain {form} strings, but found issues:\n" + - string.Join("\n", issues.Select(i => $" - {i}")) + $"Expected all normalizable properties to contain {name} strings, but found issues:\n" + + string.Join("\n", issues.Select(i => " - " + i)) ); } - private static List FindNormalizationIssues(object obj, bool expectNfc, string path = "") + private static List FindIssues(object obj, NormalizationForm form) { var issues = new List(); - if (obj == null) return issues; - - if (obj is string str) - { - CheckString(str, path, expectNfc, issues); - return issues; - } - - if (obj is string[] strArray) - { - for (var i = 0; i < strArray.Length; i++) - { - CheckString(strArray[i], $"{path}[{i}]", expectNfc, issues); - } - return issues; - } - - if (obj is MultiString ms) - { - if (ms.Values.Count == 0) - { - issues.Add($"{path}: MultiString has no values (must have at least one for testing)"); - } - foreach (var (key, value) in ms.Values) - { - CheckString(value, $"{path}.Values[{key}]", expectNfc, issues); - } - return issues; - } - - if (obj is RichString rs) - { - if (rs.Spans.Count == 0) - { - issues.Add($"{path}: RichString has no spans (must have at least one for testing)"); - } - for (var i = 0; i < rs.Spans.Count; i++) - { - CheckString(rs.Spans[i].Text, $"{path}.Spans[{i}].Text", expectNfc, issues); - } - return issues; - } + Visit(obj, "", form, issues); + return issues; + } - if (obj is RichMultiString rms) + private static void Visit(object? obj, string path, NormalizationForm form, List issues) + { + switch (obj) { - if (rms.Count == 0) - { - issues.Add($"{path}: RichMultiString has no values (must have at least one for testing)"); - } - foreach (var (key, value) in rms) - { - issues.AddRange(FindNormalizationIssues(value, expectNfc, $"{path}[{key}]")); - } - return issues; + case null: + return; + case string s: + CheckString(s, path, form, issues); + return; + case MultiString ms: + if (ms.Values.Count == 0) issues.Add($"{path}: MultiString has no values (must have at least one for testing)"); + foreach (var (key, value) in ms.Values) CheckString(value, $"{path}.Values[{key}]", form, issues); + return; + case RichString rs: + if (rs.Spans.Count == 0) issues.Add($"{path}: RichString has no spans (must have at least one for testing)"); + for (var i = 0; i < rs.Spans.Count; i++) CheckString(rs.Spans[i].Text, $"{path}.Spans[{i}].Text", form, issues); + return; + case RichMultiString rms: + if (rms.Count == 0) issues.Add($"{path}: RichMultiString has no values (must have at least one for testing)"); + foreach (var (key, value) in rms) Visit(value, $"{path}[{key}]", form, issues); + return; + case IEnumerable seq: + var i2 = 0; + foreach (var item in seq) Visit(item, $"{path}[{i2++}]", form, issues); + return; + default: + break; } - // Top-level collection of items (e.g. a captured List) — walk each item. - // Property-level collections are handled below in the property loop, where IsModelType filtering applies. - if (obj is IEnumerable topLevelCollection) - { - var index = 0; - foreach (var item in topLevelCollection) - { - if (item != null) issues.AddRange(FindNormalizationIssues(item, expectNfc, $"{path}[{index}]")); - index++; - } - return issues; - } + VisitModelProperties(obj, path, form, issues); + } + private static void VisitModelProperties(object obj, string path, NormalizationForm form, List issues) + { var type = obj.GetType(); - var perTypeSkip = NotNormalizedPerType.GetValueOrDefault(type); + if (type.Namespace?.StartsWith("MiniLcm.Models", StringComparison.Ordinal) != true) + throw new Xunit.Sdk.XunitException($"Unexpected type {type.FullName} at {(string.IsNullOrEmpty(path) ? "" : path)}"); + + var skipped = SkippedProperties.GetValueOrDefault(type); foreach (var prop in type.GetProperties(BindingFlags.Public | BindingFlags.Instance)) { - if (!prop.CanRead) continue; - if (perTypeSkip is not null && perTypeSkip.Contains(prop.Name)) continue; + if (!prop.CanRead || skipped?.Contains(prop.Name) == true) continue; + if (IsIgnoredType(prop.PropertyType)) continue; var value = prop.GetValue(obj); - if (value == null) continue; + if (value is null) continue; var propPath = string.IsNullOrEmpty(path) ? prop.Name : $"{path}.{prop.Name}"; - var propType = prop.PropertyType; - - if (NormalizableTypes.Contains(propType) || - propType == typeof(string[]) || - (propType.IsGenericType && propType.GetGenericTypeDefinition() == typeof(Nullable<>))) - { - issues.AddRange(FindNormalizationIssues(value, expectNfc, propPath)); - } - else if (propType.IsPrimitive || propType.IsEnum || propType == typeof(Guid) || propType == typeof(DateTime) || propType == typeof(DateTimeOffset) || propType == typeof(decimal)) - { - continue; - } - else if (value is IEnumerable enumerable) - { - var index = 0; - foreach (var item in enumerable) - { - if (item != null && IsModelType(item.GetType())) - { - issues.AddRange(FindNormalizationIssues(item, expectNfc, $"{propPath}[{index}]")); - } - index++; - } - } - else if (IsModelType(propType)) - { - issues.AddRange(FindNormalizationIssues(value, expectNfc, propPath)); - } - else - { - throw new Xunit.Sdk.XunitException($"Unexpected property type: {propType.FullName} at {propPath}"); - } + Visit(value, propPath, form, issues); } + } - return issues; + private static bool IsIgnoredType(Type type) + { + var underlying = Nullable.GetUnderlyingType(type) ?? type; + return underlying.IsPrimitive || underlying.IsEnum || + underlying == typeof(Guid) || underlying == typeof(DateTime) || + underlying == typeof(DateTimeOffset) || underlying == typeof(decimal); } - private static void CheckString(string? value, string path, bool expectNfc, List issues) + private static void CheckString(string? value, string path, NormalizationForm form, List issues) { if (string.IsNullOrEmpty(value)) { issues.Add($"{path}: string is null or empty (must have a value for testing)"); return; } - - var expected = expectNfc ? NormalizationForm.FormC : NormalizationForm.FormD; - if (!value.IsNormalized(expected)) + if (!value.IsNormalized(form)) { - var formName = expectNfc ? "NFC" : "NFD"; - issues.Add($"{path}: expected {formName} but \"{value}\" is not {formName}-normalized"); + var name = FormName(form); + issues.Add($"{path}: expected {name} but \"{value}\" is not {name}-normalized"); } } - private static bool IsModelType(Type type) + private static string FormName(NormalizationForm form) { - return - type.Namespace?.StartsWith("MiniLcm.Models") == true || - type == typeof(Entry) || - type == typeof(Sense) || - type == typeof(ExampleSentence) || - type == typeof(Translation) || - type == typeof(WritingSystem) || - type == typeof(PartOfSpeech) || - type == typeof(SemanticDomain) || - type == typeof(ComplexFormType) || - type == typeof(MorphTypeData) || - type == typeof(Publication) || - type == typeof(ComplexFormComponent); + return form == NormalizationForm.FormC ? "NFC" : "NFD"; } } diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs index ae6919dc7e..d0af15451a 100644 --- a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -8,12 +8,6 @@ namespace MiniLcm.Tests; -/// -/// Tests for the MiniLcmApiWriteNormalizationWrapper. -/// These tests verify that all user-entered text is normalized to NFD on write operations. -/// Each test captures the value passed to the underlying API and asserts via -/// , which reports the failing property path on mismatch. -/// public class WriteNormalizationTests { private readonly IMiniLcmApi _mockApi; @@ -680,16 +674,11 @@ public async Task AddTranslation_NormalizesToNfd() } -/// -/// Tests for NormalizationAssert and the NfcTestData factories that feed it. -/// public class NormalizationAssertTests { [Fact] public void AllNfcFactories_ProduceNfcData() { - // Single belt-and-braces test that every factory returns NFC data, replacing the - // per-test AssertNfc(input) preconditions that used to be sprinkled across WriteNormalizationTests. AssertAllNfc(NfcTestData.CreateNfcWritingSystem()); AssertAllNfc(NfcTestData.CreateNfcPartOfSpeech()); AssertAllNfc(NfcTestData.CreateNfcPublication()); diff --git a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs index 74e3acb071..c43c97b208 100644 --- a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs +++ b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs @@ -80,7 +80,7 @@ public Task MoveWritingSystem(WritingSystemId id, WritingSystemType type, Betwee #region PartOfSpeech public async Task CreatePartOfSpeech(PartOfSpeech partOfSpeech) - {; + { return await _api.CreatePartOfSpeech(NormalizePartOfSpeech(partOfSpeech)); } @@ -177,7 +177,7 @@ private static SemanticDomain NormalizeSemanticDomain(SemanticDomain sd) { Id = sd.Id, Name = StringNormalizer.Normalize(sd.Name), - Code = sd.Code, // Code is metadata, not user text + Code = StringNormalizer.Normalize(sd.Code), // yes, LibLcm normalizes this too DeletedAt = sd.DeletedAt, Predefined = sd.Predefined }; @@ -483,6 +483,11 @@ private static Translation NormalizeTranslation(Translation translation) #region Bulk Import + // Normalizing the bulk import methods may seem unintuitive: + // We currently only use them for importing from LibLcm, which should NOT be normalized. + // However, we don't use this normalization wrapper in that context and ít's likely that we'll + // start importing from other sources (Paratext 9, The Combine, Language Forge) in which case we DO want to normalize on import. + public Task BulkImportSemanticDomains(IAsyncEnumerable semanticDomains) { return _api.BulkImportSemanticDomains(NormalizeStream(semanticDomains, NormalizeSemanticDomain)); @@ -568,7 +573,6 @@ private static UpdateObjectInput NormalizePatch(UpdateObjectInput updat RichString richString => StringNormalizer.Normalize(richString), MultiString multiString => StringNormalizer.Normalize(multiString), RichMultiString richMultiString => StringNormalizer.Normalize(richMultiString), - string[] strings => StringNormalizer.Normalize(strings), JsonElement jsonElement => NormalizeJsonElement(jsonElement), _ => value }; diff --git a/backend/FwLite/MiniLcm/Normalization/StringNormalizer.cs b/backend/FwLite/MiniLcm/Normalization/StringNormalizer.cs index f12cd96a12..d213c5a05b 100644 --- a/backend/FwLite/MiniLcm/Normalization/StringNormalizer.cs +++ b/backend/FwLite/MiniLcm/Normalization/StringNormalizer.cs @@ -4,73 +4,43 @@ namespace MiniLcm.Normalization; -/// -/// Helper class for normalizing strings to NFD (Unicode Normalization Form D - Canonical Decomposition) -/// public static class StringNormalizer { public const NormalizationForm Form = NormalizationForm.FormD; - /// - /// Normalizes a string to NFD. Returns null if input is null. - /// [return: NotNullIfNotNull(nameof(value))] public static string? Normalize(string? value) { return value?.Normalize(Form); } - /// - /// Normalizes all values in a MultiString to NFD - /// public static MultiString Normalize(MultiString multiString) { var normalized = new MultiString(multiString.Values.Count); foreach (var (key, value) in multiString.Values) { - // Preserve all keys, even if the value is empty or null normalized.Values[key] = string.IsNullOrEmpty(value) ? value : value.Normalize(Form); } return normalized; } - /// - /// Normalizes all text spans in a RichString to NFD - /// [return: NotNullIfNotNull(nameof(richString))] public static RichString? Normalize(RichString? richString) { if (richString is null) return null; - - var normalizedSpans = richString.Spans.Select(span => - span with { Text = span.Text.Normalize(Form) } - ).ToList(); - - return new RichString(normalizedSpans); + return new RichString([.. richString.Spans.Select(span => span with + { + Text = span.Text.Normalize(Form) + })]); } - /// - /// Normalizes all values in a RichMultiString to NFD - /// public static RichMultiString Normalize(RichMultiString richMultiString) { var normalized = new RichMultiString(richMultiString.Count); foreach (var (key, value) in richMultiString) { - var normalizedRichString = Normalize(value); - if (normalizedRichString is not null) - { - normalized[key] = normalizedRichString; - } + normalized[key] = Normalize(value); } return normalized; } - - /// - /// Normalizes an array of strings to NFD - /// - public static string[] Normalize(string[] values) - { - return [.. values.Select(v => v?.Normalize(Form) ?? string.Empty)]; - } } From e47fc7391940bcd300adf71f1b6a5639d0dfa744 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Tue, 12 May 2026 11:15:25 +0200 Subject: [PATCH 08/12] Adapt to morph-type renamings --- .../MiniLcm.Tests/Helpers/NfcTestData.cs | 7 +- .../Helpers/NormalizationAssert.cs | 8 +-- .../MiniLcm.Tests/WriteNormalizationTests.cs | 66 +++++++------------ .../MiniLcmApiWriteNormalizationWrapper.cs | 35 ++++------ 4 files changed, 40 insertions(+), 76 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs index b560f1fceb..61144e6a4f 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs @@ -82,16 +82,17 @@ public static ComplexFormType CreateNfcComplexFormType() }; } - public static MorphTypeData CreateNfcMorphTypeData() + public static MorphType CreateNfcMorphType() { return new() { Id = Guid.NewGuid(), + Kind = MorphTypeKind.Stem, Name = CreateNfcMultiString(), Abbreviation = CreateNfcMultiString(), Description = CreateNfcRichMultiString(), - LeadingToken = Nfc, - TrailingToken = Nfc + Prefix = Nfc, + Postfix = Nfc }; } diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs index 6582b67245..089b43f59c 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs @@ -11,7 +11,7 @@ public static class NormalizationAssert { private static readonly Dictionary> SkippedProperties = new() { - // WritingSystem fields are generally LDML-managed + // WritingSystem fields are generally LDML-managed and seem to not be normalized by FieldWorks [typeof(WritingSystem)] = [ nameof(WritingSystem.WsId), @@ -20,12 +20,6 @@ public static class NormalizationAssert nameof(WritingSystem.Font), nameof(WritingSystem.Exemplars), ], - // MorphTypeData leading/trailing tokens are punctuation markers - [typeof(MorphTypeData)] = - [ - nameof(MorphTypeData.LeadingToken), - nameof(MorphTypeData.TrailingToken), - ], }; public static void AssertAllNfc(object obj) diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs index d0af15451a..53ac646b7f 100644 --- a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -261,71 +261,49 @@ public async Task UpdateComplexFormType_BeforeAfter_NormalizesToNfd() #endregion - #region MorphTypeData Tests - - // MorphTypeData's MultiString/RichMultiString fields (Name, Abbreviation, Description) are normalized - // to NFD (matching liblcm); LeadingToken/TrailingToken are punctuation markers and pass through unchanged. - - [Fact] - public async Task CreateMorphTypeData_NormalizesMultiStringsToNfd_AndPassesTokensThrough() - { - var mtd = NfcTestData.CreateNfcMorphTypeData(); - - MorphTypeData? captured = null; - Mock.Get(_mockApi) - .Setup(api => api.CreateMorphTypeData(It.IsAny())) - .Callback(m => captured = m) - .ReturnsAsync(mtd); - - await _normalizingApi.CreateMorphTypeData(mtd); - - captured.Should().NotBeNull(); - AssertAllNfd(captured); - captured.LeadingToken.Should().Be(NfcTestData.Nfc); - captured.TrailingToken.Should().Be(NfcTestData.Nfc); - } + #region MorphType Tests [Fact] - public async Task UpdateMorphTypeData_BeforeAfter_NormalizesMultiStringsToNfd_AndPassesTokensThrough() + public async Task UpdateMorphType_BeforeAfter_NormalizesMultiStringsToNfd_AndPassesTokensThrough() { - var before = NfcTestData.CreateNfcMorphTypeData(); - var after = NfcTestData.CreateNfcMorphTypeData(); + var before = NfcTestData.CreateNfcMorphType(); + var after = NfcTestData.CreateNfcMorphType(); - MorphTypeData? captured = null; + MorphType? captured = null; Mock.Get(_mockApi) - .Setup(api => api.UpdateMorphTypeData(It.IsAny(), It.IsAny(), It.IsAny())) - .Callback((_, a, _) => captured = a) + .Setup(api => api.UpdateMorphType(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, a, _) => captured = a) .ReturnsAsync(after); - await _normalizingApi.UpdateMorphTypeData(before, after); + await _normalizingApi.UpdateMorphType(before, after); captured.Should().NotBeNull(); AssertAllNfd(captured); - captured.LeadingToken.Should().Be(NfcTestData.Nfc); - captured.TrailingToken.Should().Be(NfcTestData.Nfc); + captured.Prefix.Should().Be(NfcTestData.Nfc); + captured.Postfix.Should().Be(NfcTestData.Nfc); } [Fact] - public async Task UpdateMorphTypeData_JsonPatch_NormalizesMultiString_AndPassesTokensThrough() + public async Task UpdateMorphType_JsonPatch_NormalizesMultiString_AndPassesTokensThrough() { - var update = new UpdateObjectInput() + var update = new UpdateObjectInput() .Set(m => m.Name, NfcTestData.CreateNfcMultiString()) - .Set(m => m.LeadingToken, NfcTestData.Nfc) - .Set(m => m.TrailingToken, NfcTestData.Nfc); + .Set(m => m.Prefix, NfcTestData.Nfc) + .Set(m => m.Postfix, NfcTestData.Nfc); - UpdateObjectInput? captured = null; + UpdateObjectInput? captured = null; Mock.Get(_mockApi) - .Setup(api => api.UpdateMorphTypeData(It.IsAny(), It.IsAny>())) - .Callback>((_, patch) => captured = patch) - .ReturnsAsync(NfcTestData.CreateNfcMorphTypeData()); + .Setup(api => api.UpdateMorphType(It.IsAny(), It.IsAny>())) + .Callback>((_, patch) => captured = patch) + .ReturnsAsync(NfcTestData.CreateNfcMorphType()); - await _normalizingApi.UpdateMorphTypeData(Guid.NewGuid(), update); + await _normalizingApi.UpdateMorphType(Guid.NewGuid(), update); captured.Should().NotBeNull(); var byPath = captured.Patch.Operations.ToDictionary(o => o.Path!, o => o.Value); AssertAllNfd(byPath["/Name"].Should().BeOfType().Subject); - byPath["/LeadingToken"].Should().Be(NfcTestData.Nfc); - byPath["/TrailingToken"].Should().Be(NfcTestData.Nfc); + byPath["/Prefix"].Should().Be(NfcTestData.Nfc); + byPath["/Postfix"].Should().Be(NfcTestData.Nfc); } #endregion @@ -684,7 +662,7 @@ public void AllNfcFactories_ProduceNfcData() AssertAllNfc(NfcTestData.CreateNfcPublication()); AssertAllNfc(NfcTestData.CreateNfcSemanticDomain()); AssertAllNfc(NfcTestData.CreateNfcComplexFormType()); - AssertAllNfc(NfcTestData.CreateNfcMorphTypeData()); + AssertAllNfc(NfcTestData.CreateNfcMorphType()); AssertAllNfc(NfcTestData.CreateNfcTranslation()); AssertAllNfc(NfcTestData.CreateNfcExampleSentence()); AssertAllNfc(NfcTestData.CreateNfcExampleSentenceWithTranslations()); diff --git a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs index c43c97b208..7ad73487ca 100644 --- a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs +++ b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs @@ -31,7 +31,7 @@ public IMiniLcmApi Create(IMiniLcmApi api) /// on every write, matching liblcm's generated property setters which call TsStringUtils.NormalizeNfd. /// - Plain-string properties that liblcm does not NFD-normalize are passed through unchanged. This currently /// covers WritingSystem.{Name, Abbreviation, Font, Exemplars} (LDML-managed) and -/// MorphTypeData.{LeadingToken, TrailingToken} (punctuation markers). +/// MorphType.{Prefix, Postfix} (punctuation markers). /// - JsonPatch overloads normalize string-ish values best-effort (string, RichString, MultiString, RichMultiString). /// JsonElement values are only normalized when they are simple strings; complex JSON values are left as-is /// to avoid guessing the target type. @@ -220,41 +220,31 @@ private static ComplexFormType NormalizeComplexFormType(ComplexFormType cft) #region MorphType - public async Task CreateMorphTypeData(MorphTypeData morphType) + public Task UpdateMorphType(Guid id, UpdateObjectInput update) { - return await _api.CreateMorphTypeData(NormalizeMorphTypeData(morphType)); + return _api.UpdateMorphType(id, NormalizePatch(update, MorphTypePlainStringPaths)); } - public Task UpdateMorphTypeData(Guid id, UpdateObjectInput update) - { - return _api.UpdateMorphTypeData(id, NormalizePatch(update, MorphTypeDataPlainStringPaths)); - } - - public async Task UpdateMorphTypeData(MorphTypeData before, MorphTypeData after, IMiniLcmApi? api = null) + public async Task UpdateMorphType(MorphType before, MorphType after, IMiniLcmApi? api = null) { - return await _api.UpdateMorphTypeData(before, NormalizeMorphTypeData(after), api); + return await _api.UpdateMorphType(before, NormalizeMorphType(after), api); } - public Task DeleteMorphTypeData(Guid id) - { - return _api.DeleteMorphTypeData(id); - } - - // LeadingToken/TrailingToken are punctuation markers (e.g. "-", "="), not user-entered linguistic text. - private static readonly HashSet MorphTypeDataPlainStringPaths = ["/LeadingToken", "/TrailingToken"]; + // Prefix/Postfix are punctuation markers (e.g. "-", "="), not user-entered linguistic text. + private static readonly HashSet MorphTypePlainStringPaths = ["/Prefix", "/Postfix"]; - private static MorphTypeData NormalizeMorphTypeData(MorphTypeData mtd) + private static MorphType NormalizeMorphType(MorphType mtd) { - return new MorphTypeData + return new MorphType { Id = mtd.Id, - MorphType = mtd.MorphType, + Kind = mtd.Kind, Name = StringNormalizer.Normalize(mtd.Name), Abbreviation = StringNormalizer.Normalize(mtd.Abbreviation), Description = StringNormalizer.Normalize(mtd.Description), - LeadingToken = mtd.LeadingToken, - TrailingToken = mtd.TrailingToken, + Prefix = mtd.Prefix, + Postfix = mtd.Postfix, SecondaryOrder = mtd.SecondaryOrder, DeletedAt = mtd.DeletedAt }; @@ -591,4 +581,5 @@ void IDisposable.Dispose() { // No resources to dispose } + } From 9f451e35b924215633ee4bb53309769e0c998995 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Tue, 12 May 2026 11:51:30 +0200 Subject: [PATCH 09/12] Also normalize morph-type tokens like FieldWorks --- .../MiniLcm.Tests/WriteNormalizationTests.cs | 10 +++--- .../MiniLcmApiWriteNormalizationWrapper.cs | 32 +++++++------------ 2 files changed, 15 insertions(+), 27 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs index 53ac646b7f..5a764c6ff5 100644 --- a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -264,7 +264,7 @@ public async Task UpdateComplexFormType_BeforeAfter_NormalizesToNfd() #region MorphType Tests [Fact] - public async Task UpdateMorphType_BeforeAfter_NormalizesMultiStringsToNfd_AndPassesTokensThrough() + public async Task UpdateMorphType_BeforeAfter_NormalizesToNfd() { var before = NfcTestData.CreateNfcMorphType(); var after = NfcTestData.CreateNfcMorphType(); @@ -279,12 +279,10 @@ public async Task UpdateMorphType_BeforeAfter_NormalizesMultiStringsToNfd_AndPas captured.Should().NotBeNull(); AssertAllNfd(captured); - captured.Prefix.Should().Be(NfcTestData.Nfc); - captured.Postfix.Should().Be(NfcTestData.Nfc); } [Fact] - public async Task UpdateMorphType_JsonPatch_NormalizesMultiString_AndPassesTokensThrough() + public async Task UpdateMorphType_JsonPatch_NormalizesToNfd() { var update = new UpdateObjectInput() .Set(m => m.Name, NfcTestData.CreateNfcMultiString()) @@ -302,8 +300,8 @@ public async Task UpdateMorphType_JsonPatch_NormalizesMultiString_AndPassesToken captured.Should().NotBeNull(); var byPath = captured.Patch.Operations.ToDictionary(o => o.Path!, o => o.Value); AssertAllNfd(byPath["/Name"].Should().BeOfType().Subject); - byPath["/Prefix"].Should().Be(NfcTestData.Nfc); - byPath["/Postfix"].Should().Be(NfcTestData.Nfc); + byPath["/Prefix"].Should().Be(NfcTestData.Nfd); + byPath["/Postfix"].Should().Be(NfcTestData.Nfd); } #endregion diff --git a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs index 7ad73487ca..59fc008908 100644 --- a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs +++ b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs @@ -26,12 +26,11 @@ public IMiniLcmApi Create(IMiniLcmApi api) /// /// Design notes: /// - Read operations are forwarded automatically via BeaKona.AutoInterface. -/// - Write operations are manually implemented here (compile-time enforced by IMiniLcmApi). -/// - Scope mirrors liblcm: TsString-equivalents (MultiString, RichString, RichMultiString) are normalized -/// on every write, matching liblcm's generated property setters which call TsStringUtils.NormalizeNfd. -/// - Plain-string properties that liblcm does not NFD-normalize are passed through unchanged. This currently -/// covers WritingSystem.{Name, Abbreviation, Font, Exemplars} (LDML-managed) and -/// MorphType.{Prefix, Postfix} (punctuation markers). +/// - Write operations need to be manually implemented so nothing is missed (compile-time enforced by IMiniLcmApi). +/// - Should mirror what LibLcm/FieldWorks normalizes, which seems to be EVERYTHING in the "standard editor" UI +/// (so entry fields, but also list fields e.g. Semantic Domains, Morph Types, etc. - not only multi-strings) +/// - Properties that liblcm/FieldWorks does not NFD-normalize are passed through unchanged. +/// Currently only WritingSystem properties. /// - JsonPatch overloads normalize string-ish values best-effort (string, RichString, MultiString, RichMultiString). /// JsonElement values are only normalized when they are simple strings; complex JSON values are left as-is /// to avoid guessing the target type. @@ -46,14 +45,9 @@ public partial class MiniLcmApiWriteNormalizationWrapper(IMiniLcmApi api) : IMin [BeaKona.AutoInterface] private IMiniLcmReadApi ReadApi => _api; - // ********** IMiniLcmWriteApi Manual Implementations ********** - // All write methods are implemented manually to ensure NFD normalization - // and guarantee compile-time coverage of all write operations. - #region WritingSystem - // WritingSystem fields (Name, Abbreviation, Font, Exemplars) are plain strings; in liblcm they are - // LDML-managed by WritingSystemManager rather than stored as TsString, so they aren't NFD-normalized. + // Intentionally NOT normalized, because FieldWorks/LibLcm doesn't seem to either public Task CreateWritingSystem(WritingSystem writingSystem, BetweenPosition? between = null) { return _api.CreateWritingSystem(writingSystem, between); @@ -222,7 +216,7 @@ private static ComplexFormType NormalizeComplexFormType(ComplexFormType cft) public Task UpdateMorphType(Guid id, UpdateObjectInput update) { - return _api.UpdateMorphType(id, NormalizePatch(update, MorphTypePlainStringPaths)); + return _api.UpdateMorphType(id, NormalizePatch(update)); } @@ -231,9 +225,6 @@ public async Task UpdateMorphType(MorphType before, MorphType after, return await _api.UpdateMorphType(before, NormalizeMorphType(after), api); } - // Prefix/Postfix are punctuation markers (e.g. "-", "="), not user-entered linguistic text. - private static readonly HashSet MorphTypePlainStringPaths = ["/Prefix", "/Postfix"]; - private static MorphType NormalizeMorphType(MorphType mtd) { return new MorphType @@ -243,8 +234,8 @@ private static MorphType NormalizeMorphType(MorphType mtd) Name = StringNormalizer.Normalize(mtd.Name), Abbreviation = StringNormalizer.Normalize(mtd.Abbreviation), Description = StringNormalizer.Normalize(mtd.Description), - Prefix = mtd.Prefix, - Postfix = mtd.Postfix, + Prefix = StringNormalizer.Normalize(mtd.Prefix), + Postfix = StringNormalizer.Normalize(mtd.Postfix), SecondaryOrder = mtd.SecondaryOrder, DeletedAt = mtd.DeletedAt }; @@ -532,15 +523,14 @@ public Task SaveFile(Stream stream, LcmFileMetadata metadata #region Patch Normalization - private static UpdateObjectInput NormalizePatch(UpdateObjectInput update, HashSet? skipPaths = null) where T : class + private static UpdateObjectInput NormalizePatch(UpdateObjectInput update) where T : class { if (update.Patch.Operations.Count == 0) return update; var normalizedPatch = new SystemTextJsonPatch.JsonPatchDocument(); foreach (var op in update.Patch.Operations) { - var skip = skipPaths is not null && op.Path is not null && skipPaths.Contains(op.Path); - var normalizedValue = skip ? op.Value : NormalizePatchValue(op.Value); + var normalizedValue = NormalizePatchValue(op.Value); var normalizedOp = new SystemTextJsonPatch.Operations.Operation { Op = op.Op, From 19d09a2e118b91b57825a9e3b81b053e1cb4bcd4 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Fri, 15 May 2026 11:54:47 +0200 Subject: [PATCH 10/12] Also normalize 'before' parameters --- .../Helpers/NormalizationAssert.cs | 7 +- .../MiniLcm.Tests/WriteNormalizationTests.cs | 88 ++++++++++--------- .../MiniLcmApiWriteNormalizationWrapper.cs | 39 ++++---- 3 files changed, 73 insertions(+), 61 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs index 089b43f59c..a0b048464b 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs @@ -22,12 +22,12 @@ public static class NormalizationAssert ], }; - public static void AssertAllNfc(object obj) + public static void AssertAllNfc(object? obj) { Assert(obj, NormalizationForm.FormC); } - public static void AssertAllNfd(object obj) + public static void AssertAllNfd(object? obj) { Assert(obj, NormalizationForm.FormD); } @@ -37,8 +37,9 @@ public static bool IsAllNfd(object obj) return FindIssues(obj, NormalizationForm.FormD).Count == 0; } - private static void Assert(object obj, NormalizationForm form) + private static void Assert(object? obj, NormalizationForm form) { + if (obj is null) throw new Xunit.Sdk.XunitException("Expected object to be non-null but was null"); var issues = FindIssues(obj, form); if (issues.Count == 0) return; var name = FormName(form); diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs index 5a764c6ff5..78e7ed066d 100644 --- a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -89,21 +89,22 @@ public async Task CreatePartOfSpeech_NormalizesToNfd() } [Fact] - public async Task UpdatePartOfSpeech_BeforeAfter_NormalizesToNfd() + public async Task UpdatePartOfSpeech_BeforeAfter_NormalizesBothToNfd() { var before = NfcTestData.CreateNfcPartOfSpeech(); var after = NfcTestData.CreateNfcPartOfSpeech(); - PartOfSpeech? captured = null; + PartOfSpeech? capturedBefore = null; + PartOfSpeech? capturedAfter = null; Mock.Get(_mockApi) .Setup(api => api.UpdatePartOfSpeech(It.IsAny(), It.IsAny(), It.IsAny())) - .Callback((_, a, _) => captured = a) + .Callback((b, a, _) => { capturedBefore = b; capturedAfter = a; }) .ReturnsAsync(after); await _normalizingApi.UpdatePartOfSpeech(before, after); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertAllNfd(capturedBefore); + AssertAllNfd(capturedAfter); } #endregion @@ -128,21 +129,22 @@ public async Task CreatePublication_NormalizesToNfd() } [Fact] - public async Task UpdatePublication_BeforeAfter_NormalizesToNfd() + public async Task UpdatePublication_BeforeAfter_NormalizesBothToNfd() { var before = NfcTestData.CreateNfcPublication(); var after = NfcTestData.CreateNfcPublication(); - Publication? captured = null; + Publication? capturedBefore = null; + Publication? capturedAfter = null; Mock.Get(_mockApi) .Setup(api => api.UpdatePublication(It.IsAny(), It.IsAny(), It.IsAny())) - .Callback((_, a, _) => captured = a) + .Callback((b, a, _) => { capturedBefore = b; capturedAfter = a; }) .ReturnsAsync(after); await _normalizingApi.UpdatePublication(before, after); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertAllNfd(capturedBefore); + AssertAllNfd(capturedAfter); } #endregion @@ -167,21 +169,22 @@ public async Task CreateSemanticDomain_NormalizesToNfd() } [Fact] - public async Task UpdateSemanticDomain_BeforeAfter_NormalizesToNfd() + public async Task UpdateSemanticDomain_BeforeAfter_NormalizesBothToNfd() { var before = NfcTestData.CreateNfcSemanticDomain(); var after = NfcTestData.CreateNfcSemanticDomain(); - SemanticDomain? captured = null; + SemanticDomain? capturedBefore = null; + SemanticDomain? capturedAfter = null; Mock.Get(_mockApi) .Setup(api => api.UpdateSemanticDomain(It.IsAny(), It.IsAny(), It.IsAny())) - .Callback((_, a, _) => captured = a) + .Callback((b, a, _) => { capturedBefore = b; capturedAfter = a; }) .ReturnsAsync(after); await _normalizingApi.UpdateSemanticDomain(before, after); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertAllNfd(capturedBefore); + AssertAllNfd(capturedAfter); } [Fact] @@ -242,21 +245,22 @@ public async Task CreateComplexFormType_NormalizesToNfd() } [Fact] - public async Task UpdateComplexFormType_BeforeAfter_NormalizesToNfd() + public async Task UpdateComplexFormType_BeforeAfter_NormalizesBothToNfd() { var before = NfcTestData.CreateNfcComplexFormType(); var after = NfcTestData.CreateNfcComplexFormType(); - ComplexFormType? captured = null; + ComplexFormType? capturedBefore = null; + ComplexFormType? capturedAfter = null; Mock.Get(_mockApi) .Setup(api => api.UpdateComplexFormType(It.IsAny(), It.IsAny(), It.IsAny())) - .Callback((_, a, _) => captured = a) + .Callback((b, a, _) => { capturedBefore = b; capturedAfter = a; }) .ReturnsAsync(after); await _normalizingApi.UpdateComplexFormType(before, after); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertAllNfd(capturedBefore); + AssertAllNfd(capturedAfter); } #endregion @@ -264,21 +268,22 @@ public async Task UpdateComplexFormType_BeforeAfter_NormalizesToNfd() #region MorphType Tests [Fact] - public async Task UpdateMorphType_BeforeAfter_NormalizesToNfd() + public async Task UpdateMorphType_BeforeAfter_NormalizesBothToNfd() { var before = NfcTestData.CreateNfcMorphType(); var after = NfcTestData.CreateNfcMorphType(); - MorphType? captured = null; + MorphType? capturedBefore = null; + MorphType? capturedAfter = null; Mock.Get(_mockApi) .Setup(api => api.UpdateMorphType(It.IsAny(), It.IsAny(), It.IsAny())) - .Callback((_, a, _) => captured = a) + .Callback((b, a, _) => { capturedBefore = b; capturedAfter = a; }) .ReturnsAsync(after); await _normalizingApi.UpdateMorphType(before, after); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertAllNfd(capturedBefore); + AssertAllNfd(capturedAfter); } [Fact] @@ -326,21 +331,22 @@ public async Task CreateEntry_NormalizesToNfd() } [Fact] - public async Task UpdateEntry_BeforeAfter_NormalizesToNfd() + public async Task UpdateEntry_BeforeAfter_NormalizesBothToNfd() { var before = NfcTestData.CreateNfcEntry(); var after = NfcTestData.CreateNfcEntry(); - Entry? captured = null; + Entry? capturedBefore = null; + Entry? capturedAfter = null; Mock.Get(_mockApi) .Setup(api => api.UpdateEntry(It.IsAny(), It.IsAny(), It.IsAny())) - .Callback((_, a, _) => captured = a) + .Callback((b, a, _) => { capturedBefore = b; capturedAfter = a; }) .ReturnsAsync(after); await _normalizingApi.UpdateEntry(before, after); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertAllNfd(capturedBefore); + AssertAllNfd(capturedAfter); } [Fact] @@ -526,22 +532,23 @@ public async Task CreateSense_NormalizesToNfd() } [Fact] - public async Task UpdateSense_BeforeAfter_NormalizesToNfd() + public async Task UpdateSense_BeforeAfter_NormalizesBothToNfd() { var entryId = Guid.NewGuid(); var before = NfcTestData.CreateNfcSense(); var after = NfcTestData.CreateNfcSense(); - Sense? captured = null; + Sense? capturedBefore = null; + Sense? capturedAfter = null; Mock.Get(_mockApi) .Setup(api => api.UpdateSense(It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny())) - .Callback((_, _, a, _) => captured = a) + .Callback((_, b, a, _) => { capturedBefore = b; capturedAfter = a; }) .ReturnsAsync(after); await _normalizingApi.UpdateSense(entryId, before, after); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertAllNfd(capturedBefore); + AssertAllNfd(capturedAfter); } [Fact] @@ -584,26 +591,27 @@ public async Task CreateExampleSentence_NormalizesToNfd() } [Fact] - public async Task UpdateExampleSentence_BeforeAfter_NormalizesToNfd() + public async Task UpdateExampleSentence_BeforeAfter_NormalizesBothToNfd() { var entryId = Guid.NewGuid(); var senseId = Guid.NewGuid(); var before = NfcTestData.CreateNfcExampleSentence(); var after = NfcTestData.CreateNfcExampleSentence(); - ExampleSentence? captured = null; + ExampleSentence? capturedBefore = null; + ExampleSentence? capturedAfter = null; Mock.Get(_mockApi) .Setup(api => api.UpdateExampleSentence( It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny())) - .Callback((_, _, _, a, _) => captured = a) + .Callback((_, _, b, a, _) => { capturedBefore = b; capturedAfter = a; }) .ReturnsAsync(after); await _normalizingApi.UpdateExampleSentence(entryId, senseId, before, after); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertAllNfd(capturedBefore); + AssertAllNfd(capturedAfter); } [Fact] diff --git a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs index 59fc008908..c182aca5e2 100644 --- a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs +++ b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs @@ -29,6 +29,9 @@ public IMiniLcmApi Create(IMiniLcmApi api) /// - Write operations need to be manually implemented so nothing is missed (compile-time enforced by IMiniLcmApi). /// - Should mirror what LibLcm/FieldWorks normalizes, which seems to be EVERYTHING in the "standard editor" UI /// (so entry fields, but also list fields e.g. Semantic Domains, Morph Types, etc. - not only multi-strings) +/// - For update methods that take both "before" and "after" objects, BOTH are normalized to ensure that any string comparisons done by the API are normalized. +/// This prevents potential diff-noise caused by "before" being bad-data even though we expect it to always already be normalized (because we presumably served it). +/// Non-normalized data that is already persisted (before this wrapper was introduced) will be normalized by LibLcm. That's not something we want to fix here. /// - Properties that liblcm/FieldWorks does not NFD-normalize are passed through unchanged. /// Currently only WritingSystem properties. /// - JsonPatch overloads normalize string-ish values best-effort (string, RichString, MultiString, RichMultiString). @@ -86,7 +89,7 @@ public Task UpdatePartOfSpeech(Guid id, UpdateObjectInput UpdatePartOfSpeech(PartOfSpeech before, PartOfSpeech after, IMiniLcmApi? api = null) { - return await _api.UpdatePartOfSpeech(before, NormalizePartOfSpeech(after), api); + return await _api.UpdatePartOfSpeech(NormalizePartOfSpeech(before), NormalizePartOfSpeech(after), api); } public Task DeletePartOfSpeech(Guid id) @@ -122,7 +125,7 @@ public Task UpdatePublication(Guid id, UpdateObjectInput UpdatePublication(Publication before, Publication after, IMiniLcmApi? api = null) { - return await _api.UpdatePublication(before, NormalizePublication(after), api); + return await _api.UpdatePublication(NormalizePublication(before), NormalizePublication(after), api); } public Task DeletePublication(Guid id) @@ -157,7 +160,7 @@ public Task UpdateSemanticDomain(Guid id, UpdateObjectInput UpdateSemanticDomain(SemanticDomain before, SemanticDomain after, IMiniLcmApi? api = null) { - return await _api.UpdateSemanticDomain(before, NormalizeSemanticDomain(after), api); + return await _api.UpdateSemanticDomain(NormalizeSemanticDomain(before), NormalizeSemanticDomain(after), api); } public Task DeleteSemanticDomain(Guid id) @@ -194,7 +197,7 @@ public Task UpdateComplexFormType(Guid id, UpdateObjectInput UpdateComplexFormType(ComplexFormType before, ComplexFormType after, IMiniLcmApi? api = null) { - return await _api.UpdateComplexFormType(before, NormalizeComplexFormType(after), api); + return await _api.UpdateComplexFormType(NormalizeComplexFormType(before), NormalizeComplexFormType(after), api); } public Task DeleteComplexFormType(Guid id) @@ -222,22 +225,22 @@ public Task UpdateMorphType(Guid id, UpdateObjectInput upd public async Task UpdateMorphType(MorphType before, MorphType after, IMiniLcmApi? api = null) { - return await _api.UpdateMorphType(before, NormalizeMorphType(after), api); + return await _api.UpdateMorphType(NormalizeMorphType(before), NormalizeMorphType(after), api); } - private static MorphType NormalizeMorphType(MorphType mtd) + private static MorphType NormalizeMorphType(MorphType mt) { return new MorphType { - Id = mtd.Id, - Kind = mtd.Kind, - Name = StringNormalizer.Normalize(mtd.Name), - Abbreviation = StringNormalizer.Normalize(mtd.Abbreviation), - Description = StringNormalizer.Normalize(mtd.Description), - Prefix = StringNormalizer.Normalize(mtd.Prefix), - Postfix = StringNormalizer.Normalize(mtd.Postfix), - SecondaryOrder = mtd.SecondaryOrder, - DeletedAt = mtd.DeletedAt + Id = mt.Id, + Kind = mt.Kind, + Name = StringNormalizer.Normalize(mt.Name), + Abbreviation = StringNormalizer.Normalize(mt.Abbreviation), + Description = StringNormalizer.Normalize(mt.Description), + Prefix = StringNormalizer.Normalize(mt.Prefix), + Postfix = StringNormalizer.Normalize(mt.Postfix), + SecondaryOrder = mt.SecondaryOrder, + DeletedAt = mt.DeletedAt }; } @@ -258,7 +261,7 @@ public Task UpdateEntry(Guid id, UpdateObjectInput update) public async Task UpdateEntry(Entry before, Entry after, IMiniLcmApi? api = null) { - return await _api.UpdateEntry(before, NormalizeEntry(after), api); + return await _api.UpdateEntry(NormalizeEntry(before), NormalizeEntry(after), api); } public Task DeleteEntry(Guid id) @@ -346,7 +349,7 @@ public Task UpdateSense(Guid entryId, Guid senseId, UpdateObjectInput UpdateSense(Guid entryId, Sense before, Sense after, IMiniLcmApi? api = null) { - return await _api.UpdateSense(entryId, before, NormalizeSense(after), api); + return await _api.UpdateSense(entryId, NormalizeSense(before), NormalizeSense(after), api); } public Task MoveSense(Guid entryId, Guid senseId, BetweenPosition position) @@ -408,7 +411,7 @@ public Task UpdateExampleSentence(Guid entryId, Guid senseId, G public async Task UpdateExampleSentence(Guid entryId, Guid senseId, ExampleSentence before, ExampleSentence after, IMiniLcmApi? api = null) { - return await _api.UpdateExampleSentence(entryId, senseId, before, NormalizeExampleSentence(after), api); + return await _api.UpdateExampleSentence(entryId, senseId, NormalizeExampleSentence(before), NormalizeExampleSentence(after), api); } public Task MoveExampleSentence(Guid entryId, Guid senseId, Guid exampleSentenceId, BetweenPosition position) From 6c9ba0efee9dd61cac001e1b3a1548d2893bdba7 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Fri, 15 May 2026 12:18:01 +0200 Subject: [PATCH 11/12] Set sense ID to fix tests --- backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs b/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs index 48ab0f8f97..fcf7014f4a 100644 --- a/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs +++ b/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs @@ -143,7 +143,7 @@ static Entry[] CreateSortedEntrySet(string headword) var lastLongestContainsMatches = CreateSortedEntrySet("ccaaaa"); var entryId = Guid.NewGuid(); - Entry nonHeadwordMatch = new() { Id = entryId, Senses = [new() { EntryId = entryId, Gloss = { ["en"] = "aaaa" } }] }; + Entry nonHeadwordMatch = new() { Id = entryId, Senses = [new() { Id = Guid.NewGuid(), EntryId = entryId, Gloss = { ["en"] = "aaaa" } }] }; Entry[] expected = [ .. exactMatches, @@ -201,7 +201,7 @@ static Entry[] CreateSortedEntrySet(string headword) var lastLongestContainsMatches = CreateSortedEntrySet("ccaaaa"); var entryId = Guid.NewGuid(); - Entry nonHeadwordMatch = new() { Id = entryId, Senses = [new() { EntryId = entryId, Gloss = { ["en"] = "aaaa" } }] }; + Entry nonHeadwordMatch = new() { Id = entryId, Senses = [new() { Id = Guid.NewGuid(), EntryId = entryId, Gloss = { ["en"] = "aaaa" } }] }; Entry[] expected = [ .. exactMatches, From 6fc11c6ce30b8745a7c0b48faadc46486fa43a82 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Fri, 15 May 2026 12:56:21 +0200 Subject: [PATCH 12/12] Exclude Order from sorting test equivalence checks The IOrderable.Order field is auto-assigned by the API on create, so expected entries (with default Order=0) don't match results. Also removes redundant no-op `options => options` asserts that duplicated the strict-ordering assertions. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../FwLite/MiniLcm.Tests/SortingTestsBase.cs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs b/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs index fcf7014f4a..c4f920af6c 100644 --- a/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs +++ b/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs @@ -167,10 +167,9 @@ static Entry[] CreateSortedEntrySet(string headword) .Where(e => ids.Contains(e.Id)) .ToList(); - results.Should().BeEquivalentTo(expected, - options => options); - results.Should().BeEquivalentTo(expected, - options => options.WithStrictOrdering()); + results.Should().BeEquivalentTo(expected, options => options + .WithStrictOrdering() + .For(e => e.Senses).Exclude(s => s.Order)); } [Theory] @@ -225,10 +224,9 @@ static Entry[] CreateSortedEntrySet(string headword) .Where(e => ids.Contains(e.Id)) .ToList(); - results.Should().BeEquivalentTo(expected, - options => options); - results.Should().BeEquivalentTo(expected, - options => options.WithStrictOrdering()); + results.Should().BeEquivalentTo(expected, options => options + .WithStrictOrdering() + .For(e => e.Senses).Exclude(s => s.Order)); } [Theory] @@ -260,8 +258,6 @@ public async Task SecondaryOrder_Headword_LexemeForm(string searchTerm) .Where(e => ids.Contains(e.Id)) .ToList(); - results.Should().BeEquivalentTo(expected, - options => options); results.Should().BeEquivalentTo(expected, options => options.WithStrictOrdering()); } @@ -295,8 +291,6 @@ public async Task SecondaryOrder_Headword_CitationForm(string searchTerm) .Where(e => ids.Contains(e.Id)) .ToList(); - results.Should().BeEquivalentTo(expected, - options => options); results.Should().BeEquivalentTo(expected, options => options.WithStrictOrdering()); }