From 93b9a6ef952b1faab7be950bd1b62d9488c722a0 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Fri, 29 May 2026 13:42:46 +0200 Subject: [PATCH 1/7] Add non-trivial NFC/NFD check to catch ASCII test data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously NormalizationAssert only verified strings were in the expected form. ASCII satisfies both NFC and NFD, so a factory or test fixture using ASCII content silently bypassed the normalizer — the assertion passed without exercising any normalization work. New `requireNonTrivial` parameter asserts each string differs from its conversion to the opposite form, ensuring the input actually contains content the normalizer would have to act on. Applied to `AllNfcFactories_ProduceNfcData`, which now enforces non-triviality for every NfcTestData factory in one place. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Helpers/NormalizationAssert.cs | 53 ++++++++++++------- .../MiniLcm.Tests/WriteNormalizationTests.cs | 36 +++++++------ 2 files changed, 53 insertions(+), 36 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs index a0b048464b..9d6b72ce29 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs @@ -22,25 +22,25 @@ public static class NormalizationAssert ], }; - public static void AssertAllNfc(object? obj) + public static void AssertAllNfc(object? obj, bool requireNonTrivial = false) { - Assert(obj, NormalizationForm.FormC); + Assert(obj, NormalizationForm.FormC, requireNonTrivial); } - public static void AssertAllNfd(object? obj) + public static void AssertAllNfd(object? obj, bool requireNonTrivial = false) { - Assert(obj, NormalizationForm.FormD); + Assert(obj, NormalizationForm.FormD, requireNonTrivial); } public static bool IsAllNfd(object obj) { - return FindIssues(obj, NormalizationForm.FormD).Count == 0; + return FindIssues(obj, NormalizationForm.FormD, requireNonTrivial: false).Count == 0; } - private static void Assert(object? obj, NormalizationForm form) + private static void Assert(object? obj, NormalizationForm form, bool requireNonTrivial) { if (obj is null) throw new Xunit.Sdk.XunitException("Expected object to be non-null but was null"); - var issues = FindIssues(obj, form); + var issues = FindIssues(obj, form, requireNonTrivial); if (issues.Count == 0) return; var name = FormName(form); throw new Xunit.Sdk.XunitException( @@ -49,46 +49,46 @@ private static void Assert(object? obj, NormalizationForm form) ); } - private static List FindIssues(object obj, NormalizationForm form) + private static List FindIssues(object obj, NormalizationForm form, bool requireNonTrivial) { var issues = new List(); - Visit(obj, "", form, issues); + Visit(obj, "", form, requireNonTrivial, issues); return issues; } - private static void Visit(object? obj, string path, NormalizationForm form, List issues) + private static void Visit(object? obj, string path, NormalizationForm form, bool requireNonTrivial, List issues) { switch (obj) { case null: return; case string s: - CheckString(s, path, form, issues); + CheckString(s, path, form, requireNonTrivial, issues); return; case MultiString ms: if (ms.Values.Count == 0) issues.Add($"{path}: MultiString has no values (must have at least one for testing)"); - foreach (var (key, value) in ms.Values) CheckString(value, $"{path}.Values[{key}]", form, issues); + foreach (var (key, value) in ms.Values) CheckString(value, $"{path}.Values[{key}]", form, requireNonTrivial, issues); return; case RichString rs: if (rs.Spans.Count == 0) issues.Add($"{path}: RichString has no spans (must have at least one for testing)"); - for (var i = 0; i < rs.Spans.Count; i++) CheckString(rs.Spans[i].Text, $"{path}.Spans[{i}].Text", form, issues); + for (var i = 0; i < rs.Spans.Count; i++) CheckString(rs.Spans[i].Text, $"{path}.Spans[{i}].Text", form, requireNonTrivial, issues); return; case RichMultiString rms: if (rms.Count == 0) issues.Add($"{path}: RichMultiString has no values (must have at least one for testing)"); - foreach (var (key, value) in rms) Visit(value, $"{path}[{key}]", form, issues); + foreach (var (key, value) in rms) Visit(value, $"{path}[{key}]", form, requireNonTrivial, issues); return; case IEnumerable seq: var i2 = 0; - foreach (var item in seq) Visit(item, $"{path}[{i2++}]", form, issues); + foreach (var item in seq) Visit(item, $"{path}[{i2++}]", form, requireNonTrivial, issues); return; default: break; } - VisitModelProperties(obj, path, form, issues); + VisitModelProperties(obj, path, form, requireNonTrivial, issues); } - private static void VisitModelProperties(object obj, string path, NormalizationForm form, List issues) + private static void VisitModelProperties(object obj, string path, NormalizationForm form, bool requireNonTrivial, List issues) { var type = obj.GetType(); if (type.Namespace?.StartsWith("MiniLcm.Models", StringComparison.Ordinal) != true) @@ -104,7 +104,7 @@ private static void VisitModelProperties(object obj, string path, NormalizationF if (value is null) continue; var propPath = string.IsNullOrEmpty(path) ? prop.Name : $"{path}.{prop.Name}"; - Visit(value, propPath, form, issues); + Visit(value, propPath, form, requireNonTrivial, issues); } } @@ -116,7 +116,7 @@ private static bool IsIgnoredType(Type type) underlying == typeof(DateTimeOffset) || underlying == typeof(decimal); } - private static void CheckString(string? value, string path, NormalizationForm form, List issues) + private static void CheckString(string? value, string path, NormalizationForm form, bool requireNonTrivial, List issues) { if (string.IsNullOrEmpty(value)) { @@ -127,6 +127,21 @@ private static void CheckString(string? value, string path, NormalizationForm fo { var name = FormName(form); issues.Add($"{path}: expected {name} but \"{value}\" is not {name}-normalized"); + return; + } + // Non-trivial check: the string must differ from its conversion to the OTHER form. + // If they're equal, the string is byte-identical in both NFC and NFD (e.g., pure ASCII), + // so it doesn't actually exercise normalization. This catches dead test coverage where + // a field gets ASCII content and silently bypasses the normalizer. + if (requireNonTrivial) + { + var otherForm = form == NormalizationForm.FormC ? NormalizationForm.FormD : NormalizationForm.FormC; + if (value == value.Normalize(otherForm)) + { + var name = FormName(form); + var otherName = FormName(otherForm); + issues.Add($"{path}: \"{value}\" is trivially {name} (identical to its {otherName} form); use content that actually exercises normalization"); + } } } diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs index 78e7ed066d..ea64a6ac30 100644 --- a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -661,23 +661,25 @@ public async Task AddTranslation_NormalizesToNfd() public class NormalizationAssertTests { [Fact] - public void AllNfcFactories_ProduceNfcData() - { - AssertAllNfc(NfcTestData.CreateNfcWritingSystem()); - AssertAllNfc(NfcTestData.CreateNfcPartOfSpeech()); - AssertAllNfc(NfcTestData.CreateNfcPublication()); - AssertAllNfc(NfcTestData.CreateNfcSemanticDomain()); - AssertAllNfc(NfcTestData.CreateNfcComplexFormType()); - AssertAllNfc(NfcTestData.CreateNfcMorphType()); - AssertAllNfc(NfcTestData.CreateNfcTranslation()); - AssertAllNfc(NfcTestData.CreateNfcExampleSentence()); - AssertAllNfc(NfcTestData.CreateNfcExampleSentenceWithTranslations()); - AssertAllNfc(NfcTestData.CreateNfcSense()); - AssertAllNfc(NfcTestData.CreateNfcSenseWithExamples()); - AssertAllNfc(NfcTestData.CreateNfcComplexFormComponent()); - AssertAllNfc(NfcTestData.CreateNfcEntry()); - AssertAllNfc(NfcTestData.CreateNfcEntryWithSenses()); - AssertAllNfc(NfcTestData.CreateNfcEntryWithComponents()); + public void AllNfcFactories_ProduceNonTriviallyNfcData() + { + // requireNonTrivial: every string must differ from its NFD form. Catches ASCII-only test data, + // which would silently bypass the normalizer (ASCII is byte-identical in NFC and NFD). + AssertAllNfc(NfcTestData.CreateNfcWritingSystem(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcPartOfSpeech(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcPublication(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcSemanticDomain(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcComplexFormType(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcMorphType(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcTranslation(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcExampleSentence(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcExampleSentenceWithTranslations(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcSense(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcSenseWithExamples(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcComplexFormComponent(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcEntry(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcEntryWithSenses(), requireNonTrivial: true); + AssertAllNfc(NfcTestData.CreateNfcEntryWithComponents(), requireNonTrivial: true); } [Fact] From 484261b5d312aa2210fce1c0eae05a6b20d49368 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Fri, 29 May 2026 13:55:27 +0200 Subject: [PATCH 2/7] Assert input identity is preserved through normalization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add BeEquivalentTo(input) with an NFC≡NFD string comparer alongside the existing AssertAllNfd(captured). The pure form-check ignores primitives, enums, Guids, and dates, so a normalizer that drops a non-string field (the HomographNumber regression) passes silently — this commit closes that hole. - New `NormalizedStrings()` equivalency option treats NFC-equal-to-NFD as equal so the assertion accepts the normalizer's string changes while requiring everything else to match exactly. - New `AssertNormalized` helper combines AssertAllNfd + BeEquivalentTo. - NfcTestData factories now populate non-default values for non-string scalars (HomographNumber, Order, EntryId, etc.) so the identity assertion has something to compare. A default-valued field can't catch a normalizer that drops it. Verified by reintroducing the Sense.Order drop: previously 0 tests caught it; now 5 fail with `Expected property Senses[0].Order to be 1.5, but found 0`. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../MiniLcm.Tests/Helpers/NfcTestData.cs | 31 ++- .../Helpers/NormalizationAssert.cs | 56 +++++- .../MiniLcm.Tests/WriteNormalizationTests.cs | 189 ++++++++---------- 3 files changed, 151 insertions(+), 125 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs index 61144e6a4f..bd727c2293 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NfcTestData.cs @@ -50,7 +50,8 @@ public static PartOfSpeech CreateNfcPartOfSpeech() return new() { Id = Guid.NewGuid(), - Name = CreateNfcMultiString() + Name = CreateNfcMultiString(), + Predefined = true, }; } @@ -69,7 +70,8 @@ public static SemanticDomain CreateNfcSemanticDomain() { Id = Guid.NewGuid(), Code = Nfc, - Name = CreateNfcMultiString() + Name = CreateNfcMultiString(), + Predefined = true, }; } @@ -105,11 +107,17 @@ public static Translation CreateNfcTranslation() }; } + // Non-string scalars below are deliberately populated with non-default values so that + // identity-preservation assertions (e.g. BeEquivalentTo on the wrapper's output) actually + // exercise those fields. A default-valued field can't catch a normalizer that drops it. + public static ExampleSentence CreateNfcExampleSentence() { return new() { Id = Guid.NewGuid(), + Order = 2.5, + SenseId = Guid.NewGuid(), Sentence = CreateNfcRichMultiString(), Reference = CreateNfcRichString() }; @@ -120,6 +128,8 @@ public static ExampleSentence CreateNfcExampleSentenceWithTranslations() return new() { Id = Guid.NewGuid(), + Order = 2.5, + SenseId = Guid.NewGuid(), Sentence = CreateNfcRichMultiString(), Reference = CreateNfcRichString(), Translations = [CreateNfcTranslation(), CreateNfcTranslation()] @@ -131,6 +141,9 @@ public static Sense CreateNfcSense() return new() { Id = Guid.NewGuid(), + Order = 1.5, + EntryId = Guid.NewGuid(), + PartOfSpeechId = Guid.NewGuid(), Gloss = CreateNfcMultiString(), Definition = CreateNfcRichMultiString() }; @@ -138,13 +151,17 @@ public static Sense CreateNfcSense() public static Sense CreateNfcSenseWithExamples() { + var pos = CreateNfcPartOfSpeech(); return new() { Id = Guid.NewGuid(), + Order = 1.5, + EntryId = Guid.NewGuid(), Gloss = CreateNfcMultiString(), Definition = CreateNfcRichMultiString(), SemanticDomains = [CreateNfcSemanticDomain()], - PartOfSpeech = CreateNfcPartOfSpeech(), + PartOfSpeech = pos, + PartOfSpeechId = pos.Id, ExampleSentences = [CreateNfcExampleSentenceWithTranslations()] }; } @@ -154,8 +171,10 @@ public static ComplexFormComponent CreateNfcComplexFormComponent() return new() { Id = Guid.NewGuid(), + Order = 1.5, ComplexFormEntryId = Guid.NewGuid(), ComponentEntryId = Guid.NewGuid(), + ComponentSenseId = Guid.NewGuid(), ComplexFormHeadword = Nfc, ComponentHeadword = Nfc }; @@ -166,6 +185,8 @@ public static Entry CreateNfcEntry() return new() { Id = Guid.NewGuid(), + HomographNumber = 3, + MorphType = MorphTypeKind.Root, LexemeForm = CreateNfcMultiString(), CitationForm = CreateNfcMultiString(), LiteralMeaning = CreateNfcRichMultiString(), @@ -178,6 +199,8 @@ public static Entry CreateNfcEntryWithSenses() return new() { Id = Guid.NewGuid(), + HomographNumber = 3, + MorphType = MorphTypeKind.Root, LexemeForm = CreateNfcMultiString(), CitationForm = CreateNfcMultiString(), LiteralMeaning = CreateNfcRichMultiString(), @@ -191,6 +214,8 @@ public static Entry CreateNfcEntryWithComponents() return new() { Id = Guid.NewGuid(), + HomographNumber = 3, + MorphType = MorphTypeKind.Root, LexemeForm = CreateNfcMultiString(), CitationForm = CreateNfcMultiString(), LiteralMeaning = CreateNfcRichMultiString(), diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs index 9d6b72ce29..e8eb73eca1 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs @@ -1,9 +1,34 @@ using System.Collections; using System.Reflection; using System.Text; +using FluentAssertions.Equivalency; namespace MiniLcm.Tests.Helpers; +public static class NormalizationEquivalency +{ + /// + /// Configure BeEquivalentTo so strings compare equal modulo NFC/NFD normalization, + /// catching non-string fields the wrapper drops (HomographNumber, Order, etc.) + /// that the form-only check ignores. + /// + public static EquivalencyOptions NormalizedStrings(this EquivalencyOptions options) + { + return options + .Using(ctx => + { + if (ctx.Subject is null || ctx.Expectation is null) + { + ctx.Subject.Should().Be(ctx.Expectation); + return; + } + ctx.Subject.Normalize(NormalizationForm.FormD) + .Should().Be(ctx.Expectation.Normalize(NormalizationForm.FormD)); + }) + .WhenTypeIs(); + } +} + /// /// For verifying that every string-bearing property of an object is normalized (NFC or NFD). /// @@ -22,22 +47,35 @@ public static class NormalizationAssert ], }; - public static void AssertAllNfc(object? obj, bool requireNonTrivial = false) + public static void AssertAllDecomposed(object? obj) { - Assert(obj, NormalizationForm.FormC, requireNonTrivial); + Assert(obj, NormalizationForm.FormD, requireNonTrivial: false); } - public static void AssertAllNfd(object? obj, bool requireNonTrivial = false) + /// + /// Strict NFC plus a non-triviality check: every string must differ from its NFD form. + /// Catches ASCII-only test data, which is byte-identical in NFC and NFD and would + /// silently bypass the normalizer. + /// + public static void AssertAllDecomposable(object? obj) { - Assert(obj, NormalizationForm.FormD, requireNonTrivial); + Assert(obj, NormalizationForm.FormC, requireNonTrivial: true); } - public static bool IsAllNfd(object obj) + /// + /// For verifying the output of the write-normalization wrapper: + /// asserts every string is NFD AND that no non-string field was dropped or mutated + /// (BeEquivalentTo on the input, with NFC≡NFD string equivalence). + /// Catches the field-drop regression class that pure string-form checks ignore. + /// + public static void AssertNormalized(T? captured, T input) where T : class { - return FindIssues(obj, NormalizationForm.FormD, requireNonTrivial: false).Count == 0; + captured.Should().NotBeNull(); + AssertAllDecomposed(captured); + captured.Should().BeEquivalentTo(input, opts => opts.NormalizedStrings()); } - private static void Assert(object? obj, NormalizationForm form, bool requireNonTrivial) +private static void Assert(object? obj, NormalizationForm form, bool requireNonTrivial) { if (obj is null) throw new Xunit.Sdk.XunitException("Expected object to be non-null but was null"); var issues = FindIssues(obj, form, requireNonTrivial); @@ -129,10 +167,6 @@ private static void CheckString(string? value, string path, NormalizationForm fo issues.Add($"{path}: expected {name} but \"{value}\" is not {name}-normalized"); return; } - // Non-trivial check: the string must differ from its conversion to the OTHER form. - // If they're equal, the string is byte-identical in both NFC and NFD (e.g., pure ASCII), - // so it doesn't actually exercise normalization. This catches dead test coverage where - // a field gets ASCII content and silently bypasses the normalizer. if (requireNonTrivial) { var otherForm = form == NormalizationForm.FormC ? NormalizationForm.FormD : NormalizationForm.FormC; diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs index ea64a6ac30..5402ffc014 100644 --- a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -84,8 +84,7 @@ public async Task CreatePartOfSpeech_NormalizesToNfd() await _normalizingApi.CreatePartOfSpeech(pos); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, pos); } [Fact] @@ -103,8 +102,8 @@ public async Task UpdatePartOfSpeech_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdatePartOfSpeech(before, after); - AssertAllNfd(capturedBefore); - AssertAllNfd(capturedAfter); + AssertNormalized(capturedBefore, before); + AssertNormalized(capturedAfter, after); } #endregion @@ -124,8 +123,7 @@ public async Task CreatePublication_NormalizesToNfd() await _normalizingApi.CreatePublication(pub); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, pub); } [Fact] @@ -143,8 +141,8 @@ public async Task UpdatePublication_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdatePublication(before, after); - AssertAllNfd(capturedBefore); - AssertAllNfd(capturedAfter); + AssertNormalized(capturedBefore, before); + AssertNormalized(capturedAfter, after); } #endregion @@ -164,8 +162,7 @@ public async Task CreateSemanticDomain_NormalizesToNfd() await _normalizingApi.CreateSemanticDomain(sd); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, sd); } [Fact] @@ -183,8 +180,8 @@ public async Task UpdateSemanticDomain_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateSemanticDomain(before, after); - AssertAllNfd(capturedBefore); - AssertAllNfd(capturedAfter); + AssertNormalized(capturedBefore, before); + AssertNormalized(capturedAfter, after); } [Fact] @@ -200,8 +197,7 @@ public async Task AddSemanticDomainToSense_NormalizesToNfd() await _normalizingApi.AddSemanticDomainToSense(Guid.NewGuid(), sd); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, sd); } [Fact] @@ -219,8 +215,9 @@ public async Task BulkImportSemanticDomains_NormalizesToNfd() await _normalizingApi.BulkImportSemanticDomains(domains.ToAsyncEnumerable()); - captured.Should().HaveCount(2); - AssertAllNfd(captured); + captured.Should().HaveCount(domains.Length); + AssertAllDecomposed(captured); + captured.Should().BeEquivalentTo(domains, opts => opts.NormalizedStrings().WithStrictOrdering()); } #endregion @@ -240,8 +237,7 @@ public async Task CreateComplexFormType_NormalizesToNfd() await _normalizingApi.CreateComplexFormType(cft); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, cft); } [Fact] @@ -259,8 +255,8 @@ public async Task UpdateComplexFormType_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateComplexFormType(before, after); - AssertAllNfd(capturedBefore); - AssertAllNfd(capturedAfter); + AssertNormalized(capturedBefore, before); + AssertNormalized(capturedAfter, after); } #endregion @@ -282,8 +278,8 @@ public async Task UpdateMorphType_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateMorphType(before, after); - AssertAllNfd(capturedBefore); - AssertAllNfd(capturedAfter); + AssertNormalized(capturedBefore, before); + AssertNormalized(capturedAfter, after); } [Fact] @@ -304,7 +300,7 @@ public async Task UpdateMorphType_JsonPatch_NormalizesToNfd() captured.Should().NotBeNull(); var byPath = captured.Patch.Operations.ToDictionary(o => o.Path!, o => o.Value); - AssertAllNfd(byPath["/Name"].Should().BeOfType().Subject); + AssertAllDecomposed(byPath["/Name"].Should().BeOfType().Subject); byPath["/Prefix"].Should().Be(NfcTestData.Nfd); byPath["/Postfix"].Should().Be(NfcTestData.Nfd); } @@ -326,8 +322,7 @@ public async Task CreateEntry_NormalizesToNfd() await _normalizingApi.CreateEntry(entry); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, entry); } [Fact] @@ -345,8 +340,8 @@ public async Task UpdateEntry_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateEntry(before, after); - AssertAllNfd(capturedBefore); - AssertAllNfd(capturedAfter); + AssertNormalized(capturedBefore, before); + AssertNormalized(capturedAfter, after); } [Fact] @@ -362,8 +357,7 @@ public async Task CreateEntry_WithNestedSenses_NormalizesToNfd() await _normalizingApi.CreateEntry(entry); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, entry); } [Fact] @@ -379,8 +373,7 @@ public async Task CreateEntry_WithComplexFormComponents_NormalizesToNfd() await _normalizingApi.CreateEntry(entry); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, entry); } [Fact] @@ -398,8 +391,9 @@ public async Task BulkCreateEntries_NormalizesToNfd() await _normalizingApi.BulkCreateEntries(entries.ToAsyncEnumerable()); - captured.Should().HaveCount(2); - AssertAllNfd(captured); + captured.Should().HaveCount(entries.Length); + AssertAllDecomposed(captured); + captured.Should().BeEquivalentTo(entries, opts => opts.NormalizedStrings().WithStrictOrdering()); } #endregion @@ -425,7 +419,7 @@ public async Task UpdateEntry_JsonPatch_NormalizesValuesToNfd() captured.Should().NotBeNull(); captured.Should().NotBeSameAs(update); // rebuilt because Entry path normalizes - AssertAllPatchValuesNfd(captured); + AssertAllPatchValuesDecomposed(captured); } [Fact] @@ -475,16 +469,16 @@ public async Task UpdateWritingSystem_JsonPatch_DoesNotNormalize() } /// - /// Asserts every non-null operation value in the patch is NFD. Delegates to - /// , which already understands string, + /// Asserts every non-null operation value in the patch is decomposed. Delegates to + /// , which already understands string, /// string[], MultiString, RichString, and RichMultiString — so failures report a property path. /// - private static void AssertAllPatchValuesNfd(UpdateObjectInput update) where T : class + private static void AssertAllPatchValuesDecomposed(UpdateObjectInput update) where T : class { update.Patch.Operations.Should().NotBeEmpty(); foreach (var op in update.Patch.Operations) { - if (op.Value is not null) AssertAllNfd(op.Value); + if (op.Value is not null) AssertAllDecomposed(op.Value); } } @@ -506,8 +500,7 @@ public async Task CreateComplexFormComponent_NormalizesToNfd() await _normalizingApi.CreateComplexFormComponent(cfc); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, cfc); } #endregion @@ -527,8 +520,7 @@ public async Task CreateSense_NormalizesToNfd() await _normalizingApi.CreateSense(Guid.NewGuid(), sense); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, sense); } [Fact] @@ -547,8 +539,8 @@ public async Task UpdateSense_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateSense(entryId, before, after); - AssertAllNfd(capturedBefore); - AssertAllNfd(capturedAfter); + AssertNormalized(capturedBefore, before); + AssertNormalized(capturedAfter, after); } [Fact] @@ -564,8 +556,7 @@ public async Task CreateSense_WithNestedExampleSentences_NormalizesToNfd() await _normalizingApi.CreateSense(Guid.NewGuid(), sense); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, sense); } #endregion @@ -586,8 +577,7 @@ public async Task CreateExampleSentence_NormalizesToNfd() await _normalizingApi.CreateExampleSentence(Guid.NewGuid(), Guid.NewGuid(), example); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, example); } [Fact] @@ -610,8 +600,8 @@ public async Task UpdateExampleSentence_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateExampleSentence(entryId, senseId, before, after); - AssertAllNfd(capturedBefore); - AssertAllNfd(capturedAfter); + AssertNormalized(capturedBefore, before); + AssertNormalized(capturedAfter, after); } [Fact] @@ -628,8 +618,7 @@ public async Task CreateExampleSentence_WithTranslations_NormalizesToNfd() await _normalizingApi.CreateExampleSentence(Guid.NewGuid(), Guid.NewGuid(), example); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, example); } #endregion @@ -650,8 +639,7 @@ public async Task AddTranslation_NormalizesToNfd() await _normalizingApi.AddTranslation(Guid.NewGuid(), Guid.NewGuid(), Guid.NewGuid(), translation); - captured.Should().NotBeNull(); - AssertAllNfd(captured); + AssertNormalized(captured, translation); } #endregion @@ -661,52 +649,27 @@ public async Task AddTranslation_NormalizesToNfd() public class NormalizationAssertTests { [Fact] - public void AllNfcFactories_ProduceNonTriviallyNfcData() - { - // requireNonTrivial: every string must differ from its NFD form. Catches ASCII-only test data, - // which would silently bypass the normalizer (ASCII is byte-identical in NFC and NFD). - AssertAllNfc(NfcTestData.CreateNfcWritingSystem(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcPartOfSpeech(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcPublication(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcSemanticDomain(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcComplexFormType(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcMorphType(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcTranslation(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcExampleSentence(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcExampleSentenceWithTranslations(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcSense(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcSenseWithExamples(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcComplexFormComponent(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcEntry(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcEntryWithSenses(), requireNonTrivial: true); - AssertAllNfc(NfcTestData.CreateNfcEntryWithComponents(), requireNonTrivial: true); - } - - [Fact] - public void AssertAllNfc_WithNfcData_DoesNotThrow() - { - AssertAllNfc(NfcTestData.CreateNfcEntry()); - } - - [Fact] - public void AssertAllNfc_WithNfdData_Throws() + public void AllNfcFactories_ProduceDecomposableData() { - var entry = new Entry - { - Id = Guid.NewGuid(), - LexemeForm = new MultiString { Values = { { "en", NfcTestData.Nfd } } }, // NFD should fail - CitationForm = new MultiString { Values = { { "en", NfcTestData.Nfc } } }, - LiteralMeaning = new RichMultiString { { "en", new RichString(NfcTestData.Nfc) } }, - Note = new RichMultiString { { "en", new RichString(NfcTestData.Nfc) } } - }; - - var act = () => AssertAllNfc(entry); - - act.Should().Throw().WithMessage("*NFC*"); + AssertAllDecomposable(NfcTestData.CreateNfcWritingSystem()); + AssertAllDecomposable(NfcTestData.CreateNfcPartOfSpeech()); + AssertAllDecomposable(NfcTestData.CreateNfcPublication()); + AssertAllDecomposable(NfcTestData.CreateNfcSemanticDomain()); + AssertAllDecomposable(NfcTestData.CreateNfcComplexFormType()); + AssertAllDecomposable(NfcTestData.CreateNfcMorphType()); + AssertAllDecomposable(NfcTestData.CreateNfcTranslation()); + AssertAllDecomposable(NfcTestData.CreateNfcExampleSentence()); + AssertAllDecomposable(NfcTestData.CreateNfcExampleSentenceWithTranslations()); + AssertAllDecomposable(NfcTestData.CreateNfcSense()); + AssertAllDecomposable(NfcTestData.CreateNfcSenseWithExamples()); + AssertAllDecomposable(NfcTestData.CreateNfcComplexFormComponent()); + AssertAllDecomposable(NfcTestData.CreateNfcEntry()); + AssertAllDecomposable(NfcTestData.CreateNfcEntryWithSenses()); + AssertAllDecomposable(NfcTestData.CreateNfcEntryWithComponents()); } [Fact] - public void AssertAllNfd_WithNfdData_DoesNotThrow() + public void AssertAllDecomposed_WithDecomposedData_DoesNotThrow() { var entry = new Entry { @@ -717,19 +680,19 @@ public void AssertAllNfd_WithNfdData_DoesNotThrow() Note = new RichMultiString { { "en", new RichString(NfcTestData.Nfd) } } }; - AssertAllNfd(entry); + AssertAllDecomposed(entry); } [Fact] - public void AssertAllNfd_WithNfcData_Throws() + public void AssertAllDecomposed_WithComposedData_Throws() { - var act = () => AssertAllNfd(NfcTestData.CreateNfcEntry()); + var act = () => AssertAllDecomposed(NfcTestData.CreateNfcEntry()); act.Should().Throw().WithMessage("*NFD*"); } [Fact] - public void AssertAllNfc_WithEmptyMultiString_Throws() + public void AssertAllDecomposable_WithEmptyMultiString_Throws() { var entry = new Entry { @@ -738,13 +701,13 @@ public void AssertAllNfc_WithEmptyMultiString_Throws() CitationForm = NfcTestData.CreateNfcMultiString() }; - var act = () => AssertAllNfc(entry); + var act = () => AssertAllDecomposable(entry); act.Should().Throw().WithMessage("*no values*"); } [Fact] - public void AssertAllNfc_WithNestedNfdData_Throws() + public void AssertAllDecomposable_WithNestedNfdData_Throws() { var entry = new Entry { @@ -764,22 +727,26 @@ public void AssertAllNfc_WithNestedNfdData_Throws() ] }; - var act = () => AssertAllNfc(entry); + var act = () => AssertAllDecomposable(entry); act.Should().Throw().WithMessage("*Senses*Gloss*"); } [Fact] - public void IsAllNfd_WithNfdData_ReturnsTrue() + public void AssertAllDecomposable_WithAsciiData_Throws() { - var multiString = new MultiString { Values = { { "en", NfcTestData.Nfd } } }; + var entry = new Entry + { + Id = Guid.NewGuid(), + LexemeForm = new MultiString { Values = { { "en", "naive" } } }, // ASCII -> trivially NFC + CitationForm = NfcTestData.CreateNfcMultiString(), + LiteralMeaning = NfcTestData.CreateNfcRichMultiString(), + Note = NfcTestData.CreateNfcRichMultiString() + }; - IsAllNfd(multiString).Should().BeTrue(); - } + var act = () => AssertAllDecomposable(entry); - [Fact] - public void IsAllNfd_WithNfcData_ReturnsFalse() - { - IsAllNfd(NfcTestData.CreateNfcMultiString()).Should().BeFalse(); + act.Should().Throw().WithMessage("*trivially*"); } + } From 5a0409721b99b86719d9ed739af6e30535c66c0d Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Mon, 1 Jun 2026 15:35:33 +0200 Subject: [PATCH 3/7] Normalize CRDT writes via Copy() so dropped fields are impossible The write-normalization wrapper rebuilt each model with object initializers, duplicating the field list, so a field added to a model later but missed here would be silently reset to default on write. The seven non-record normalizers now copy-then-overwrite text (records already get this from with-expressions); non-text fields, including future ones, ride along via the per-type Copy() method, whose completeness is enforced by EntityCopyMethodTests. Behaviour is unchanged for every current field. Also make Publication.Copy() return Publication instead of IObjectWithId (it implemented only the non-generic IPossibility), dropping two now-redundant casts. Co-Authored-By: Claude Opus 4.8 (1M context) --- backend/FwLite/MiniLcm/Models/Entry.cs | 2 +- backend/FwLite/MiniLcm/Models/Publication.cs | 4 +- .../MiniLcmApiWriteNormalizationWrapper.cs | 96 +++++++------------ 3 files changed, 38 insertions(+), 64 deletions(-) diff --git a/backend/FwLite/MiniLcm/Models/Entry.cs b/backend/FwLite/MiniLcm/Models/Entry.cs index 81cbe54dc2..f4ce83daa6 100644 --- a/backend/FwLite/MiniLcm/Models/Entry.cs +++ b/backend/FwLite/MiniLcm/Models/Entry.cs @@ -78,7 +78,7 @@ public Entry Copy() [ ..ComplexFormTypes.Select(cft => cft.Copy()) ], - PublishIn = [ ..PublishIn.Select(p => (Publication)p.Copy())] + PublishIn = [ ..PublishIn.Select(p => p.Copy())] }; } diff --git a/backend/FwLite/MiniLcm/Models/Publication.cs b/backend/FwLite/MiniLcm/Models/Publication.cs index b13545622d..5f7d5db9b3 100644 --- a/backend/FwLite/MiniLcm/Models/Publication.cs +++ b/backend/FwLite/MiniLcm/Models/Publication.cs @@ -1,6 +1,6 @@ namespace MiniLcm.Models; -public class Publication : IPossibility +public class Publication : IPossibility, IObjectWithId { public required Guid Id { get; set; } public DateTimeOffset? DeletedAt { get; set; } @@ -14,7 +14,7 @@ public void RemoveReference(Guid id, DateTimeOffset time) return; } - public IObjectWithId Copy() + public Publication Copy() { return new Publication() { diff --git a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs index 27a368fb30..aa9f030172 100644 --- a/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs +++ b/backend/FwLite/MiniLcm/Normalization/MiniLcmApiWriteNormalizationWrapper.cs @@ -34,6 +34,9 @@ public IMiniLcmApi Create(IMiniLcmApi api) /// Non-normalized data that is already persisted (before this wrapper was introduced) will be normalized by LibLcm. That's not something we want to fix here. /// - Properties that liblcm/FieldWorks does not NFD-normalize are passed through unchanged. /// Currently only WritingSystem properties. +/// - Each normalizer starts from the model's own Copy() (or `with` for records) and overwrites only the +/// text-bearing fields, so non-text fields — including any added later — are preserved automatically rather +/// than re-listed here. Copy() completeness is enforced by LcmCrdt.Tests.EntityCopyMethodTests. /// - JsonPatch overloads normalize string-ish values best-effort (string, RichString, MultiString, RichMultiString). /// JsonElement values are only normalized when they are simple strings; complex JSON values are left as-is /// to avoid guessing the target type. @@ -99,13 +102,9 @@ public Task DeletePartOfSpeech(Guid id) private static PartOfSpeech NormalizePartOfSpeech(PartOfSpeech pos) { - return new PartOfSpeech - { - Id = pos.Id, - Name = StringNormalizer.Normalize(pos.Name), - DeletedAt = pos.DeletedAt, - Predefined = pos.Predefined - }; + var copy = pos.Copy(); + copy.Name = StringNormalizer.Normalize(pos.Name); + return copy; } #endregion @@ -135,12 +134,9 @@ public Task DeletePublication(Guid id) private static Publication NormalizePublication(Publication pub) { - return new Publication - { - Id = pub.Id, - Name = StringNormalizer.Normalize(pub.Name), - DeletedAt = pub.DeletedAt - }; + var copy = pub.Copy(); + copy.Name = StringNormalizer.Normalize(pub.Name); + return copy; } #endregion @@ -170,14 +166,10 @@ public Task DeleteSemanticDomain(Guid id) private static SemanticDomain NormalizeSemanticDomain(SemanticDomain sd) { - return new SemanticDomain - { - Id = sd.Id, - Name = StringNormalizer.Normalize(sd.Name), - Code = StringNormalizer.Normalize(sd.Code), // yes, LibLcm normalizes this too - DeletedAt = sd.DeletedAt, - Predefined = sd.Predefined - }; + var copy = sd.Copy(); + copy.Name = StringNormalizer.Normalize(sd.Name); + copy.Code = StringNormalizer.Normalize(sd.Code); // yes, LibLcm normalizes this too + return copy; } #endregion @@ -230,18 +222,13 @@ public async Task UpdateMorphType(MorphType before, MorphType after, private static MorphType NormalizeMorphType(MorphType mt) { - return new MorphType - { - Id = mt.Id, - Kind = mt.Kind, - Name = StringNormalizer.Normalize(mt.Name), - Abbreviation = StringNormalizer.Normalize(mt.Abbreviation), - Description = StringNormalizer.Normalize(mt.Description), - Prefix = StringNormalizer.Normalize(mt.Prefix), - Postfix = StringNormalizer.Normalize(mt.Postfix), - SecondaryOrder = mt.SecondaryOrder, - DeletedAt = mt.DeletedAt - }; + var copy = mt.Copy(); + copy.Name = StringNormalizer.Normalize(mt.Name); + copy.Abbreviation = StringNormalizer.Normalize(mt.Abbreviation); + copy.Description = StringNormalizer.Normalize(mt.Description); + copy.Prefix = StringNormalizer.Normalize(mt.Prefix); + copy.Postfix = StringNormalizer.Normalize(mt.Postfix); + return copy; } #endregion @@ -374,19 +361,13 @@ public Task SetSensePartOfSpeech(Guid senseId, Guid? partOfSpeechId) private static Sense NormalizeSense(Sense sense) { - return new Sense - { - Id = sense.Id, - Order = sense.Order, - DeletedAt = sense.DeletedAt, - EntryId = sense.EntryId, - Definition = StringNormalizer.Normalize(sense.Definition), - Gloss = StringNormalizer.Normalize(sense.Gloss), - PartOfSpeech = sense.PartOfSpeech is not null ? NormalizePartOfSpeech(sense.PartOfSpeech) : null, - PartOfSpeechId = sense.PartOfSpeechId, - SemanticDomains = [.. sense.SemanticDomains.Select(NormalizeSemanticDomain)], - ExampleSentences = [.. sense.ExampleSentences.Select(NormalizeExampleSentence)] - }; + var copy = sense.Copy(); + copy.Definition = StringNormalizer.Normalize(sense.Definition); + copy.Gloss = StringNormalizer.Normalize(sense.Gloss); + copy.PartOfSpeech = sense.PartOfSpeech is not null ? NormalizePartOfSpeech(sense.PartOfSpeech) : null; + copy.SemanticDomains = [.. sense.SemanticDomains.Select(NormalizeSemanticDomain)]; + copy.ExampleSentences = [.. sense.ExampleSentences.Select(NormalizeExampleSentence)]; + return copy; } #endregion @@ -437,25 +418,18 @@ public Task UpdateTranslation(Guid entryId, Guid senseId, Guid exampleSentenceId private static ExampleSentence NormalizeExampleSentence(ExampleSentence example) { - return new ExampleSentence - { - Id = example.Id, - Order = example.Order, - DeletedAt = example.DeletedAt, - SenseId = example.SenseId, - Sentence = StringNormalizer.Normalize(example.Sentence), - Translations = [.. example.Translations.Select(NormalizeTranslation)], - Reference = StringNormalizer.Normalize(example.Reference) - }; + var copy = example.Copy(); + copy.Sentence = StringNormalizer.Normalize(example.Sentence); + copy.Translations = [.. example.Translations.Select(NormalizeTranslation)]; + copy.Reference = StringNormalizer.Normalize(example.Reference); + return copy; } private static Translation NormalizeTranslation(Translation translation) { - return new Translation - { - Id = translation.Id, - Text = StringNormalizer.Normalize(translation.Text) - }; + var copy = translation.Copy(); + copy.Text = StringNormalizer.Normalize(translation.Text); + return copy; } #endregion From f91c0a8bb1e2adf79ef7de23bf5bd6a9455f8c62 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Mon, 1 Jun 2026 15:35:39 +0200 Subject: [PATCH 4/7] Harden normalization test assertions Remove a vacuous AssertAllDecomposable(WritingSystem) call (all its WS string fields are skipped, so it asserted nothing); isolate the empty-MultiString negative test so it cannot pass on an unrelated "no values" message; and replace the NormalizationAssert (form, requireNonTrivial) flag pair, two params encoding one bit threaded through five methods, with a single named leaf-check delegate. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../Helpers/NormalizationAssert.cs | 70 +++++++++---------- .../MiniLcm.Tests/WriteNormalizationTests.cs | 13 ++-- 2 files changed, 43 insertions(+), 40 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs index e8eb73eca1..3950247bbc 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs @@ -49,7 +49,7 @@ public static class NormalizationAssert public static void AssertAllDecomposed(object? obj) { - Assert(obj, NormalizationForm.FormD, requireNonTrivial: false); + Assert(obj, "NFD", CheckNfd); } /// @@ -59,7 +59,7 @@ public static void AssertAllDecomposed(object? obj) /// public static void AssertAllDecomposable(object? obj) { - Assert(obj, NormalizationForm.FormC, requireNonTrivial: true); + Assert(obj, "decomposable NFC", CheckDecomposableNfc); } /// @@ -75,58 +75,57 @@ public static void AssertNormalized(T? captured, T input) where T : class captured.Should().BeEquivalentTo(input, opts => opts.NormalizedStrings()); } -private static void Assert(object? obj, NormalizationForm form, bool requireNonTrivial) + private static void Assert(object? obj, string description, Action> check) { if (obj is null) throw new Xunit.Sdk.XunitException("Expected object to be non-null but was null"); - var issues = FindIssues(obj, form, requireNonTrivial); + var issues = FindIssues(obj, check); if (issues.Count == 0) return; - var name = FormName(form); throw new Xunit.Sdk.XunitException( - $"Expected all normalizable properties to contain {name} strings, but found issues:\n" + + $"Expected all normalizable properties to contain {description} strings, but found issues:\n" + string.Join("\n", issues.Select(i => " - " + i)) ); } - private static List FindIssues(object obj, NormalizationForm form, bool requireNonTrivial) + private static List FindIssues(object obj, Action> check) { var issues = new List(); - Visit(obj, "", form, requireNonTrivial, issues); + Visit(obj, "", check, issues); return issues; } - private static void Visit(object? obj, string path, NormalizationForm form, bool requireNonTrivial, List issues) + private static void Visit(object? obj, string path, Action> check, List issues) { switch (obj) { case null: return; case string s: - CheckString(s, path, form, requireNonTrivial, issues); + CheckString(s, path, check, issues); return; case MultiString ms: if (ms.Values.Count == 0) issues.Add($"{path}: MultiString has no values (must have at least one for testing)"); - foreach (var (key, value) in ms.Values) CheckString(value, $"{path}.Values[{key}]", form, requireNonTrivial, issues); + foreach (var (key, value) in ms.Values) CheckString(value, $"{path}.Values[{key}]", check, issues); return; case RichString rs: if (rs.Spans.Count == 0) issues.Add($"{path}: RichString has no spans (must have at least one for testing)"); - for (var i = 0; i < rs.Spans.Count; i++) CheckString(rs.Spans[i].Text, $"{path}.Spans[{i}].Text", form, requireNonTrivial, issues); + for (var i = 0; i < rs.Spans.Count; i++) CheckString(rs.Spans[i].Text, $"{path}.Spans[{i}].Text", check, issues); return; case RichMultiString rms: if (rms.Count == 0) issues.Add($"{path}: RichMultiString has no values (must have at least one for testing)"); - foreach (var (key, value) in rms) Visit(value, $"{path}[{key}]", form, requireNonTrivial, issues); + foreach (var (key, value) in rms) Visit(value, $"{path}[{key}]", check, issues); return; case IEnumerable seq: var i2 = 0; - foreach (var item in seq) Visit(item, $"{path}[{i2++}]", form, requireNonTrivial, issues); + foreach (var item in seq) Visit(item, $"{path}[{i2++}]", check, issues); return; default: break; } - VisitModelProperties(obj, path, form, requireNonTrivial, issues); + VisitModelProperties(obj, path, check, issues); } - private static void VisitModelProperties(object obj, string path, NormalizationForm form, bool requireNonTrivial, List issues) + private static void VisitModelProperties(object obj, string path, Action> check, List issues) { var type = obj.GetType(); if (type.Namespace?.StartsWith("MiniLcm.Models", StringComparison.Ordinal) != true) @@ -142,7 +141,7 @@ private static void VisitModelProperties(object obj, string path, NormalizationF if (value is null) continue; var propPath = string.IsNullOrEmpty(path) ? prop.Name : $"{path}.{prop.Name}"; - Visit(value, propPath, form, requireNonTrivial, issues); + Visit(value, propPath, check, issues); } } @@ -154,33 +153,32 @@ private static bool IsIgnoredType(Type type) underlying == typeof(DateTimeOffset) || underlying == typeof(decimal); } - private static void CheckString(string? value, string path, NormalizationForm form, bool requireNonTrivial, List issues) + private static void CheckString(string? value, string path, Action> check, List issues) { if (string.IsNullOrEmpty(value)) { issues.Add($"{path}: string is null or empty (must have a value for testing)"); return; } - if (!value.IsNormalized(form)) - { - var name = FormName(form); - issues.Add($"{path}: expected {name} but \"{value}\" is not {name}-normalized"); - return; - } - if (requireNonTrivial) - { - var otherForm = form == NormalizationForm.FormC ? NormalizationForm.FormD : NormalizationForm.FormC; - if (value == value.Normalize(otherForm)) - { - var name = FormName(form); - var otherName = FormName(otherForm); - issues.Add($"{path}: \"{value}\" is trivially {name} (identical to its {otherName} form); use content that actually exercises normalization"); - } - } + check(value, path, issues); } - private static string FormName(NormalizationForm form) + // Per-form leaf checks. AssertAllDecomposed verifies wrapper OUTPUT is NFD; AssertAllDecomposable verifies + // INPUT test data is NFC AND actually decomposes (rejecting ASCII that would silently no-op the normalizer). + private static void CheckNfd(string value, string path, List issues) { - return form == NormalizationForm.FormC ? "NFC" : "NFD"; + if (!value.IsNormalized(NormalizationForm.FormD)) + issues.Add($"{path}: expected NFD but \"{value}\" is not NFD-normalized"); + } + + private static void CheckDecomposableNfc(string value, string path, List issues) + { + if (!value.IsNormalized(NormalizationForm.FormC)) + { + issues.Add($"{path}: expected NFC but \"{value}\" is not NFC-normalized"); + return; + } + if (value == value.Normalize(NormalizationForm.FormD)) + issues.Add($"{path}: \"{value}\" is trivially NFC (identical to its NFD form); use content that actually exercises normalization"); } } diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs index 5402ffc014..853a699f41 100644 --- a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -651,7 +651,8 @@ public class NormalizationAssertTests [Fact] public void AllNfcFactories_ProduceDecomposableData() { - AssertAllDecomposable(NfcTestData.CreateNfcWritingSystem()); + // WritingSystem is intentionally omitted: its string fields are not normalized + // (see NormalizationAssert.SkippedProperties), so AssertAllDecomposable would assert nothing. AssertAllDecomposable(NfcTestData.CreateNfcPartOfSpeech()); AssertAllDecomposable(NfcTestData.CreateNfcPublication()); AssertAllDecomposable(NfcTestData.CreateNfcSemanticDomain()); @@ -697,13 +698,17 @@ public void AssertAllDecomposable_WithEmptyMultiString_Throws() var entry = new Entry { Id = Guid.NewGuid(), - LexemeForm = [], // Empty - should fail - CitationForm = NfcTestData.CreateNfcMultiString() + LexemeForm = [], // Empty MultiString — the only expected issue + CitationForm = NfcTestData.CreateNfcMultiString(), + LiteralMeaning = NfcTestData.CreateNfcRichMultiString(), + Note = NfcTestData.CreateNfcRichMultiString() }; var act = () => AssertAllDecomposable(entry); - act.Should().Throw().WithMessage("*no values*"); + // Scope to LexemeForm so this exercises the empty-MultiString check specifically; + // an empty RichMultiString would also report "no values". + act.Should().Throw().WithMessage("*LexemeForm*no values*"); } [Fact] From bd4142e54e76399d36305cf07d791229e6580ac4 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Mon, 1 Jun 2026 17:35:47 +0200 Subject: [PATCH 5/7] Polish normalization test helper per review Comments now describe the guard as catching data byte-identical in NFC and NFD (ASCII being one example), not ASCII specifically. Rename AssertNormalized to AssertNormalizedToNfd since it is NFD-specific. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../Helpers/NormalizationAssert.cs | 6 +- .../MiniLcm.Tests/WriteNormalizationTests.cs | 60 +++++++++---------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs index 3950247bbc..1b85d0121e 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs @@ -54,7 +54,7 @@ public static void AssertAllDecomposed(object? obj) /// /// Strict NFC plus a non-triviality check: every string must differ from its NFD form. - /// Catches ASCII-only test data, which is byte-identical in NFC and NFD and would + /// Catches test data, which is byte-identical in NFC and NFD (e.g. ASCII) and would /// silently bypass the normalizer. /// public static void AssertAllDecomposable(object? obj) @@ -68,7 +68,7 @@ public static void AssertAllDecomposable(object? obj) /// (BeEquivalentTo on the input, with NFC≡NFD string equivalence). /// Catches the field-drop regression class that pure string-form checks ignore. /// - public static void AssertNormalized(T? captured, T input) where T : class + public static void AssertNormalizedToNfd(T? captured, T input) where T : class { captured.Should().NotBeNull(); AssertAllDecomposed(captured); @@ -164,7 +164,7 @@ private static void CheckString(string? value, string path, Action issues) { if (!value.IsNormalized(NormalizationForm.FormD)) diff --git a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs index 853a699f41..0b76444495 100644 --- a/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs +++ b/backend/FwLite/MiniLcm.Tests/WriteNormalizationTests.cs @@ -84,7 +84,7 @@ public async Task CreatePartOfSpeech_NormalizesToNfd() await _normalizingApi.CreatePartOfSpeech(pos); - AssertNormalized(captured, pos); + AssertNormalizedToNfd(captured, pos); } [Fact] @@ -102,8 +102,8 @@ public async Task UpdatePartOfSpeech_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdatePartOfSpeech(before, after); - AssertNormalized(capturedBefore, before); - AssertNormalized(capturedAfter, after); + AssertNormalizedToNfd(capturedBefore, before); + AssertNormalizedToNfd(capturedAfter, after); } #endregion @@ -123,7 +123,7 @@ public async Task CreatePublication_NormalizesToNfd() await _normalizingApi.CreatePublication(pub); - AssertNormalized(captured, pub); + AssertNormalizedToNfd(captured, pub); } [Fact] @@ -141,8 +141,8 @@ public async Task UpdatePublication_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdatePublication(before, after); - AssertNormalized(capturedBefore, before); - AssertNormalized(capturedAfter, after); + AssertNormalizedToNfd(capturedBefore, before); + AssertNormalizedToNfd(capturedAfter, after); } #endregion @@ -162,7 +162,7 @@ public async Task CreateSemanticDomain_NormalizesToNfd() await _normalizingApi.CreateSemanticDomain(sd); - AssertNormalized(captured, sd); + AssertNormalizedToNfd(captured, sd); } [Fact] @@ -180,8 +180,8 @@ public async Task UpdateSemanticDomain_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateSemanticDomain(before, after); - AssertNormalized(capturedBefore, before); - AssertNormalized(capturedAfter, after); + AssertNormalizedToNfd(capturedBefore, before); + AssertNormalizedToNfd(capturedAfter, after); } [Fact] @@ -197,7 +197,7 @@ public async Task AddSemanticDomainToSense_NormalizesToNfd() await _normalizingApi.AddSemanticDomainToSense(Guid.NewGuid(), sd); - AssertNormalized(captured, sd); + AssertNormalizedToNfd(captured, sd); } [Fact] @@ -237,7 +237,7 @@ public async Task CreateComplexFormType_NormalizesToNfd() await _normalizingApi.CreateComplexFormType(cft); - AssertNormalized(captured, cft); + AssertNormalizedToNfd(captured, cft); } [Fact] @@ -255,8 +255,8 @@ public async Task UpdateComplexFormType_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateComplexFormType(before, after); - AssertNormalized(capturedBefore, before); - AssertNormalized(capturedAfter, after); + AssertNormalizedToNfd(capturedBefore, before); + AssertNormalizedToNfd(capturedAfter, after); } #endregion @@ -278,8 +278,8 @@ public async Task UpdateMorphType_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateMorphType(before, after); - AssertNormalized(capturedBefore, before); - AssertNormalized(capturedAfter, after); + AssertNormalizedToNfd(capturedBefore, before); + AssertNormalizedToNfd(capturedAfter, after); } [Fact] @@ -322,7 +322,7 @@ public async Task CreateEntry_NormalizesToNfd() await _normalizingApi.CreateEntry(entry); - AssertNormalized(captured, entry); + AssertNormalizedToNfd(captured, entry); } [Fact] @@ -340,8 +340,8 @@ public async Task UpdateEntry_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateEntry(before, after); - AssertNormalized(capturedBefore, before); - AssertNormalized(capturedAfter, after); + AssertNormalizedToNfd(capturedBefore, before); + AssertNormalizedToNfd(capturedAfter, after); } [Fact] @@ -357,7 +357,7 @@ public async Task CreateEntry_WithNestedSenses_NormalizesToNfd() await _normalizingApi.CreateEntry(entry); - AssertNormalized(captured, entry); + AssertNormalizedToNfd(captured, entry); } [Fact] @@ -373,7 +373,7 @@ public async Task CreateEntry_WithComplexFormComponents_NormalizesToNfd() await _normalizingApi.CreateEntry(entry); - AssertNormalized(captured, entry); + AssertNormalizedToNfd(captured, entry); } [Fact] @@ -500,7 +500,7 @@ public async Task CreateComplexFormComponent_NormalizesToNfd() await _normalizingApi.CreateComplexFormComponent(cfc); - AssertNormalized(captured, cfc); + AssertNormalizedToNfd(captured, cfc); } #endregion @@ -520,7 +520,7 @@ public async Task CreateSense_NormalizesToNfd() await _normalizingApi.CreateSense(Guid.NewGuid(), sense); - AssertNormalized(captured, sense); + AssertNormalizedToNfd(captured, sense); } [Fact] @@ -539,8 +539,8 @@ public async Task UpdateSense_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateSense(entryId, before, after); - AssertNormalized(capturedBefore, before); - AssertNormalized(capturedAfter, after); + AssertNormalizedToNfd(capturedBefore, before); + AssertNormalizedToNfd(capturedAfter, after); } [Fact] @@ -556,7 +556,7 @@ public async Task CreateSense_WithNestedExampleSentences_NormalizesToNfd() await _normalizingApi.CreateSense(Guid.NewGuid(), sense); - AssertNormalized(captured, sense); + AssertNormalizedToNfd(captured, sense); } #endregion @@ -577,7 +577,7 @@ public async Task CreateExampleSentence_NormalizesToNfd() await _normalizingApi.CreateExampleSentence(Guid.NewGuid(), Guid.NewGuid(), example); - AssertNormalized(captured, example); + AssertNormalizedToNfd(captured, example); } [Fact] @@ -600,8 +600,8 @@ public async Task UpdateExampleSentence_BeforeAfter_NormalizesBothToNfd() await _normalizingApi.UpdateExampleSentence(entryId, senseId, before, after); - AssertNormalized(capturedBefore, before); - AssertNormalized(capturedAfter, after); + AssertNormalizedToNfd(capturedBefore, before); + AssertNormalizedToNfd(capturedAfter, after); } [Fact] @@ -618,7 +618,7 @@ public async Task CreateExampleSentence_WithTranslations_NormalizesToNfd() await _normalizingApi.CreateExampleSentence(Guid.NewGuid(), Guid.NewGuid(), example); - AssertNormalized(captured, example); + AssertNormalizedToNfd(captured, example); } #endregion @@ -639,7 +639,7 @@ public async Task AddTranslation_NormalizesToNfd() await _normalizingApi.AddTranslation(Guid.NewGuid(), Guid.NewGuid(), Guid.NewGuid(), translation); - AssertNormalized(captured, translation); + AssertNormalizedToNfd(captured, translation); } #endregion From 6276a96c0028b0bf7c00bcf81fef6e8f78fff1bd Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Mon, 1 Jun 2026 17:37:21 +0200 Subject: [PATCH 6/7] Bundle string-check label and validator into a StringCheck record MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the (description, Action>) pair threaded through the traversal — two loosely coupled params a caller could mismatch — with a single StringCheck record bundling the failure-header label and a validator that maps a string to an issue message or null. Removes the mutable-list delegate and the mismatch risk; behaviour and failure messages are unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../Helpers/NormalizationAssert.cs | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs index 1b85d0121e..8c3894b46c 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs @@ -49,7 +49,7 @@ public static class NormalizationAssert public static void AssertAllDecomposed(object? obj) { - Assert(obj, "NFD", CheckNfd); + Assert(obj, Nfd); } /// @@ -59,7 +59,7 @@ public static void AssertAllDecomposed(object? obj) /// public static void AssertAllDecomposable(object? obj) { - Assert(obj, "decomposable NFC", CheckDecomposableNfc); + Assert(obj, DecomposableNfc); } /// @@ -75,25 +75,25 @@ public static void AssertNormalizedToNfd(T? captured, T input) where T : clas captured.Should().BeEquivalentTo(input, opts => opts.NormalizedStrings()); } - private static void Assert(object? obj, string description, Action> check) + private static void Assert(object? obj, StringCheck check) { if (obj is null) throw new Xunit.Sdk.XunitException("Expected object to be non-null but was null"); var issues = FindIssues(obj, check); if (issues.Count == 0) return; throw new Xunit.Sdk.XunitException( - $"Expected all normalizable properties to contain {description} strings, but found issues:\n" + + $"Expected all normalizable properties to contain {check.Label} strings, but found issues:\n" + string.Join("\n", issues.Select(i => " - " + i)) ); } - private static List FindIssues(object obj, Action> check) + private static List FindIssues(object obj, StringCheck check) { var issues = new List(); Visit(obj, "", check, issues); return issues; } - private static void Visit(object? obj, string path, Action> check, List issues) + private static void Visit(object? obj, string path, StringCheck check, List issues) { switch (obj) { @@ -125,7 +125,7 @@ private static void Visit(object? obj, string path, Action> check, List issues) + private static void VisitModelProperties(object obj, string path, StringCheck check, List issues) { var type = obj.GetType(); if (type.Namespace?.StartsWith("MiniLcm.Models", StringComparison.Ordinal) != true) @@ -153,32 +153,32 @@ private static bool IsIgnoredType(Type type) underlying == typeof(DateTimeOffset) || underlying == typeof(decimal); } - private static void CheckString(string? value, string path, Action> check, List issues) + private static void CheckString(string? value, string path, StringCheck check, List issues) { if (string.IsNullOrEmpty(value)) { issues.Add($"{path}: string is null or empty (must have a value for testing)"); return; } - check(value, path, issues); + var issue = check.Validate(value); + if (issue != null) issues.Add($"{path}: {issue}"); } - // Per-form leaf checks. AssertAllDecomposed verifies wrapper OUTPUT is NFD; AssertAllDecomposable verifies - // INPUT test data is NFC AND actually decomposes — content byte-identical in NFC and NFD (e.g. ASCII) would silently no-op the normalizer. - private static void CheckNfd(string value, string path, List issues) - { - if (!value.IsNormalized(NormalizationForm.FormD)) - issues.Add($"{path}: expected NFD but \"{value}\" is not NFD-normalized"); - } - - private static void CheckDecomposableNfc(string value, string path, List issues) - { - if (!value.IsNormalized(NormalizationForm.FormC)) - { - issues.Add($"{path}: expected NFC but \"{value}\" is not NFC-normalized"); - return; - } - if (value == value.Normalize(NormalizationForm.FormD)) - issues.Add($"{path}: \"{value}\" is trivially NFC (identical to its NFD form); use content that actually exercises normalization"); - } + // A check pairs the failure-header label with a validator that returns an issue for one string + // (or null if it is fine), so a caller can't mismatch label and logic. + private sealed record StringCheck(string Label, Func Validate); + + // AssertAllDecomposed verifies wrapper OUTPUT is NFD; AssertAllDecomposable verifies INPUT test data is + // NFC AND actually decomposes — content byte-identical in NFC and NFD (e.g. ASCII) would silently no-op the normalizer. + private static readonly StringCheck Nfd = new("NFD", value => + value.IsNormalized(NormalizationForm.FormD) + ? null + : $"expected NFD but \"{value}\" is not NFD-normalized"); + + private static readonly StringCheck DecomposableNfc = new("decomposable NFC", value => + !value.IsNormalized(NormalizationForm.FormC) + ? $"expected NFC but \"{value}\" is not NFC-normalized" + : value == value.Normalize(NormalizationForm.FormD) + ? $"\"{value}\" is trivially NFC (identical to its NFD form); use content that actually exercises normalization" + : null); } From 30ac45e8ed9f417e33fc119438a14d1d71df4680 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Tue, 2 Jun 2026 11:13:04 +0200 Subject: [PATCH 7/7] Drop duplicated NFD-triviality rationale in test helper The "ASCII bypasses the normalizer" reason was stated three times: in the AssertAllDecomposable docstring, again above the validators, and in the DecomposableNfc failure message. Keep the docstring and the failure message; remove the redundant block comment. Co-Authored-By: Claude Opus 4.8 (1M context) --- backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs index 8c3894b46c..4300daf9ab 100644 --- a/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs +++ b/backend/FwLite/MiniLcm.Tests/Helpers/NormalizationAssert.cs @@ -168,8 +168,6 @@ private static void CheckString(string? value, string path, StringCheck check, L // (or null if it is fine), so a caller can't mismatch label and logic. private sealed record StringCheck(string Label, Func Validate); - // AssertAllDecomposed verifies wrapper OUTPUT is NFD; AssertAllDecomposable verifies INPUT test data is - // NFC AND actually decomposes — content byte-identical in NFC and NFD (e.g. ASCII) would silently no-op the normalizer. private static readonly StringCheck Nfd = new("NFD", value => value.IsNormalized(NormalizationForm.FormD) ? null