-
Notifications
You must be signed in to change notification settings - Fork 2
Add DataModelMaintenance.ReseedProject for re-identifying bulk-loaded chains #68
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
myieye
wants to merge
2
commits into
main
Choose a base branch
from
reseed-project-api
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
205 changes: 205 additions & 0 deletions
205
src/SIL.Harmony.Tests/Maintenance/ReseedProjectTests.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,205 @@ | ||
| using Microsoft.EntityFrameworkCore; | ||
| using SIL.Harmony.Changes; | ||
| using SIL.Harmony.Db; | ||
| using SIL.Harmony.Maintenance; | ||
| using SIL.Harmony.Sample.Changes; | ||
| using SIL.Harmony.Sample.Models; | ||
|
|
||
| namespace SIL.Harmony.Tests.Maintenance; | ||
|
|
||
/// <summary>
/// Exercises <see cref="DataModelMaintenance.ReseedProject"/> against small chains written through the
/// <see cref="DataModelTestBase"/> helpers: what a reseed rewrites, what it must leave alone, and which
/// chains it has to refuse.
/// </summary>
public class ReseedProjectTests : DataModelTestBase
{
    private readonly Guid _word1Id = Guid.NewGuid();
    private readonly Guid _word2Id = Guid.NewGuid();
    private readonly Guid _newClientId = Guid.NewGuid();

    /// <summary>
    /// Seeds a short chain authored solely by <see cref="DataModelTestBase._localClientId"/> (standing in
    /// for a template-source client): four commits with distinct timestamps, touching two entities, so
    /// that snapshots and projected rows exist.
    /// </summary>
    private async Task SeedChain()
    {
        await WriteNextChange(SetWord(_word1Id, "apple"));
        await WriteNextChange(SetWord(_word2Id, "banana"));
        await WriteNextChange(new SetWordNoteChange(_word1Id, "a fruit"));
        await WriteNextChange(SetWord(_word1Id, "apple-updated"));
    }

    /// <summary>Reads every commit, untracked, in the default commit order.</summary>
    private Task<Commit[]> CurrentChain()
    {
        var token = TestContext.Current.CancellationToken;
        return DbContext.Commits.AsNoTracking().DefaultOrder().ToArrayAsync(token);
    }

    /// <summary>Runs the operation under test with this fixture's replacement client id.</summary>
    private async Task Reseed() => await DataModelMaintenance.ReseedProject(DataModel, _newClientId);

    [Fact]
    public async Task ReseedProject_MintsFreshCommitIds()
    {
        await SeedChain();
        var originalIds = (await CurrentChain()).Select(commit => commit.Id).ToArray();

        await Reseed();

        var reseededIds = (await CurrentChain()).Select(commit => commit.Id).ToArray();
        reseededIds.Should().HaveCount(originalIds.Length);
        reseededIds.Should().OnlyHaveUniqueItems();
        originalIds.Should().NotIntersectWith(reseededIds);
    }

    [Fact]
    public async Task ReseedProject_SetsClientIdOnAllCommits()
    {
        await SeedChain();

        await Reseed();

        var token = TestContext.Current.CancellationToken;
        var authors = await DbContext.Commits
            .AsNoTracking()
            .Select(commit => commit.ClientId)
            .Distinct()
            .ToArrayAsync(token);
        authors.Should().ContainSingle().Which.Should().Be(_newClientId);
        _newClientId.Should().NotBe(_localClientId);
    }

    [Fact]
    public async Task ReseedProject_RecomputesHashesCorrectly()
    {
        await SeedChain();

        await Reseed();

        // Walk the chain from the root, re-deriving each hash from the commit's Id and its predecessor's hash.
        var chain = await CurrentChain();
        var expectedParent = CommitBase.NullParentHash;
        for (var i = 0; i < chain.Length; i++)
        {
            var link = chain[i];
            link.ParentHash.Should().Be(expectedParent);
            link.Hash.Should().Be(CommitBase.GenerateHash(link.Id, expectedParent));
            expectedParent = link.Hash;
        }
    }

    [Fact]
    public async Task ReseedProject_PreservesChangeEntities()
    {
        var token = TestContext.Current.CancellationToken;
        await SeedChain();
        var keysBefore = await DbContext.Set<ChangeEntity<IChange>>()
            .AsNoTracking()
            .Select(change => new { change.EntityId, change.Index })
            .ToArrayAsync(token);
        var commitIdsBefore = await DbContext.Set<ChangeEntity<IChange>>()
            .AsNoTracking()
            .Select(change => change.CommitId)
            .Distinct()
            .ToArrayAsync(token);

        await Reseed();

        var keysAfter = await DbContext.Set<ChangeEntity<IChange>>()
            .AsNoTracking()
            .Select(change => new { change.EntityId, change.Index })
            .ToArrayAsync(token);
        var commitIdsAfter = await DbContext.Set<ChangeEntity<IChange>>()
            .AsNoTracking()
            .Select(change => change.CommitId)
            .Distinct()
            .ToArrayAsync(token);

        // The (EntityId, Index) pairs must come through unchanged...
        keysAfter.Should().BeEquivalentTo(keysBefore);
        // ...whereas no CommitId FK may still point at one of the original commits.
        commitIdsBefore.Should().NotIntersectWith(commitIdsAfter);
    }

    [Fact]
    public async Task ReseedProject_PreservesSnapshots()
    {
        var token = TestContext.Current.CancellationToken;
        await SeedChain();
        var snapshotsBefore = await DbContext.Snapshots
            .AsNoTracking()
            .Select(snapshot => new { snapshot.Id, snapshot.EntityId, snapshot.EntityIsDeleted, snapshot.TypeName })
            .ToArrayAsync(token);

        await Reseed();

        var snapshotsAfter = await DbContext.Snapshots
            .AsNoTracking()
            .Select(snapshot => new { snapshot.Id, snapshot.EntityId, snapshot.EntityIsDeleted, snapshot.TypeName })
            .ToArrayAsync(token);
        // Snapshot rows keep their own Id and the compared columns; CommitId is deliberately not compared.
        snapshotsAfter.Should().BeEquivalentTo(snapshotsBefore);
    }

    [Fact]
    public async Task ReseedProject_PreservesProjectedTables()
    {
        var token = TestContext.Current.CancellationToken;
        await SeedChain();
        var wordsBefore = await DbContext.Set<Word>()
            .AsNoTracking()
            .OrderBy(word => word.Id)
            .Select(word => new { word.Id, word.Text, word.Note })
            .ToArrayAsync(token);

        await Reseed();

        var wordsAfter = await DbContext.Set<Word>()
            .AsNoTracking()
            .OrderBy(word => word.Id)
            .Select(word => new { word.Id, word.Text, word.Note })
            .ToArrayAsync(token);
        wordsAfter.Should().BeEquivalentTo(wordsBefore);
    }

    [Fact]
    public async Task ReseedProject_PreservesChainOrder()
    {
        await SeedChain();
        var timestampsBefore = (await CurrentChain())
            .Select(commit => (commit.HybridDateTime.DateTime, commit.HybridDateTime.Counter))
            .ToArray();

        await Reseed();

        // CurrentChain() sorts by (DateTime, Counter, NEW Id), so the timestamp sequence has to match exactly.
        var timestampsAfter = (await CurrentChain())
            .Select(commit => (commit.HybridDateTime.DateTime, commit.HybridDateTime.Counter))
            .ToArray();
        timestampsAfter.Should().Equal(timestampsBefore);
    }

    [Fact]
    public async Task ReseedProject_HashChainValidatesAfterReseed()
    {
        await SeedChain();

        await Reseed();

        // Writing one more commit triggers ValidateCommits (the fixture leaves AlwaysValidateCommits at
        // its default of true), which re-walks the entire chain and throws on any hash mismatch.
        var writeAfterReseed = async () => await WriteNextChange(SetWord(Guid.NewGuid(), "post-reseed"));
        await writeAfterReseed.Should().NotThrowAsync();

        // The word content is still what the seed chain produced.
        var firstWord = await DataModel.GetLatest<Word>(_word1Id);
        firstWord!.Text.Should().Be("apple-updated");
        var secondWord = await DataModel.GetLatest<Word>(_word2Id);
        secondWord!.Text.Should().Be("banana");
    }

    [Fact]
    public async Task ReseedProject_ThrowsOnMultiAuthorChain()
    {
        var firstAuthor = Guid.NewGuid();
        var secondAuthor = Guid.NewGuid();
        await WriteChange(firstAuthor, NextDate(), SetWord(Guid.NewGuid(), "a"));
        await WriteChange(secondAuthor, NextDate(), SetWord(Guid.NewGuid(), "b"));

        var reseed = async () => await Reseed();
        await reseed.Should().ThrowAsync<InvalidOperationException>().WithMessage("*single-author*");
    }

    [Fact]
    public async Task ReseedProject_ThrowsOnEmptyChain()
    {
        var reseed = async () => await Reseed();
        await reseed.Should().ThrowAsync<InvalidOperationException>().WithMessage("*non-empty*");
    }

    [Fact]
    public async Task ReseedProject_ThrowsOnDuplicateHybridDateTime()
    {
        // Both commits are written at the same instant and the mock clock gives each Counter=0, so their
        // (DateTime, Counter) is identical. Fresh random Ids could swap their order, hence reseed must refuse.
        var sameInstant = new DateTimeOffset(2030, 1, 1, 0, 0, 0, TimeSpan.Zero);
        await WriteChange(_localClientId, sameInstant, SetWord(Guid.NewGuid(), "x"));
        await WriteChange(_localClientId, sameInstant, SetWord(Guid.NewGuid(), "y"));

        var reseed = async () => await Reseed();
        await reseed.Should().ThrowAsync<InvalidOperationException>().WithMessage("*unique (DateTime, Counter)*");
    }

    [Fact]
    public async Task ReseedProject_LeavesChainUntouchedWhenAPreconditionFails()
    {
        // When a cheap pre-write guard trips, none of the chain's identifying columns may have changed.
        var firstAuthor = Guid.NewGuid();
        var secondAuthor = Guid.NewGuid();
        await WriteChange(firstAuthor, NextDate(), SetWord(_word1Id, "a"));
        await WriteChange(secondAuthor, NextDate(), SetWord(_word2Id, "b"));
        var chainBefore = (await CurrentChain())
            .Select(commit => (commit.Id, commit.ClientId, commit.Hash, commit.ParentHash))
            .ToArray();

        var reseed = async () => await Reseed();
        await reseed.Should().ThrowAsync<InvalidOperationException>();

        var chainAfter = (await CurrentChain())
            .Select(commit => (commit.Id, commit.ClientId, commit.Hash, commit.ParentHash))
            .ToArray();
        chainAfter.Should().Equal(chainBefore);
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| using Microsoft.EntityFrameworkCore; | ||
|
|
||
| namespace SIL.Harmony; | ||
|
|
||
public partial class DataModel
{
    /// <summary>
    /// Implementation of <see cref="Maintenance.DataModelMaintenance.ReseedProject"/>. See that method
    /// for the contract. Kept internal on a separate partial so the destructive op isn't part of the
    /// public DataModel surface.
    /// </summary>
    /// <remarks>
    /// Every commit is replaced by a copy carrying a freshly minted Id, the supplied
    /// <paramref name="clientId"/> and a recomputed Hash/ParentHash chain; DateTime, Counter and Metadata
    /// are copied from the original row. ChangeEntities and Snapshots rows are re-pointed at the
    /// replacement commits before the originals are deleted. When this method opens the transaction
    /// itself it commits only after every step has succeeded; when the caller already has a transaction
    /// open, committing or rolling back is left to the caller.
    /// </remarks>
    /// <param name="clientId">ClientId written onto every re-identified commit.</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the chain is empty, has more than one distinct ClientId, contains two commits sharing
    /// an identical (DateTime, Counter), when a commit-row INSERT or DELETE does not affect exactly one
    /// row, or when child rows still reference an original commit just before the delete phase.
    /// </exception>
    internal async Task ReseedProjectImpl(Guid clientId)
    {
        await using var repo = await _crdtRepositoryFactory.CreateRepository();
        using var locked = await repo.Lock();
        repo.ClearChangeTracker();

        // Load the whole chain in Harmony's canonical order: (DateTime, Counter, Id).
        var commits = await repo.CurrentCommits().AsNoTracking().ToArrayAsync();

        // --- Preconditions (all checked before anything is written) ---
        if (commits.Length == 0)
            throw new InvalidOperationException(
                "ReseedProject requires a non-empty commit chain; nothing was loaded to reseed.");

        var distinctClientIds = commits.Select(c => c.ClientId).Distinct().Count();
        if (distinctClientIds > 1)
            throw new InvalidOperationException(
                $"ReseedProject requires a single-author commit chain, but found {distinctClientIds} distinct ClientIds. " +
                "A multi-author chain is an already-authored chain, not a pre-built one — refusing to reseed it.");

        // The canonical order's final tiebreaker is Commit.Id. Because we mint fresh random Ids, any
        // two commits sharing an identical (DateTime, Counter) could be reordered relative to each other
        // after reseeding — which would change both the parent-hash linkage and the per-entity "latest
        // snapshot" winner. A single-author chain never produces such a tie (the HybridDateTimeProvider
        // bumps Counter on collision), so a tie here means this isn't the pre-built chain the API is for.
        // Refuse loudly rather than silently reorder. (commits are sorted, so ties are adjacent.)
        for (var i = 1; i < commits.Length; i++)
        {
            var previous = commits[i - 1].HybridDateTime;
            var current = commits[i].HybridDateTime;
            if (previous.DateTime == current.DateTime && previous.Counter == current.Counter)
                throw new InvalidOperationException(
                    $"ReseedProject requires every commit to have a unique (DateTime, Counter); commits " +
                    $"{commits[i - 1].Id} and {commits[i].Id} share {previous.DateTime:o} / {previous.Counter}. " +
                    "Re-minting Commit Ids would reorder them and break the chain.");
        }

        // --- Plan the rewrite ---
        // (DateTime, Counter) is unique (guarded above), so the new-Id sort order equals the current
        // order; we can chain hashes in the loaded order directly. Mint all new Ids up front.
        var plan = new (Guid OldId, Guid NewId, string Hash, string ParentHash)[commits.Length];
        var parentHash = CommitBase.NullParentHash;
        for (var i = 0; i < commits.Length; i++)
        {
            var newId = Guid.NewGuid();
            var hash = CommitBase.GenerateHash(newId, parentHash);
            plan[i] = (commits[i].Id, newId, hash, parentHash);
            parentHash = hash;
        }

        // --- Apply, atomically ---
        // Mirror DataModel.Add's transaction guard so a caller that wraps this in an outer transaction
        // doesn't trigger a nested-transaction error.
        await using var transaction = repo.IsInTransaction ? null : await repo.BeginTransactionAsync();

        // Phase 1: insert the re-identified commits alongside the originals (Ids differ, no PK clash).
        // DateTime/Counter/Metadata are copied verbatim from the original row; Id/ClientId/Hash/ParentHash
        // are the new values.
        foreach (var (oldId, newId, hash, newParentHash) in plan)
        {
            var inserted = await repo.ExecuteSqlAsync($"""
                INSERT INTO "Commits" ("Id", "ClientId", "DateTime", "Counter", "Metadata", "Hash", "ParentHash")
                SELECT {newId}, {clientId}, "DateTime", "Counter", "Metadata", {hash}, {newParentHash}
                FROM "Commits" WHERE "Id" = {oldId}
                """);

            // The INSERT ... SELECT inserts nothing if the source row is missing. A commit without child
            // rows would then vanish from the chain without the dangling-reference check noticing, so
            // insist on exactly one inserted row per planned commit.
            if (inserted != 1)
                throw new InvalidOperationException(
                    $"ReseedProject expected to insert exactly one replacement commit for {oldId}, but inserted {inserted}.");
        }

        // Phase 2: re-point every ChangeEntities / Snapshots row off the original commit onto the new one.
        // (No row-count check here: a commit may legitimately own any number of child rows.)
        foreach (var (oldId, newId, _, _) in plan)
        {
            await repo.ExecuteSqlAsync($"""UPDATE "ChangeEntities" SET "CommitId" = {newId} WHERE "CommitId" = {oldId}""");
            await repo.ExecuteSqlAsync($"""UPDATE "Snapshots" SET "CommitId" = {newId} WHERE "CommitId" = {oldId}""");
        }

        // Defensive: both child FKs are ON DELETE CASCADE, so if any row still referenced an original
        // commit the phase-3 DELETE would silently cascade-delete content. Verify none do before deleting.
        var oldIds = Array.ConvertAll(plan, p => p.OldId);
        var dangling = await repo.CountReferencesToCommits(oldIds);
        if (dangling != 0)
            throw new InvalidOperationException(
                $"ReseedProject FK rewrite is incomplete: {dangling} ChangeEntities/Snapshots row(s) still " +
                "reference the original commit Ids. Aborting before delete to avoid cascade data loss.");

        // Phase 3: delete the now-orphaned original commits.
        foreach (var (oldId, _, _, _) in plan)
        {
            var deleted = await repo.ExecuteSqlAsync($"""DELETE FROM "Commits" WHERE "Id" = {oldId}""");

            // An original that is not removed would stay in the chain next to its replacement, so a
            // DELETE that matched nothing (or more than one row) must abort before the commit below.
            if (deleted != 1)
                throw new InvalidOperationException(
                    $"ReseedProject expected to delete exactly one original commit {oldId}, but deleted {deleted}.");
        }

        // Only reached when every guard above passed; an outer transaction is left for its owner to commit.
        if (transaction is not null) await transaction.CommitAsync();
    }
}
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Assert the commit-row statements each affect exactly one row.
Line 72 and Line 98 ignore `ExecuteSqlAsync`’s affected-row count. If `INSERT ... SELECT ... WHERE "Id" = {oldId}` or the later `DELETE` ever matches 0 rows, the dangling-FK check still won’t catch a missed commit replacement when that commit has no child rows. Fail fast on `!= 1` here so the transaction rolls back instead of silently shortening the chain.

Suggested guard
```diff
 foreach (var (oldId, newId, hash, newParentHash) in plan)
 {
-    await repo.ExecuteSqlAsync($"""
+    var inserted = await repo.ExecuteSqlAsync($"""
         INSERT INTO "Commits" ("Id", "ClientId", "DateTime", "Counter", "Metadata", "Hash", "ParentHash")
         SELECT {newId}, {clientId}, "DateTime", "Counter", "Metadata", {hash}, {newParentHash}
         FROM "Commits" WHERE "Id" = {oldId}
         """);
+    if (inserted != 1)
+        throw new InvalidOperationException(
+            $"ReseedProject expected to insert exactly one replacement commit for {oldId}, but inserted {inserted}.");
 }
@@
 foreach (var (oldId, _, _, _) in plan)
 {
-    await repo.ExecuteSqlAsync($"""DELETE FROM "Commits" WHERE "Id" = {oldId}""");
+    var deleted = await repo.ExecuteSqlAsync($"""DELETE FROM "Commits" WHERE "Id" = {oldId}""");
+    if (deleted != 1)
+        throw new InvalidOperationException(
+            $"ReseedProject expected to delete exactly one original commit {oldId}, but deleted {deleted}.");
 }
```

Also applies to: 96-98
🤖 Prompt for AI Agents