Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
499 changes: 499 additions & 0 deletions docs/decisions/reseed-project.md

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions src/SIL.Harmony.Core/CommitBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,16 @@ internal CommitBase(Guid id)
public CommitMetadata Metadata { get; init; } = new();


public string GenerateHash(string parentHash)
public string GenerateHash(string parentHash) => GenerateHash(Id, parentHash);

/// <summary>
/// Computes a commit hash from a commit Id and its parent commit's hash. The hash binds only
/// these two values — not change content, ClientId, or the timestamp — so a commit chain can be
/// re-identified by minting new Ids and rehashing against them.
/// </summary>
public static string GenerateHash(Guid id, string parentHash)
{
var idBytes = Id.ToByteArray();
var idBytes = id.ToByteArray();
var parentHashBytes = Convert.FromHexString(parentHash);
Span<byte> hashBytes = stackalloc byte[idBytes.Length + parentHashBytes.Length];
idBytes.AsSpan().CopyTo(hashBytes);
Expand Down
205 changes: 205 additions & 0 deletions src/SIL.Harmony.Tests/Maintenance/ReseedProjectTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
using Microsoft.EntityFrameworkCore;
using SIL.Harmony.Changes;
using SIL.Harmony.Db;
using SIL.Harmony.Maintenance;
using SIL.Harmony.Sample.Changes;
using SIL.Harmony.Sample.Models;

namespace SIL.Harmony.Tests.Maintenance;

/// <summary>
/// Tests for <see cref="DataModelMaintenance.ReseedProject"/>: each test seeds a commit chain through
/// the <see cref="DataModelTestBase"/> helpers, reseeds it onto <c>_newClientId</c>, and then inspects
/// the commits, change entities, snapshots, or projected rows left in the database.
/// </summary>
public class ReseedProjectTests : DataModelTestBase
{
    private readonly Guid _word1Id = Guid.NewGuid();
    private readonly Guid _word2Id = Guid.NewGuid();
    private readonly Guid _newClientId = Guid.NewGuid();

    /// <summary>
    /// Seeds the fixture with four changes written via WriteNextChange: two words are created, one
    /// gets a note, and the first word's text is then overwritten. The chain stands in for one
    /// authored by <see cref="DataModelTestBase._localClientId"/> (the template-source client).
    /// </summary>
    private async Task SeedChain()
    {
        await WriteNextChange(SetWord(_word1Id, "apple"));
        await WriteNextChange(SetWord(_word2Id, "banana"));
        await WriteNextChange(new SetWordNoteChange(_word1Id, "a fruit"));
        await WriteNextChange(SetWord(_word1Id, "apple-updated"));
    }

    /// <summary>Reads every commit, untracked, in the repository's default order.</summary>
    private Task<Commit[]> CurrentChain()
    {
        var orderedCommits = DbContext.Commits.AsNoTracking().DefaultOrder();
        return orderedCommits.ToArrayAsync(TestContext.Current.CancellationToken);
    }

    /// <summary>Runs the operation under test against the fixture's DataModel with the new client id.</summary>
    private async Task Reseed() => await DataModelMaintenance.ReseedProject(DataModel, _newClientId);

    [Fact]
    public async Task ReseedProject_MintsFreshCommitIds()
    {
        await SeedChain();
        var originalIds = Array.ConvertAll(await CurrentChain(), commit => commit.Id);

        await Reseed();

        var reseededIds = Array.ConvertAll(await CurrentChain(), commit => commit.Id);
        reseededIds.Should().HaveCount(originalIds.Length);
        reseededIds.Should().OnlyHaveUniqueItems();
        originalIds.Should().NotIntersectWith(reseededIds);
    }

    [Fact]
    public async Task ReseedProject_SetsClientIdOnAllCommits()
    {
        var ct = TestContext.Current.CancellationToken;
        await SeedChain();

        await Reseed();

        var authors = await DbContext.Commits.AsNoTracking()
            .Select(commit => commit.ClientId)
            .Distinct()
            .ToArrayAsync(ct);
        authors.Should().ContainSingle().Which.Should().Be(_newClientId);
        _newClientId.Should().NotBe(_localClientId);
    }

    [Fact]
    public async Task ReseedProject_RecomputesHashesCorrectly()
    {
        await SeedChain();

        await Reseed();

        // Walk the chain from the root, recomputing each link from the commit's own Id.
        var chain = await CurrentChain();
        var expectedParent = CommitBase.NullParentHash;
        for (var i = 0; i < chain.Length; i++)
        {
            var link = chain[i];
            link.ParentHash.Should().Be(expectedParent);
            link.Hash.Should().Be(CommitBase.GenerateHash(link.Id, expectedParent));
            expectedParent = link.Hash;
        }
    }

    [Fact]
    public async Task ReseedProject_PreservesChangeEntities()
    {
        var ct = TestContext.Current.CancellationToken;
        var loadKeys = () => DbContext.Set<ChangeEntity<IChange>>().AsNoTracking()
            .Select(change => new { change.EntityId, change.Index })
            .ToArrayAsync(ct);
        var loadCommitIds = () => DbContext.Set<ChangeEntity<IChange>>().AsNoTracking()
            .Select(change => change.CommitId)
            .Distinct()
            .ToArrayAsync(ct);

        await SeedChain();
        var keysBefore = await loadKeys();
        var commitIdsBefore = await loadCommitIds();

        await Reseed();

        var keysAfter = await loadKeys();
        var commitIdsAfter = await loadCommitIds();

        // The (EntityId, Index) pairs survive untouched...
        keysAfter.Should().BeEquivalentTo(keysBefore);
        // ...but none of the original CommitId values is referenced any more.
        commitIdsBefore.Should().NotIntersectWith(commitIdsAfter);
    }

    [Fact]
    public async Task ReseedProject_PreservesSnapshots()
    {
        var ct = TestContext.Current.CancellationToken;
        var loadSnapshots = () => DbContext.Snapshots.AsNoTracking()
            .Select(snapshot => new { snapshot.Id, snapshot.EntityId, snapshot.EntityIsDeleted, snapshot.TypeName })
            .ToArrayAsync(ct);

        await SeedChain();
        var snapshotsBefore = await loadSnapshots();

        await Reseed();

        // Only CommitId is expected to change, so the projected columns must match exactly.
        var snapshotsAfter = await loadSnapshots();
        snapshotsAfter.Should().BeEquivalentTo(snapshotsBefore);
    }

    [Fact]
    public async Task ReseedProject_PreservesProjectedTables()
    {
        var ct = TestContext.Current.CancellationToken;
        var loadWords = () => DbContext.Set<Word>().AsNoTracking()
            .OrderBy(word => word.Id)
            .Select(word => new { word.Id, word.Text, word.Note })
            .ToArrayAsync(ct);

        await SeedChain();
        var wordsBefore = await loadWords();

        await Reseed();

        var wordsAfter = await loadWords();
        wordsAfter.Should().BeEquivalentTo(wordsBefore);
    }

    [Fact]
    public async Task ReseedProject_PreservesChainOrder()
    {
        await SeedChain();
        var chainBefore = await CurrentChain();
        var stampsBefore = chainBefore
            .Select(commit => (commit.HybridDateTime.DateTime, commit.HybridDateTime.Counter))
            .ToArray();

        await Reseed();

        // The chain is re-read in default order (which now sees the new Ids); the timestamp
        // sequence has to come back element-for-element the same.
        var chainAfter = await CurrentChain();
        var stampsAfter = chainAfter
            .Select(commit => (commit.HybridDateTime.DateTime, commit.HybridDateTime.Counter))
            .ToArray();
        stampsAfter.Should().Equal(stampsBefore);
    }

    [Fact]
    public async Task ReseedProject_HashChainValidatesAfterReseed()
    {
        await SeedChain();

        await Reseed();

        // Writing one more commit triggers ValidateCommits (AlwaysValidateCommits defaults to true
        // in the fixture), which walks the whole chain and throws on any hash mismatch.
        var writeAfterReseed = async () => await WriteNextChange(SetWord(Guid.NewGuid(), "post-reseed"));
        await writeAfterReseed.Should().NotThrowAsync();

        // The entity content is still what was seeded.
        var word1 = await DataModel.GetLatest<Word>(_word1Id);
        word1!.Text.Should().Be("apple-updated");
        var word2 = await DataModel.GetLatest<Word>(_word2Id);
        word2!.Text.Should().Be("banana");
    }

    [Fact]
    public async Task ReseedProject_ThrowsOnMultiAuthorChain()
    {
        var firstAuthor = Guid.NewGuid();
        var secondAuthor = Guid.NewGuid();
        await WriteChange(firstAuthor, NextDate(), SetWord(Guid.NewGuid(), "a"));
        await WriteChange(secondAuthor, NextDate(), SetWord(Guid.NewGuid(), "b"));

        Func<Task> reseeding = Reseed;
        await reseeding.Should().ThrowAsync<InvalidOperationException>().WithMessage("*single-author*");
    }

    [Fact]
    public async Task ReseedProject_ThrowsOnEmptyChain()
    {
        Func<Task> reseeding = Reseed;
        await reseeding.Should().ThrowAsync<InvalidOperationException>().WithMessage("*non-empty*");
    }

    [Fact]
    public async Task ReseedProject_ThrowsOnDuplicateHybridDateTime()
    {
        // Both commits are written at the same instant; the mock clock gives each Counter=0, so
        // their (DateTime, Counter) pairs collide. Fresh random Ids could swap them, so the
        // reseed is expected to refuse.
        var sameInstant = new DateTimeOffset(2030, 1, 1, 0, 0, 0, TimeSpan.Zero);
        await WriteChange(_localClientId, sameInstant, SetWord(Guid.NewGuid(), "x"));
        await WriteChange(_localClientId, sameInstant, SetWord(Guid.NewGuid(), "y"));

        Func<Task> reseeding = Reseed;
        await reseeding.Should().ThrowAsync<InvalidOperationException>().WithMessage("*unique (DateTime, Counter)*");
    }

    [Fact]
    public async Task ReseedProject_LeavesChainUntouchedWhenAPreconditionFails()
    {
        // A rejected reseed (here: two authors) must leave every commit row exactly as it was.
        var firstAuthor = Guid.NewGuid();
        var secondAuthor = Guid.NewGuid();
        await WriteChange(firstAuthor, NextDate(), SetWord(_word1Id, "a"));
        await WriteChange(secondAuthor, NextDate(), SetWord(_word2Id, "b"));
        var chainBefore = await CurrentChain();
        var rowsBefore = chainBefore
            .Select(commit => (commit.Id, commit.ClientId, commit.Hash, commit.ParentHash))
            .ToArray();

        Func<Task> reseeding = Reseed;
        await reseeding.Should().ThrowAsync<InvalidOperationException>();

        var chainAfter = await CurrentChain();
        var rowsAfter = chainAfter
            .Select(commit => (commit.Id, commit.ClientId, commit.Hash, commit.ParentHash))
            .ToArray();
        rowsAfter.Should().Equal(rowsBefore);
    }
}
2 changes: 1 addition & 1 deletion src/SIL.Harmony/DataModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace SIL.Harmony;

public record SyncResults(Commit[] MissingFromLocal, Commit[] MissingFromRemote, bool IsSynced);

public class DataModel : ISyncable, IAsyncDisposable
public partial class DataModel : ISyncable, IAsyncDisposable
{
/// <summary>
/// after adding any commit validate the commit history, not great for performance but good for testing.
Expand Down
23 changes: 23 additions & 0 deletions src/SIL.Harmony/Db/CrdtRepository.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,29 @@ public Task<IDbContextTransaction> BeginTransactionAsync()

public bool IsInTransaction => _dbContext.Database.CurrentTransaction is not null;

/// <summary>
/// Forwards an interpolated SQL statement to this repository's DbContext via
/// <c>Database.ExecuteSqlInterpolatedAsync</c> and hands back the resulting task. The call goes
/// through the same DbContext as <see cref="IsInTransaction"/>, so it is expected to take part in
/// the currently open transaction, if any. Deliberately internal and minimal so that raw-SQL
/// maintenance (see <see cref="DataModel.ReseedProjectImpl"/>) stays contained and easy to grep for.
/// </summary>
/// <param name="sql">The interpolated statement to execute.</param>
/// <returns>The task produced by the underlying EF Core call (the number of rows affected).</returns>
internal Task<int> ExecuteSqlAsync(FormattableString sql) =>
    _dbContext.Database.ExecuteSqlInterpolatedAsync(sql);

/// <summary>
/// Returns how many ChangeEntities rows plus Snapshots rows have a CommitId contained in
/// <paramref name="commitIds"/>. Intended as a guard before deleting commits: both child foreign
/// keys are ON DELETE CASCADE, so a non-zero result means a delete would take content with it.
/// </summary>
/// <param name="commitIds">The commit Ids to look for in the child tables.</param>
/// <returns>The combined count of matching change and snapshot rows.</returns>
internal async Task<int> CountReferencesToCommits(IReadOnlyCollection<Guid> commitIds)
{
    var referencingChanges = await _dbContext.Set<ChangeEntity<IChange>>()
        .Where(change => commitIds.Contains(change.CommitId))
        .CountAsync();

    var referencingSnapshots = await _dbContext.Set<ObjectSnapshot>()
        .Where(snapshot => commitIds.Contains(snapshot.CommitId))
        .CountAsync();

    return referencingChanges + referencingSnapshots;
}


public async Task<bool> HasCommit(Guid commitId)
{
Expand Down
103 changes: 103 additions & 0 deletions src/SIL.Harmony/Maintenance/DataModel.Reseed.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
using Microsoft.EntityFrameworkCore;

namespace SIL.Harmony;

public partial class DataModel
{
    /// <summary>
    /// Implementation of <see cref="Maintenance.DataModelMaintenance.ReseedProject"/>. See that method
    /// for the contract. Kept internal on a separate partial so the destructive op isn't part of the
    /// public DataModel surface.
    /// <para>
    /// Every commit gets a freshly minted Id, the supplied <paramref name="clientId"/>, and a hash /
    /// parent hash recomputed from the new Ids; DateTime, Counter and Metadata are copied from the
    /// original row. ChangeEntities and Snapshots rows are re-pointed at the new commits before the
    /// originals are deleted.
    /// </para>
    /// </summary>
    /// <param name="clientId">The ClientId written onto every re-identified commit.</param>
    /// <exception cref="InvalidOperationException">
    /// The chain is empty, has more than one distinct ClientId, contains two commits with the same
    /// (DateTime, Counter), a commit insert or delete did not affect exactly one row, or child rows
    /// still reference an original commit after the foreign-key rewrite.
    /// </exception>
    internal async Task ReseedProjectImpl(Guid clientId)
    {
        await using var repo = await _crdtRepositoryFactory.CreateRepository();
        using var locked = await repo.Lock();
        repo.ClearChangeTracker();

        // Load the whole chain in Harmony's canonical order: (DateTime, Counter, Id).
        var commits = await repo.CurrentCommits().AsNoTracking().ToArrayAsync();

        // --- Preconditions ---
        if (commits.Length == 0)
            throw new InvalidOperationException(
                "ReseedProject requires a non-empty commit chain; nothing was loaded to reseed.");

        var distinctClientIds = commits.Select(c => c.ClientId).Distinct().Count();
        if (distinctClientIds > 1)
            throw new InvalidOperationException(
                $"ReseedProject requires a single-author commit chain, but found {distinctClientIds} distinct ClientIds. " +
                "A multi-author chain is an already-authored chain, not a pre-built one — refusing to reseed it.");

        // The canonical order's final tiebreaker is Commit.Id. Because we mint fresh random Ids, any
        // two commits sharing an identical (DateTime, Counter) could be reordered relative to each other
        // after reseeding — which would change both the parent-hash linkage and the per-entity "latest
        // snapshot" winner. A single-author chain never produces such a tie (the HybridDateTimeProvider
        // bumps Counter on collision), so a tie here means this isn't the pre-built chain the API is for.
        // Refuse loudly rather than silently reorder. (commits are sorted, so ties are adjacent.)
        for (var i = 1; i < commits.Length; i++)
        {
            var previous = commits[i - 1].HybridDateTime;
            var current = commits[i].HybridDateTime;
            if (previous.DateTime == current.DateTime && previous.Counter == current.Counter)
                throw new InvalidOperationException(
                    $"ReseedProject requires every commit to have a unique (DateTime, Counter); commits " +
                    $"{commits[i - 1].Id} and {commits[i].Id} share {previous.DateTime:o} / {previous.Counter}. " +
                    "Re-minting Commit Ids would reorder them and break the chain.");
        }

        // --- Plan the rewrite ---
        // (DateTime, Counter) is unique (guarded above), so the new-Id sort order equals the current
        // order; we can chain hashes in the loaded order directly. Mint all new Ids up front.
        var plan = new (Guid OldId, Guid NewId, string Hash, string ParentHash)[commits.Length];
        var parentHash = CommitBase.NullParentHash;
        for (var i = 0; i < commits.Length; i++)
        {
            var newId = Guid.NewGuid();
            var hash = CommitBase.GenerateHash(newId, parentHash);
            plan[i] = (commits[i].Id, newId, hash, parentHash);
            parentHash = hash;
        }

        // --- Apply, atomically ---
        // Mirror DataModel.Add's transaction guard so a caller that wraps this in an outer transaction
        // doesn't trigger a nested-transaction error.
        await using var transaction = repo.IsInTransaction ? null : await repo.BeginTransactionAsync();

        // Phase 1: insert the re-identified commits alongside the originals (Ids differ, no PK clash).
        // DateTime/Counter/Metadata are copied verbatim from the original row; Id/ClientId/Hash/ParentHash
        // are the new values.
        foreach (var (oldId, newId, hash, newParentHash) in plan)
        {
            var inserted = await repo.ExecuteSqlAsync($"""
                INSERT INTO "Commits" ("Id", "ClientId", "DateTime", "Counter", "Metadata", "Hash", "ParentHash")
                SELECT {newId}, {clientId}, "DateTime", "Counter", "Metadata", {hash}, {newParentHash}
                FROM "Commits" WHERE "Id" = {oldId}
                """);

            // INSERT ... SELECT inserts nothing when the WHERE matches no row. A commit without child
            // rows would then vanish unnoticed (the dangling-reference check below only sees child
            // rows), so fail here and let the transaction roll back.
            if (inserted != 1)
                throw new InvalidOperationException(
                    $"ReseedProject expected to insert exactly one replacement commit for {oldId}, but inserted {inserted}.");
        }

        // Phase 2: re-point every ChangeEntities / Snapshots row off the original commit onto the new one.
        // No row-count check here: a commit may legitimately have any number of child rows.
        foreach (var (oldId, newId, _, _) in plan)
        {
            await repo.ExecuteSqlAsync($"""UPDATE "ChangeEntities" SET "CommitId" = {newId} WHERE "CommitId" = {oldId}""");
            await repo.ExecuteSqlAsync($"""UPDATE "Snapshots" SET "CommitId" = {newId} WHERE "CommitId" = {oldId}""");
        }

        // Defensive: both child FKs are ON DELETE CASCADE, so if any row still referenced an original
        // commit the phase-3 DELETE would silently cascade-delete content. Verify none do before deleting.
        var oldIds = Array.ConvertAll(plan, p => p.OldId);
        var dangling = await repo.CountReferencesToCommits(oldIds);
        if (dangling != 0)
            throw new InvalidOperationException(
                $"ReseedProject FK rewrite is incomplete: {dangling} ChangeEntities/Snapshots row(s) still " +
                "reference the original commit Ids. Aborting before delete to avoid cascade data loss.");

        // Phase 3: delete the now-orphaned original commits. Each delete must remove exactly the one
        // original row; anything else means the chain is not in the state the plan assumed.
        foreach (var (oldId, _, _, _) in plan)
        {
            var deleted = await repo.ExecuteSqlAsync($"""DELETE FROM "Commits" WHERE "Id" = {oldId}""");
            if (deleted != 1)
                throw new InvalidOperationException(
                    $"ReseedProject expected to delete exactly one original commit {oldId}, but deleted {deleted}.");
        }

        if (transaction is not null) await transaction.CommitAsync();
    }
}
Loading
Loading