From 3f4e685f3a41e179d7d320b52cb93f384b809bc5 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Thu, 11 Jun 2026 10:54:39 +0700 Subject: [PATCH 1/5] Chunk queries for pre loading snapshots to avoid going over the sql parameter limit --- src/SIL.Harmony/DataModel.cs | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/SIL.Harmony/DataModel.cs b/src/SIL.Harmony/DataModel.cs index e120e1c..b31f891 100644 --- a/src/SIL.Harmony/DataModel.cs +++ b/src/SIL.Harmony/DataModel.cs @@ -198,19 +198,27 @@ private async Task UpdateSnapshots(CrdtRepository repo, SortedSet commit if (commitsToApply.Count == 0) return; var oldestAddedCommit = commitsToApply.First(); await repo.DeleteStaleSnapshots(oldestAddedCommit); - Dictionary snapshotLookup; + Dictionary snapshotLookup = []; if (commitsToApply.Count > 10) { // Bulk-load relevant snapshots to minimize DB queries - var entityIds = commitsToApply.SelectMany(c => c.ChangeEntities.Select(ce => ce.EntityId)); - snapshotLookup = await repo.CurrentSnapshots() - .Where(s => entityIds.Contains(s.EntityId)) - .Select(s => new KeyValuePair(s.EntityId, s.Id)) - .ToDictionaryAsync(s => s.Key, s => s.Value); - } - else - { - snapshotLookup = []; + var entityIds = commitsToApply + .SelectMany(c => c.ChangeEntities.Select(ce => ce.EntityId)) + .Distinct(); + + //chunk into batches to avoid too many parameters in the query + foreach (var entityIdBatch in entityIds.Chunk(500)) + { + var batchLookup = await repo.CurrentSnapshots() + .Where(s => entityIdBatch.Contains(s.EntityId)) + .Select(s => new KeyValuePair(s.EntityId, s.Id)) + .ToDictionaryAsync(s => s.Key, s => s.Value); + + foreach (var pair in batchLookup) + { + snapshotLookup[pair.Key] = pair.Value; + } + } } var snapshotWorker = new SnapshotWorker(snapshotLookup, repo, _crdtConfig.Value); From d392982fda2c2b4c7c5b5d437be702525f6b164c Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Thu, 11 Jun 2026 11:46:48 +0200 Subject: [PATCH 2/5] revert --- src/SIL.Harmony/DataModel.cs | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/src/SIL.Harmony/DataModel.cs b/src/SIL.Harmony/DataModel.cs index b31f891..e120e1c 100644 --- a/src/SIL.Harmony/DataModel.cs +++ b/src/SIL.Harmony/DataModel.cs @@ -198,27 +198,19 @@ private async Task UpdateSnapshots(CrdtRepository repo, SortedSet commit if (commitsToApply.Count == 0) return; var oldestAddedCommit = commitsToApply.First(); await repo.DeleteStaleSnapshots(oldestAddedCommit); - Dictionary snapshotLookup = []; + Dictionary snapshotLookup; if (commitsToApply.Count > 10) { // Bulk-load relevant snapshots to minimize DB queries - var entityIds = commitsToApply - .SelectMany(c => c.ChangeEntities.Select(ce => ce.EntityId)) - .Distinct(); - - //chunk into batches to avoid too many parameters in the query - foreach (var entityIdBatch in entityIds.Chunk(500)) - { - var batchLookup = await repo.CurrentSnapshots() - .Where(s => entityIdBatch.Contains(s.EntityId)) - .Select(s => new KeyValuePair(s.EntityId, s.Id)) - .ToDictionaryAsync(s => s.Key, s => s.Value); - - foreach (var pair in batchLookup) - { - snapshotLookup[pair.Key] = pair.Value; - } - } + var entityIds = commitsToApply.SelectMany(c => c.ChangeEntities.Select(ce => ce.EntityId)); + snapshotLookup = await repo.CurrentSnapshots() + .Where(s => entityIds.Contains(s.EntityId)) + .Select(s => new KeyValuePair(s.EntityId, s.Id)) + .ToDictionaryAsync(s => s.Key, s => s.Value); + } + else + { + snapshotLookup = []; } var snapshotWorker = new SnapshotWorker(snapshotLookup, repo, _crdtConfig.Value); From 1b821bade1fd574b365c884706cb139e51226341 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Thu, 11 Jun 2026 12:47:27 +0200 Subject: [PATCH 3/5] Change fix from chunking to EF.Parameter Appears to be significantly faster as the snapshot table grows. Only marginally slower on an empty DB. --- src/SIL.Harmony/DataModel.cs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/SIL.Harmony/DataModel.cs b/src/SIL.Harmony/DataModel.cs index e120e1c..f7b21ac 100644 --- a/src/SIL.Harmony/DataModel.cs +++ b/src/SIL.Harmony/DataModel.cs @@ -198,20 +198,20 @@ private async Task UpdateSnapshots(CrdtRepository repo, SortedSet commit if (commitsToApply.Count == 0) return; var oldestAddedCommit = commitsToApply.First(); await repo.DeleteStaleSnapshots(oldestAddedCommit); - Dictionary snapshotLookup; + Dictionary snapshotLookup = []; if (commitsToApply.Count > 10) { // Bulk-load relevant snapshots to minimize DB queries - var entityIds = commitsToApply.SelectMany(c => c.ChangeEntities.Select(ce => ce.EntityId)); + var entityIds = commitsToApply + .SelectMany(c => c.ChangeEntities.Select(ce => ce.EntityId)) + .Distinct(); + + //EF.Parameter forces a single JSON parameter; without it EF 10+ emits one parameter per id and overflows SQLite's parameter limit snapshotLookup = await repo.CurrentSnapshots() - .Where(s => entityIds.Contains(s.EntityId)) + .Where(s => EF.Parameter(entityIds).Contains(s.EntityId)) .Select(s => new KeyValuePair(s.EntityId, s.Id)) .ToDictionaryAsync(s => s.Key, s => s.Value); } - else - { - snapshotLookup = []; - } var snapshotWorker = new SnapshotWorker(snapshotLookup, repo, _crdtConfig.Value); await snapshotWorker.UpdateSnapshots(commitsToApply); From ab4d127c8b691b0a373562f1f55b8820f5c58d48 Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Thu, 11 Jun 2026 12:47:52 +0200 Subject: [PATCH 4/5] Reproduce SQLite variable limit exception --- src/SIL.Harmony.Tests/SyncTests.cs | 32 +++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/SIL.Harmony.Tests/SyncTests.cs b/src/SIL.Harmony.Tests/SyncTests.cs index 15cf8b5..bc49df0 100644 --- a/src/SIL.Harmony.Tests/SyncTests.cs +++ b/src/SIL.Harmony.Tests/SyncTests.cs @@ -1,4 +1,6 @@ -using SIL.Harmony.Sample.Changes; +using Microsoft.Data.Sqlite; +using Microsoft.EntityFrameworkCore; +using SIL.Harmony.Sample.Changes; using SIL.Harmony.Sample.Models; namespace SIL.Harmony.Tests; @@ -119,4 +121,32 @@ public async Task CanSync_AddDependentWithMultipleChanges() _client2.DataModel.QueryLatest().ToBlockingEnumerable(TestContext.Current.CancellationToken).Should() .BeEquivalentTo(_client1.DataModel.QueryLatest().ToBlockingEnumerable(TestContext.Current.CancellationToken)); } + + [Fact] + public async Task CanSyncCommitsWithMoreEntitiesThanTheSqliteParameterLimit() + { + //lower the connection's variable limit so tripping it doesn't require such a slow test + //(currently 32,766 in the currently bundled SQLite) + var connection = (SqliteConnection)_client1.DbContext.Database.GetDbConnection(); + const int maxSqlVariables = 600; + SQLitePCL.raw.sqlite3_limit(connection.Handle, SQLitePCL.raw.SQLITE_LIMIT_VARIABLE_NUMBER, maxSqlVariables); + + //>10 commits triggers the bulk snapshot preload + const int commitCount = 30; + const int changesPerCommit = 30; + const int totalEntityCount = commitCount * changesPerCommit; + totalEntityCount.Should().BeGreaterThan(maxSqlVariables, "the synced commits must touch more entities than SQLite allows variables"); + + var commits = new List(commitCount); + for (var i = 0; i < commitCount; i++) + { + var changes = Enumerable.Range(0, changesPerCommit) + .Select(j => _client1.SetWord(Guid.NewGuid(), $"word {i}-{j}")); + commits.Add(await _client1.WriteNextChange(changes, add: false)); + } + + await ((ISyncable)_client1.DataModel).AddRangeFromSync(commits); + + _client1.DbContext.Set().Should().HaveCount(totalEntityCount); + } } \ No newline at end of file From dbe2d552fa63bff50a04ca16297b5fc6a968227a Mon Sep 17 00:00:00 2001 From: Tim Haasdyk Date: Thu, 11 Jun 2026 12:48:11 +0200 Subject: [PATCH 5/5] Also adapt FilterExistingCommits --- src/SIL.Harmony.Tests/RepositoryTests.cs | 24 ++++++++++++++++++++++++ src/SIL.Harmony/Db/CrdtRepository.cs | 4 +++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/SIL.Harmony.Tests/RepositoryTests.cs b/src/SIL.Harmony.Tests/RepositoryTests.cs index 5fac8a6..3a697ea 100644 --- a/src/SIL.Harmony.Tests/RepositoryTests.cs +++ b/src/SIL.Harmony.Tests/RepositoryTests.cs @@ -2,6 +2,7 @@ using SIL.Harmony.Sample; using SIL.Harmony.Sample.Models; using SIL.Harmony.Tests.Mocks; +using Microsoft.Data.Sqlite; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.DependencyInjection; using SIL.Harmony.Changes; @@ -343,4 +344,27 @@ public async Task FindPreviousCommit_ReturnsNullForFirstCommit() var previousCommit = await _repository.FindPreviousCommit(commit1); previousCommit.Should().BeNull(); } + + [Fact] + public async Task FilterExistingCommits_WorksWithMoreCommitsThanTheSqliteParameterLimit() + { + //lower the connection's variable limit so tripping it doesn't require such a slow test + //(currently 32,766 in the currently bundled SQLite) + var connection = (SqliteConnection)_crdtDbContext.Database.GetDbConnection(); + const int maxSqlVariables = 500; + SQLitePCL.raw.sqlite3_limit(connection.Handle, SQLitePCL.raw.SQLITE_LIMIT_VARIABLE_NUMBER, maxSqlVariables); + + const int commitCount = 600; + commitCount.Should().BeGreaterThan(maxSqlVariables, "more commits must be filtered than SQLite allows variables"); + var commits = Enumerable.Range(0, commitCount) + .Select(i => Commit(Guid.NewGuid(), Time(i, 0))) + .ToArray(); + await _repository.AddCommits(commits.Take(2).ToArray()); + + var (oldestChange, newCommits) = await _repository.FilterExistingCommits(commits); + + newCommits.Should().HaveCount(commitCount - 2); + oldestChange.Should().NotBeNull(); + oldestChange.Id.Should().Be(commits[2].Id); + } } diff --git a/src/SIL.Harmony/Db/CrdtRepository.cs b/src/SIL.Harmony/Db/CrdtRepository.cs index 2f2c893..6312044 100644 --- a/src/SIL.Harmony/Db/CrdtRepository.cs +++ b/src/SIL.Harmony/Db/CrdtRepository.cs @@ -112,8 +112,10 @@ public async Task HasCommit(Guid commitId) public async Task<(Commit? oldestChange, Commit[] newCommits)> FilterExistingCommits(ICollection commits) { Commit? oldestChange = null; + //EF.Parameter forces a single JSON parameter; without it EF 10+ emits one parameter per id and overflows SQLite's parameter limit + var commitIds = commits.Select(c => c.Id); var commitIdsToExclude = await Commits - .Where(c => commits.Select(c => c.Id).Contains(c.Id)) + .Where(c => EF.Parameter(commitIds).Contains(c.Id)) .Select(c => c.Id) .ToArrayAsync(); var newCommits = commits.ExceptBy(commitIdsToExclude, c => c.Id).Select(commit =>