Skip to content

Commit 151dc4b

Browse files
authored
REP-6882 Track mismatch duration (#189)
Verifier historically considered all mismatches coequally significant. For logging this is unhelpful: ideally users should see those mismatches that are recurring in successive generations. This changeset takes a step toward that by tracking how long a given mismatch has appeared in Verifier checks and surfacing the “longest-lived” mismatches in the log. To do this, two pieces of information are now tracked for a mismatch: - when it was first seen - how long the most _recent_ occurrence happened after that first sight (i.e., the mismatch’s “duration”) These get saved into the mismatch (for logging purposes). The recheck entry tracks just the first-seen time. When Verifier creates a new task from recheck entries, that task also contains the relevant document’s first-mismatch time. _If any non-mismatch rechecks exist for the document, the mismatch time is omitted from the task._ This is because those non-mismatch rechecks will have come from change events, which means the document has changed, so any further mismatch will be a “new” one. (In this case, of course, hopefully the change event means the mismatch is now fixed!) If, on recheck, the document is still mismatched, the mismatch’s duration is recomputed (i.e., the time since the mismatch’s first-seen time), and that new duration is saved with the new recheck entry. (If there’s no mismatch, then there’s no recheck.) Finally: when logging mismatches, Verifier now also shows the mismatch’s duration and sorts the displayed mismatches longest-duration-first. To offset this additional “noise” in the logs, Verifier no longer shows destination namespaces in document mismatch logs. This changeset also includes some tangential-but-opportune logging improvements: - BUG FIX: Verifier no longer tells people to query the tasks collection. Instead, the status line refers to the documentation (which contains an aggregation command for this).
- BUG FIX: Verifier no longer logs context cancellation errors when its logging is interrupted. - Logs now distinguish missing-on-dst documents from extra-on-dst. Previously the logs labeled both scenarios as “missing”, which was always confusing. - Log entries’ final status lines are now more nuanced. If all shown logs are 0s in duration, for example, the status line no longer says simply, “Mismatches found”. Instead Verifier now says that all mismatches are “new” and will (hopefully) be resolved. - Some headers are now abbreviated & made more consistent. Ancillary, implementation-level changes: - getDocIdFromComparison() now asserts rather than ignoring errors. (It should never fail.) - This includes a new library to make writing MongoDB aggregations far more ergonomic in Go.
1 parent 00ea91e commit 151dc4b

24 files changed

+1365
-270
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ The verifier will now check to completion to make sure that there are no inconsi
156156
157157
# Investigation of Mismatches
158158
159-
The verifier records any mismatches it finds in its metadata’s `mismatches`
159+
The verifier records mismatches in its metadata’s `mismatches`
160160
collection. Mismatches are indexed by verification task ID. To find a given
161161
generation’s mismatches, aggregate like this on the metadata cluster:
162162
@@ -166,7 +166,7 @@ generation’s mismatches, aggregate like this on the metadata cluster:
166166
db.verification_tasks.aggregate(
167167
{ $match: {
168168
generation: <whichever generation>,
169-
status: "failed",
169+
status: {$in: ["failed", "mismatch"]},
170170
} },
171171
{ $lookup: {
172172
from: "mismatches",

agg/accum/accumulators.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Package accum exposes helper types for accumulation operators.
2+
package accum
3+
4+
import "go.mongodb.org/mongo-driver/v2/bson"
5+
6+
type Sum [1]any
7+
8+
var _ bson.Marshaler = Sum{}
9+
10+
func (s Sum) MarshalBSON() ([]byte, error) {
11+
return bson.Marshal(bson.D{{"$sum", s[0]}})
12+
}
13+
14+
//----------------------------------------------------------------------
15+
16+
type Push [1]any
17+
18+
var _ bson.Marshaler = Push{}
19+
20+
func (p Push) MarshalBSON() ([]byte, error) {
21+
return bson.Marshal(bson.D{{"$push", p[0]}})
22+
}
23+
24+
//----------------------------------------------------------------------
25+
26+
type Max [1]any
27+
28+
var _ bson.Marshaler = Max{}
29+
30+
func (m Max) MarshalBSON() ([]byte, error) {
31+
return bson.Marshal(bson.D{{"$max", m[0]}})
32+
}
33+
34+
//----------------------------------------------------------------------
35+
36+
type FirstN struct {
37+
N any
38+
Input any
39+
}
40+
41+
var _ bson.Marshaler = FirstN{}
42+
43+
func (t FirstN) MarshalBSON() ([]byte, error) {
44+
return bson.Marshal(bson.D{
45+
{"$firstN", bson.D{
46+
{"n", t.N},
47+
{"input", t.Input},
48+
}},
49+
})
50+
}
51+
52+
//----------------------------------------------------------------------
53+
54+
type TopN struct {
55+
N any
56+
SortBy bson.D
57+
Output any
58+
}
59+
60+
var _ bson.Marshaler = TopN{}
61+
62+
func (t TopN) MarshalBSON() ([]byte, error) {
63+
return bson.Marshal(bson.D{
64+
{"$topN", bson.D{
65+
{"n", t.N},
66+
{"sortBy", t.SortBy},
67+
{"output", t.Output},
68+
}},
69+
})
70+
}

agg/agg.go

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
// Package agg provides convenience types for aggregation operators.
2+
// This yields two major advantages over using bson.D or bson.M:
3+
// - simpler syntax
4+
// - auto-completion (i.e., via gopls)
5+
//
6+
// Guiding principles are:
7+
// - Prefer [1]any for 1-arg operators (e.g., `$bsonSize`).
8+
// - Prefer [2]any for binary operators whose arguments don’t benefit
9+
// from naming. (e.g., $eq)
10+
// - Prefer struct types for operators with named parameters.
11+
// - Prefer struct types for operators whose documentation gives names,
12+
// even if those names aren’t sent to the server.
13+
// - Use functions sparingly, e.g., for “tuple” operators like `$in`.
14+
// - Use Go type `any` for generic expressions.
15+
package agg
16+
17+
import (
18+
"go.mongodb.org/mongo-driver/v2/bson"
19+
)
20+
21+
type Eq [2]any
22+
23+
var _ bson.Marshaler = Eq{}
24+
25+
func (e Eq) MarshalBSON() ([]byte, error) {
26+
return bson.Marshal(bson.D{{"$eq", [2]any(e)}})
27+
}
28+
29+
// ---------------------------------------------
30+
31+
type Gt [2]any
32+
33+
func (g Gt) MarshalBSON() ([]byte, error) {
34+
return bson.Marshal(bson.D{{"$gt", [2]any(g)}})
35+
}
36+
37+
// ---------------------------------------------
38+
39+
func In[T any](needle any, haystack []T) bson.D {
40+
return bson.D{{"$in", bson.A{needle, haystack}}}
41+
}
42+
43+
// ---------------------------------------------
44+
45+
type BSONSize [1]any
46+
47+
var _ bson.Marshaler = BSONSize{}
48+
49+
func (b BSONSize) MarshalBSON() ([]byte, error) {
50+
return bson.Marshal(bson.D{{"$bsonSize", b[0]}})
51+
}
52+
53+
// ---------------------------------------------
54+
55+
type Type [1]any
56+
57+
var _ bson.Marshaler = Type{}
58+
59+
func (t Type) MarshalBSON() ([]byte, error) {
60+
return bson.Marshal(bson.D{{"$type", t[0]}})
61+
}
62+
63+
// ---------------------------------------------
64+
65+
type Not [1]any
66+
67+
var _ bson.Marshaler = Not{}
68+
69+
func (n Not) MarshalBSON() ([]byte, error) {
70+
return bson.Marshal(bson.D{{"$not", n[0]}})
71+
}
72+
73+
// ---------------------------------------------
74+
75+
type And []any
76+
77+
var _ bson.Marshaler = And{}
78+
79+
func (a And) MarshalBSON() ([]byte, error) {
80+
return bson.Marshal(bson.D{
81+
{"$and", []any(a)},
82+
})
83+
}
84+
85+
// ---------------------------------------------
86+
87+
type Or []any
88+
89+
var _ bson.Marshaler = Or{}
90+
91+
func (o Or) MarshalBSON() ([]byte, error) {
92+
return bson.Marshal(bson.D{
93+
{"$or", []any(o)},
94+
})
95+
}
96+
97+
// ---------------------------------------------
98+
99+
type MergeObjects []any
100+
101+
var _ bson.Marshaler = MergeObjects{}
102+
103+
func (m MergeObjects) MarshalBSON() ([]byte, error) {
104+
return bson.Marshal(bson.D{
105+
{"$mergeObjects", []any(m)},
106+
})
107+
}
108+
109+
// ---------------------------------------------
110+
111+
type GetField struct {
112+
Input, Field any
113+
}
114+
115+
var _ bson.Marshaler = GetField{}
116+
117+
func (gf GetField) MarshalBSON() ([]byte, error) {
118+
return bson.Marshal(
119+
bson.D{
120+
{"$getField", bson.D{
121+
{"input", gf.Input},
122+
{"field", gf.Field},
123+
}},
124+
},
125+
)
126+
}
127+
128+
// ---------------------------------------------
129+
130+
type Cond struct {
131+
If, Then, Else any
132+
}
133+
134+
var _ bson.Marshaler = Cond{}
135+
136+
func (c Cond) D() bson.D {
137+
return bson.D{
138+
{"$cond", bson.D{
139+
{"if", c.If},
140+
{"then", c.Then},
141+
{"else", c.Else},
142+
}},
143+
}
144+
}
145+
146+
func (c Cond) MarshalBSON() ([]byte, error) {
147+
return bson.Marshal(c.D())
148+
}
149+
150+
// ---------------------------------------------
151+
152+
type Switch struct {
153+
Branches []SwitchCase
154+
Default any
155+
}
156+
157+
var _ bson.Marshaler = Switch{}
158+
159+
type SwitchCase struct {
160+
Case any
161+
Then any
162+
}
163+
164+
func (s Switch) D() bson.D {
165+
return bson.D{{"$switch", bson.D{
166+
{"branches", s.Branches},
167+
{"default", s.Default},
168+
}}}
169+
}
170+
171+
func (s Switch) MarshalBSON() ([]byte, error) {
172+
return bson.Marshal(s.D())
173+
}
174+
175+
// ---------------------------------------------
176+
177+
type Map struct {
178+
Input, As, In any
179+
}
180+
181+
var _ bson.Marshaler = Map{}
182+
183+
func (m Map) D() bson.D {
184+
return bson.D{
185+
{"$map", bson.D{
186+
{"input", m.Input},
187+
{"as", m.As},
188+
{"in", m.In},
189+
}},
190+
}
191+
}
192+
193+
func (m Map) MarshalBSON() ([]byte, error) {
194+
return bson.Marshal(m.D())
195+
}
196+
197+
// ------------------------------------------
198+
199+
type Filter struct {
200+
Input, As, Cond, Limit any
201+
}
202+
203+
var _ bson.Marshaler = Filter{}
204+
205+
func (f Filter) D() bson.D {
206+
d := bson.D{
207+
{"input", f.Input},
208+
{"as", f.As},
209+
{"cond", f.Cond},
210+
}
211+
212+
if f.Limit != nil {
213+
d = append(d, bson.E{"limit", f.Limit})
214+
}
215+
return bson.D{{"$filter", d}}
216+
}
217+
218+
func (f Filter) MarshalBSON() ([]byte, error) {
219+
return bson.Marshal(f.D())
220+
}

agg/array.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package agg
2+
3+
import (
4+
"slices"
5+
6+
"go.mongodb.org/mongo-driver/v2/bson"
7+
)
8+
9+
type Slice struct {
10+
Array any
11+
Position *any
12+
N any
13+
}
14+
15+
func (s Slice) MarshalBSON() ([]byte, error) {
16+
args := []any{s.Array, s.N}
17+
if s.Position != nil {
18+
args = slices.Insert(args, 1, *s.Position)
19+
}
20+
21+
return bson.Marshal(bson.D{
22+
{"$slice", args},
23+
})
24+
}
25+
26+
type ArrayElemAt struct {
27+
Array any
28+
Index any
29+
}
30+
31+
var _ bson.Marshaler = ArrayElemAt{}
32+
33+
func (a ArrayElemAt) D() bson.D {
34+
return bson.D{{"$arrayElemAt", bson.A{
35+
a.Array,
36+
a.Index,
37+
}}}
38+
}
39+
40+
func (a ArrayElemAt) MarshalBSON() ([]byte, error) {
41+
return bson.Marshal(a.D())
42+
}

agg/helpers/exist.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Package helpers exposes functions that express common operations
2+
// that don’t map to a single aggregation operator.
3+
package helpers
4+
5+
import (
6+
"github.com/10gen/migration-verifier/agg"
7+
"go.mongodb.org/mongo-driver/v2/bson"
8+
)
9+
10+
type Exists [1]any
11+
12+
func (e Exists) MarshalBSON() ([]byte, error) {
13+
return bson.Marshal(agg.Not{agg.Eq{"missing", agg.Type{e[0]}}})
14+
}

agg/math.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package agg
2+
3+
import "go.mongodb.org/mongo-driver/v2/bson"
4+
5+
type Subtract [2]any
6+
7+
var _ bson.Marshaler = Subtract{}
8+
9+
func (s Subtract) MarshalBSON() ([]byte, error) {
10+
return bson.Marshal(bson.D{{"$subtract", [2]any(s)}})
11+
}

0 commit comments

Comments
 (0)