diff --git a/benchmark_test.go b/benchmark_test.go
index 8c85d27e..62153322 100644
--- a/benchmark_test.go
+++ b/benchmark_test.go
@@ -1356,3 +1356,65 @@ func BenchmarkCardinalityInRange(b *testing.B) {
 		}
 	}
 }
+
+// BenchmarkFastOrRunContainers measures FastOr over inputs containing
+// runContainer16 slots (the shape AddRange and RunOptimize produce),
+// exercising the runContainer16 -> bitmapContainer pre-promotion in
+// (*Bitmap).lazyOR. See issue #81.
+//
+// go test -bench BenchmarkFastOrRunContainers -benchmem -run - -benchtime=2s
+func BenchmarkFastOrRunContainers(b *testing.B) {
+	const numBitmaps = 15
+	const blocksPerBitmap = 40 // each block = 1<<16 bits
+	const runsPerBlock = 8
+
+	rng := rand.New(rand.NewSource(42))
+	bms := make([]*Bitmap, numBitmaps)
+	for i := 0; i < numBitmaps; i++ {
+		bm := NewBitmap()
+		for blk := 0; blk < blocksPerBitmap; blk++ {
+			base := uint64(blk) << 16
+			offset := uint64(rng.Intn(1000))
+			for r := 0; r < runsPerBlock; r++ {
+				start := base + offset + uint64(r)*8000 + uint64(i*37)
+				end := start + 6000
+				blockEnd := base + (1 << 16)
+				if end > blockEnd {
+					end = blockEnd
+				}
+				if start >= blockEnd {
+					break
+				}
+				bm.AddRange(start, end)
+			}
+		}
+		bm.RunOptimize()
+		bms[i] = bm
+	}
+
+	// Sanity-check: the workload must actually contain runContainer16,
+	// otherwise the bench wouldn't exercise the patched path.
+	hasRunContainer := false
+	for _, bm := range bms {
+		for _, c := range bm.highlowcontainer.containers {
+			if _, ok := c.(*runContainer16); ok {
+				hasRunContainer = true
+				break
+			}
+		}
+		if hasRunContainer {
+			break
+		}
+	}
+	if !hasRunContainer {
+		b.Fatalf("workload did not produce any runContainer16; bench would not exercise the patched path")
+	}
+
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		res := FastOr(bms...)
+		if res.GetCardinality() == 0 {
+			b.Fatal("unexpected empty result")
+		}
+	}
+}
diff --git a/fastaggregation.go b/fastaggregation.go
index 7d0a92fe..3881b88a 100644
--- a/fastaggregation.go
+++ b/fastaggregation.go
@@ -81,6 +81,13 @@ main:
 				s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
 			} else {
 				c1 := x1.highlowcontainer.getWritableContainerAtIndex(pos1)
+				// runContainer16.lazyIOR falls back to a slow ior path
+				// (O(N log R) per merged element); promote to bitmapContainer
+				// first, whose lazy union is O(1024) regardless of cardinality.
+				// See https://github.com/RoaringBitmap/roaring/issues/81.
+				if rc, ok := c1.(*runContainer16); ok && !rc.isFull() {
+					c1 = rc.toBitmapContainer()
+				}
 				x1.highlowcontainer.containers[pos1] = c1.lazyIOR(x2.highlowcontainer.getContainerAtIndex(pos2))
 				x1.highlowcontainer.needCopyOnWrite[pos1] = false
 				pos1++