From 7fb2db7a66435cf29294e88ca7c14f86e0312ea6 Mon Sep 17 00:00:00 2001
From: Daniel Frankcom <frankcom@amazon.com>
Date: Wed, 8 Apr 2026 11:16:05 -0700
Subject: [PATCH 1/6] Add $match stage tests

Add parametrized test cases for the $match aggregation stage,
organized into files by property group:

- test_match_core.py: null/missing, equality, predicates, empty/large
- test_match_query_operators.py: one representative per query category
- test_match_stage_position.py: pipeline position and $text first-stage
- test_match_errors.py: argument validation and restricted operators

Replace test_match_stage.py with test_smoke_match.py. Add StageTestCase
dataclass and new error codes used by the tests.

Signed-off-by: Daniel Frankcom <frankcom@amazon.com>
---
 .../core/operator/stages/match/__init__.py    |   0
 .../operator/stages/match/test_match_core.py  | 241 ++++++++++++++++++
 .../stages/match/test_match_errors.py         | 203 +++++++++++++++
 .../match/test_match_query_operators.py       | 129 ++++++++++
 .../operator/stages/match/test_match_stage.py |  92 -------
 .../stages/match/test_match_stage_position.py | 180 +++++++++++++
 .../operator/stages/match/test_smoke_match.py |  38 +++
 .../core/operator/stages/utils/__init__.py    |   0
 .../operator/stages/utils/stage_test_case.py  |  17 ++
 documentdb_tests/framework/error_codes.py     |   3 +
 10 files changed, 811 insertions(+), 92 deletions(-)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/__init__.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_errors.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py
 delete mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_smoke_match.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/utils/__init__.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py

diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py
new file mode 100644
index 00000000..811e51e6
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py
@@ -0,0 +1,241 @@
+"""Tests for $match core matching behavior."""
+
+from __future__ import annotations
+
+import pytest
+from bson.son import SON
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Null and Missing Field Matching]: {field: null} matches both
+# documents where the field is null-valued and documents where the field is
+# missing entirely.
+MATCH_NULL_MISSING_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "null_matches_null_valued",
+        docs=[{"_id": 1, "a": None}, {"_id": 2, "a": "x"}],
+        pipeline=[{"$match": {"a": None}}],
+        expected=[{"_id": 1, "a": None}],
+        msg="$match with {field: null} should match documents where the field is null",
+    ),
+    StageTestCase(
+        "null_matches_missing_field",
+        docs=[{"_id": 1, "a": "x"}, {"_id": 2}],
+        pipeline=[{"$match": {"a": None}}],
+        expected=[{"_id": 2}],
+        msg="$match with {field: null} should match documents where the field is missing",
+    ),
+    StageTestCase(
+        "null_matches_both_null_and_missing",
+        docs=[
+            {"_id": 1, "a": None},
+            {"_id": 2, "a": "x"},
+            {"_id": 3},
+            {"_id": 4, "a": 0},
+        ],
+        pipeline=[{"$match": {"a": None}}],
+        expected=[{"_id": 1, "a": None}, {"_id": 3}],
+        msg="$match with {field: null} should match both null-valued and missing-field documents",
+    ),
+    StageTestCase(
+        "null_excludes_falsy_values",
+        docs=[
+            {"_id": 1, "a": None},
+            {"_id": 2, "a": 0},
+            {"_id": 3, "a": False},
+            {"_id": 4, "a": ""},
+        ],
+        pipeline=[{"$match": {"a": None}}],
+        expected=[{"_id": 1, "a": None}],
+        msg="$match with {field: null} should not match falsy non-null values",
+    ),
+]
+
+# Property [Core Matching Behavior]: simple equality filtering, insertion
+# order preservation, and matching against an empty collection all work
+# correctly.
+MATCH_CORE_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "core_equality_single_match",
+        docs=[
+            {"_id": 1, "a": 10, "b": "x"},
+            {"_id": 2, "a": 20, "b": "y"},
+            {"_id": 3, "a": 10, "b": "z"},
+        ],
+        pipeline=[{"$match": {"a": 10}}],
+        expected=[
+            {"_id": 1, "a": 10, "b": "x"},
+            {"_id": 3, "a": 10, "b": "z"},
+        ],
+        msg="$match should filter documents to those where the field equals the value",
+    ),
+    StageTestCase(
+        "core_insertion_order",
+        docs=[
+            {"_id": 3, "a": 1},
+            {"_id": 1, "a": 1},
+            {"_id": 2, "a": 1},
+        ],
+        pipeline=[{"$match": {"a": 1}}],
+        expected=[
+            {"_id": 3, "a": 1},
+            {"_id": 1, "a": 1},
+            {"_id": 2, "a": 1},
+        ],
+        msg="$match should return documents in insertion order, not sorted by _id",
+    ),
+    StageTestCase(
+        "core_empty_collection",
+        docs=[],
+        pipeline=[{"$match": {"a": 1}}],
+        expected=[],
+        msg="$match on empty collection should return empty result",
+    ),
+]
+
+# Property [Predicate Semantics]: non-obvious predicate edge cases that may differ across engines.
+# NOTE(review): SON is a dict subclass, so duplicate-key cases may collapse client-side - confirm.
+MATCH_PREDICATE_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "predicate_comment_ignored",
+        docs=[
+            {"_id": 1, "a": 10},
+            {"_id": 2, "a": 20},
+            {"_id": 3, "a": 10},
+        ],
+        pipeline=[{"$match": {"a": 10, "$comment": "this is a comment"}}],
+        expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}],
+        msg="$match should ignore $comment in the predicate and filter normally",
+    ),
+    StageTestCase(
+        "predicate_contradictory_empty",
+        docs=[
+            {"_id": 1, "a": 3},
+            {"_id": 2, "a": 7},
+            {"_id": 3, "a": 15},
+        ],
+        pipeline=[{"$match": {"a": {"$gt": 10, "$lt": 5}}}],
+        expected=[],
+        msg="$match with contradictory conditions should return empty result without error",
+    ),
+    StageTestCase(
+        "predicate_dollar_string_literal",
+        docs=[
+            {"_id": 1, "a": "$notAFieldRef"},
+            {"_id": 2, "a": "hello"},
+        ],
+        pipeline=[{"$match": {"a": "$notAFieldRef"}}],
+        expected=[{"_id": 1, "a": "$notAFieldRef"}],
+        msg="$match should treat $-prefixed strings as literal values, not field references",
+    ),
+    StageTestCase(
+        "predicate_duplicate_field_last_wins_equality",
+        docs=[
+            {"_id": 1, "a": 10},
+            {"_id": 2, "a": 20},
+        ],
+        pipeline=[{"$match": SON([("a", 10), ("a", 20)])}],
+        expected=[{"_id": 2, "a": 20}],
+        msg="$match with duplicate field names should use last-value-wins semantics",
+    ),
+    StageTestCase(
+        "predicate_duplicate_field_last_wins_operator",
+        docs=[
+            {"_id": 1, "a": 10},
+            {"_id": 2, "a": 20},
+        ],
+        pipeline=[{"$match": SON([("a", {"$gt": 15}), ("a", {"$lt": 15})])}],
+        expected=[{"_id": 1, "a": 10}],
+        msg="$match with duplicate operator predicates should use last-value-wins, not AND",
+    ),
+    StageTestCase(
+        "predicate_dot_notation",
+        docs=[
+            {"_id": 1, "a": {"b": 10}},
+            {"_id": 2, "a": {"b": 20}},
+        ],
+        pipeline=[{"$match": {"a.b": 10}}],
+        expected=[{"_id": 1, "a": {"b": 10}}],
+        msg="$match should support dot notation to match nested fields",
+    ),
+]
+
+# Property [Empty Predicate]: {$match: {}} returns all documents, and an
+# empty collection always returns an empty result.
+MATCH_EMPTY_PREDICATE_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "empty_predicate_returns_all",
+        docs=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}],
+        pipeline=[{"$match": {}}],
+        expected=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}],
+        msg="$match with empty predicate should return all documents",
+    ),
+    StageTestCase(
+        "empty_collection_empty_predicate",
+        docs=[],
+        pipeline=[{"$match": {}}],
+        expected=[],
+        msg="$match on empty collection with empty predicate should return empty result",
+    ),
+]
+
+# Property [Large Predicate]: $match handles predicates with many conditions
+# without error.
+MATCH_LARGE_PREDICATE_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "large_many_field_conditions",
+        docs=[
+            {"_id": 1, **{f"f{i}": i for i in range(500)}},
+            {"_id": 2, "f0": 0},
+        ],
+        pipeline=[{"$match": {f"f{i}": i for i in range(500)}}],
+        expected=[{"_id": 1, **{f"f{i}": i for i in range(500)}}],
+        msg="$match should handle a predicate with 500 field conditions",
+    ),
+    StageTestCase(
+        "large_many_or_branches",
+        docs=[
+            {"_id": 1, "a": 0},
+            {"_id": 2, "a": 499},
+            {"_id": 3, "a": 999},
+        ],
+        pipeline=[{"$match": {"$or": [{"a": i} for i in range(500)]}}],
+        expected=[{"_id": 1, "a": 0}, {"_id": 2, "a": 499}],
+        msg="$match should handle an $or predicate with 500 branches",
+    ),
+]
+
+MATCH_CORE_TESTS_ALL = (
+    MATCH_NULL_MISSING_TESTS
+    + MATCH_CORE_TESTS
+    + MATCH_PREDICATE_TESTS
+    + MATCH_EMPTY_PREDICATE_TESTS
+    + MATCH_LARGE_PREDICATE_TESTS
+)
+
+
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(MATCH_CORE_TESTS_ALL))
+def test_match_core_cases(collection, test_case: StageTestCase):
+    """Run one core $match case: seed the documents, aggregate, and check the outcome."""
+    # Falsy docs (None or an empty list) means there is nothing to seed.
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+
+    aggregate_command = {
+        "aggregate": collection.name,
+        "pipeline": test_case.pipeline,
+        "cursor": {},
+    }
+    outcome = execute_command(collection, aggregate_command)
+    assertResult(
+        outcome,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_errors.py
new file mode 100644
index 00000000..2be8a321
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_errors.py
@@ -0,0 +1,203 @@
+"""Tests for $match argument validation and restricted operator errors."""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+import pytest
+from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.error_codes import (
+    BAD_VALUE_ERROR,
+    MATCH_FILTER_NOT_OBJECT_ERROR,
+    MATCH_TEXT_NOT_FIRST_STAGE_ERROR,
+    NEAR_NOT_ALLOWED_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Argument Validation Errors]: non-document arguments to $match
+# produce an error.
+MATCH_ARGUMENT_VALIDATION_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "arg_error_string",
+        pipeline=[{"$match": "hello"}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a string argument",
+    ),
+    StageTestCase(
+        "arg_error_int",
+        pipeline=[{"$match": 42}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject an integer argument",
+    ),
+    StageTestCase(
+        "arg_error_float",
+        pipeline=[{"$match": 3.14}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a float argument",
+    ),
+    StageTestCase(
+        "arg_error_bool",
+        pipeline=[{"$match": True}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a boolean argument",
+    ),
+    StageTestCase(
+        "arg_error_null",
+        pipeline=[{"$match": None}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a null argument",
+    ),
+    StageTestCase(
+        "arg_error_array",
+        pipeline=[{"$match": [1, 2]}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject an array argument",
+    ),
+    StageTestCase(
+        "arg_error_int64",
+        pipeline=[{"$match": Int64(42)}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject an int64 argument",
+    ),
+    StageTestCase(
+        "arg_error_decimal128",
+        pipeline=[{"$match": Decimal128("3.14")}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a decimal128 argument",
+    ),
+    StageTestCase(
+        "arg_error_objectid",
+        pipeline=[{"$match": ObjectId()}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject an ObjectId argument",
+    ),
+    StageTestCase(
+        "arg_error_datetime",
+        pipeline=[{"$match": datetime(2024, 1, 1)}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a datetime argument",
+    ),
+    StageTestCase(
+        "arg_error_binary",
+        pipeline=[{"$match": Binary(b"\x00\x01")}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a binary argument",
+    ),
+    StageTestCase(
+        "arg_error_regex",
+        pipeline=[{"$match": Regex("^abc")}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a regex argument",
+    ),
+    StageTestCase(
+        "arg_error_timestamp",
+        pipeline=[{"$match": Timestamp(0, 0)}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a timestamp argument",
+    ),
+    StageTestCase(
+        "arg_error_minkey",
+        pipeline=[{"$match": MinKey()}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a MinKey argument",
+    ),
+    StageTestCase(
+        "arg_error_maxkey",
+        pipeline=[{"$match": MaxKey()}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a MaxKey argument",
+    ),
+    StageTestCase(
+        "arg_error_code",
+        pipeline=[{"$match": Code("function(){}")}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a JavaScript code argument",
+    ),
+    StageTestCase(
+        "arg_error_code_with_scope",
+        pipeline=[{"$match": Code("function(){}", {"x": 1})}],
+        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
+        msg="$match should reject a JavaScript code with scope argument",
+    ),
+]
+
+# Property [Restricted Operator Errors]: $where, $near, $nearSphere, $geoNear,
+# and unknown $-prefixed top-level operators are rejected inside $match; $text
+# is rejected when $match is not the first pipeline stage.
+MATCH_RESTRICTED_OPERATOR_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "restricted_where",
+        docs=[{"_id": 1, "a": 10}],
+        pipeline=[{"$match": {"$where": "true"}}],
+        error_code=BAD_VALUE_ERROR,
+        msg="$match should reject $where in the predicate",
+    ),
+    StageTestCase(
+        "restricted_near",
+        docs=[{"_id": 1, "loc": [0, 0]}],
+        pipeline=[{"$match": {"loc": {"$near": [0, 0]}}}],
+        error_code=NEAR_NOT_ALLOWED_ERROR,
+        msg="$match should reject $near in the predicate",
+    ),
+    StageTestCase(
+        "restricted_near_sphere",
+        docs=[{"_id": 1, "loc": [0, 0]}],
+        pipeline=[{"$match": {"loc": {"$nearSphere": [0, 0]}}}],
+        error_code=NEAR_NOT_ALLOWED_ERROR,
+        msg="$match should reject $nearSphere in the predicate",
+    ),
+    StageTestCase(
+        "restricted_geo_near",
+        docs=[{"_id": 1, "loc": [0, 0]}],
+        pipeline=[{"$match": {"loc": {"$geoNear": {"near": [0, 0]}}}}],
+        error_code=NEAR_NOT_ALLOWED_ERROR,
+        msg="$match should reject $geoNear in the predicate",
+    ),
+    StageTestCase(
+        "restricted_text_non_first_stage",
+        docs=[{"_id": 1, "a": 10}],
+        pipeline=[
+            {"$project": {"a": 1}},
+            {"$match": {"$text": {"$search": "hello"}}},
+        ],
+        error_code=MATCH_TEXT_NOT_FIRST_STAGE_ERROR,
+        msg="$match should reject $text when it is not the first pipeline stage",
+    ),
+    StageTestCase(
+        "restricted_unknown_dollar_operator",
+        docs=[{"_id": 1, "a": 10}],
+        pipeline=[{"$match": {"$fakeOperator": 1}}],
+        error_code=BAD_VALUE_ERROR,
+        msg="$match should reject an unknown $-prefixed top-level operator",
+    ),
+]
+
+MATCH_ERROR_TESTS_ALL = MATCH_ARGUMENT_VALIDATION_TESTS + MATCH_RESTRICTED_OPERATOR_TESTS
+
+
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(MATCH_ERROR_TESTS_ALL))
+def test_match_error_cases(collection, test_case: StageTestCase):
+    """Run one $match error case: seed any documents, aggregate, and check the outcome."""
+    # Falsy docs (None or an empty list) means there is nothing to seed.
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+
+    aggregate_command = {
+        "aggregate": collection.name,
+        "pipeline": test_case.pipeline,
+        "cursor": {},
+    }
+    outcome = execute_command(collection, aggregate_command)
+    assertResult(
+        outcome,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py
new file mode 100644
index 00000000..34ed842c
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py
@@ -0,0 +1,129 @@
+"""Tests for $match query operator categories."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Query Operator Categories]: one representative per query operator
+# category functions correctly inside $match as a container.
+MATCH_QUERY_OPERATOR_TESTS: list[StageTestCase] = [
+    # Comparison operators.
+    StageTestCase(
+        "query_comparison_gt",
+        docs=[
+            {"_id": 1, "a": 3},
+            {"_id": 2, "a": 7},
+            {"_id": 3, "a": 10},
+        ],
+        pipeline=[{"$match": {"a": {"$gt": 5}}}],
+        expected=[{"_id": 2, "a": 7}, {"_id": 3, "a": 10}],
+        msg="$match should support comparison query operators",
+    ),
+    # Logical operators.
+    StageTestCase(
+        "query_logical_or",
+        docs=[
+            {"_id": 1, "a": 1, "b": 10},
+            {"_id": 2, "a": 2, "b": 20},
+            {"_id": 3, "a": 3, "b": 30},
+        ],
+        pipeline=[{"$match": {"$or": [{"a": 1}, {"b": 30}]}}],
+        expected=[
+            {"_id": 1, "a": 1, "b": 10},
+            {"_id": 3, "a": 3, "b": 30},
+        ],
+        msg="$match should support logical query operators",
+    ),
+    # Element operators.
+    StageTestCase(
+        "query_element_exists",
+        docs=[{"_id": 1, "a": 10}, {"_id": 2, "b": 20}, {"_id": 3, "a": 30}],
+        pipeline=[{"$match": {"a": {"$exists": True}}}],
+        expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 30}],
+        msg="$match should support element query operators",
+    ),
+    # Evaluation operators.
+    StageTestCase(
+        "query_eval_expr",
+        docs=[
+            {"_id": 1, "a": 10, "b": 10},
+            {"_id": 2, "a": 20, "b": 30},
+            {"_id": 3, "a": 5, "b": 5},
+        ],
+        pipeline=[{"$match": {"$expr": {"$eq": ["$a", "$b"]}}}],
+        expected=[
+            {"_id": 1, "a": 10, "b": 10},
+            {"_id": 3, "a": 5, "b": 5},
+        ],
+        msg="$match should support evaluation query operators",
+    ),
+    # Array operators.
+    StageTestCase(
+        "query_array_elemmatch",
+        docs=[
+            {"_id": 1, "arr": [0.5, 0.8, 0.95]},
+            {"_id": 2, "arr": [0.1, 0.3]},
+            {"_id": 3, "arr": [0.9, 1.0]},
+        ],
+        pipeline=[{"$match": {"arr": {"$elemMatch": {"$gte": 0.9}}}}],
+        expected=[
+            {"_id": 1, "arr": [0.5, 0.8, 0.95]},
+            {"_id": 3, "arr": [0.9, 1.0]},
+        ],
+        msg="$match should support array query operators",
+    ),
+    # Bitwise operators.
+    StageTestCase(
+        "query_bitwise_bitsallset",
+        docs=[
+            {"_id": 1, "flags": 7},
+            {"_id": 2, "flags": 3},
+            {"_id": 3, "flags": 15},
+        ],
+        # Bitmask 5 (binary 0101): flags 7 (0111) and 15 (1111) match.
+        pipeline=[{"$match": {"flags": {"$bitsAllSet": 5}}}],
+        expected=[{"_id": 1, "flags": 7}, {"_id": 3, "flags": 15}],
+        msg="$match should support bitwise query operators",
+    ),
+    # Geospatial operators.
+    StageTestCase(
+        "query_geo_geowithin",
+        docs=[
+            {"_id": 1, "loc": [0, 0]},
+            {"_id": 2, "loc": [50, 50]},
+            {"_id": 3, "loc": [1, 1]},
+        ],
+        pipeline=[{"$match": {"loc": {"$geoWithin": {"$center": [[0, 0], 10]}}}}],
+        expected=[{"_id": 1, "loc": [0, 0]}, {"_id": 3, "loc": [1, 1]}],
+        msg="$match should support geospatial query operators",
+    ),
+]
+
+
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(MATCH_QUERY_OPERATOR_TESTS))
+def test_match_query_operator_cases(collection, test_case: StageTestCase):
+    """Run one query-operator $match case: seed documents, aggregate, and check the outcome."""
+    # Falsy docs (None or an empty list) means there is nothing to seed.
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+
+    aggregate_command = {
+        "aggregate": collection.name,
+        "pipeline": test_case.pipeline,
+        "cursor": {},
+    }
+    outcome = execute_command(collection, aggregate_command)
+    assertResult(
+        outcome,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage.py
deleted file mode 100644
index 256a62fc..00000000
--- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage.py
+++ /dev/null
@@ -1,92 +0,0 @@
-"""
-Aggregation $match stage tests.
-
-Tests for the $match stage in aggregation pipelines.
-"""
-
-import pytest
-
-from documentdb_tests.framework.assertions import assertSuccess
-from documentdb_tests.framework.executor import execute_command
-
-
-@pytest.mark.aggregate
-@pytest.mark.smoke
-def test_match_simple_filter(collection):
-    """Test $match stage with simple equality filter."""
-    collection.insert_many(
-        [
-            {"_id": 0, "a": "A", "b": 30, "c": "active"},
-            {"_id": 1, "a": "B", "b": 25, "c": "active"},
-            {"_id": 2, "a": "C", "b": 35, "c": "inactive"},
-        ]
-    )
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": [{"$match": {"c": "active"}}], "cursor": {}},
-    )
-
-    expected = [
-        {"_id": 0, "a": "A", "b": 30, "c": "active"},
-        {"_id": 1, "a": "B", "b": 25, "c": "active"},
-    ]
-    assertSuccess(result, expected, "Should match active documents")
-
-
-@pytest.mark.aggregate
-def test_match_with_comparison_operator(collection):
-    """Test $match stage with comparison operators."""
-    collection.insert_many(
-        [
-            {"_id": 0, "a": "A", "b": 30},
-            {"_id": 1, "a": "B", "b": 25},
-            {"_id": 2, "a": "C", "b": 35},
-        ]
-    )
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": [{"$match": {"b": {"$gt": 25}}}], "cursor": {}},
-    )
-
-    expected = [{"_id": 0, "a": "A", "b": 30}, {"_id": 2, "a": "C", "b": 35}]
-    assertSuccess(result, expected, "Should match documents with b > 25")
-
-
-@pytest.mark.aggregate
-def test_match_multiple_conditions(collection):
-    """Test $match stage with multiple filter conditions."""
-    collection.insert_many(
-        [
-            {"_id": 0, "a": "A", "b": 30, "c": "NYC"},
-            {"_id": 1, "a": "B", "b": 25, "c": "SF"},
-            {"_id": 2, "a": "C", "b": 35, "c": "SF"},
-        ]
-    )
-    result = execute_command(
-        collection,
-        {
-            "aggregate": collection.name,
-            "pipeline": [{"$match": {"c": "NYC", "b": {"$gte": 30}}}],
-            "cursor": {},
-        },
-    )
-
-    expected = [{"_id": 0, "a": "A", "b": 30, "c": "NYC"}]
-    assertSuccess(result, expected, "Should match multiple conditions")
-
-
-@pytest.mark.aggregate
-def test_match_empty_result(collection):
-    """Test $match stage that matches no documents."""
-    collection.insert_many(
-        [
-            {"_id": 0, "a": "A", "b": "active"},
-            {"_id": 1, "a": "B", "b": "active"},
-        ]
-    )
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": [{"$match": {"b": "inactive"}}], "cursor": {}},
-    )
-
-    assertSuccess(result, [], "Should return empty result when no match")
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py
new file mode 100644
index 00000000..ef7b43ec
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py
@@ -0,0 +1,180 @@
+"""Tests for $match pipeline position behavior."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Pipeline Position]: $match filters correctly regardless of its
+# position in the pipeline and composes with preceding stages that reshape
+# documents.
+MATCH_PIPELINE_POSITION_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "pipeline_first_stage",
+        docs=[
+            {"_id": 1, "a": 10},
+            {"_id": 2, "a": 20},
+            {"_id": 3, "a": 10},
+        ],
+        pipeline=[{"$match": {"a": 10}}],
+        expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}],
+        msg="$match should work as the first stage of a pipeline",
+    ),
+    StageTestCase(
+        "pipeline_middle_stage",
+        docs=[
+            {"_id": 1, "a": 10, "b": "x"},
+            {"_id": 2, "a": 20, "b": "y"},
+            {"_id": 3, "a": 10, "b": "z"},
+        ],
+        pipeline=[
+            {"$project": {"a": 1}},
+            {"$match": {"a": 10}},
+            {"$project": {"a": 1}},
+        ],
+        expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}],
+        msg="$match should work as a middle stage of a pipeline",
+    ),
+    StageTestCase(
+        "pipeline_last_stage",
+        docs=[
+            {"_id": 1, "a": 10},
+            {"_id": 2, "a": 20},
+            {"_id": 3, "a": 10},
+        ],
+        pipeline=[
+            {"$project": {"a": 1}},
+            {"$match": {"a": 10}},
+        ],
+        expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}],
+        msg="$match should work as the last stage of a pipeline",
+    ),
+    StageTestCase(
+        "pipeline_consecutive_match",
+        docs=[
+            {"_id": 1, "a": 10, "b": 1},
+            {"_id": 2, "a": 20, "b": 2},
+            {"_id": 3, "a": 10, "b": 3},
+        ],
+        pipeline=[
+            {"$match": {"a": 10}},
+            {"$match": {"b": 3}},
+        ],
+        expected=[{"_id": 3, "a": 10, "b": 3}],
+        msg="$match consecutive stages should compose like $and of all predicates",
+    ),
+    StageTestCase(
+        "pipeline_after_reshape_dropped_field",
+        docs=[
+            {"_id": 1, "a": 10, "b": "x"},
+            {"_id": 2, "a": 20, "b": "y"},
+        ],
+        pipeline=[
+            {"$project": {"b": 1}},
+            {"$match": {"a": 10}},
+        ],
+        expected=[],
+        msg="$match on a field dropped by a preceding stage should return empty",
+    ),
+    StageTestCase(
+        "pipeline_after_reshape_computed_field",
+        docs=[
+            {"_id": 1, "a": 10},
+            {"_id": 2, "a": 20},
+        ],
+        pipeline=[
+            {"$project": {"doubled": {"$multiply": ["$a", 2]}}},
+            {"$match": {"doubled": 40}},
+        ],
+        expected=[{"_id": 2, "doubled": 40}],
+        msg="$match should filter on fields computed by a preceding stage",
+    ),
+    StageTestCase(
+        "pipeline_after_aggregation_computed_field",
+        docs=[
+            {"_id": 1, "cat": "a", "val": 5},
+            {"_id": 2, "cat": "b", "val": 3},
+            {"_id": 3, "cat": "a", "val": 7},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$cat", "total": {"$sum": "$val"}}},
+            {"$match": {"total": 12}},
+        ],
+        expected=[{"_id": "a", "total": 12}],
+        msg="$match should filter on fields produced by an aggregation stage",
+    ),
+    StageTestCase(
+        "pipeline_after_aggregation_dropped_field",
+        docs=[
+            {"_id": 1, "cat": "a", "val": 5},
+            {"_id": 2, "cat": "b", "val": 3},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$cat", "total": {"$sum": "$val"}}},
+            {"$match": {"val": 5}},
+        ],
+        expected=[],
+        msg="$match on a field absent from aggregation output should return empty",
+    ),
+    StageTestCase(
+        "pipeline_after_root_replacement",
+        docs=[
+            {"_id": 1, "inner": {"x": 10}},
+            {"_id": 2, "inner": {"x": 20}},
+        ],
+        pipeline=[
+            {"$replaceRoot": {"newRoot": "$inner"}},
+            {"$match": {"x": 10}},
+        ],
+        expected=[{"x": 10}],
+        msg="$match should filter on the document shape produced by a root replacement stage",
+    ),
+]
+
+# Property [$text First-Stage Behavior]: $text search works inside $match when
+# $match is the first stage of the pipeline.
+MATCH_TEXT_FIRST_STAGE_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "text_first_stage",
+        docs=[
+            {"_id": 1, "content": "hello world"},
+            {"_id": 2, "content": "goodbye world"},
+        ],
+        setup=lambda collection: collection.create_index([("content", "text")]),
+        pipeline=[{"$match": {"$text": {"$search": "goodbye"}}}],
+        expected=[{"_id": 2, "content": "goodbye world"}],
+        msg="$match with $text should work when it is the first pipeline stage",
+    ),
+]
+
+MATCH_STAGE_POSITION_TESTS_ALL = MATCH_PIPELINE_POSITION_TESTS + MATCH_TEXT_FIRST_STAGE_TESTS
+
+
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(MATCH_STAGE_POSITION_TESTS_ALL))
+def test_match_stage_position_cases(collection, test_case: StageTestCase):
+    """Run one pipeline-position $match case: set up, seed, aggregate, and check the outcome."""
+    # The optional setup hook runs before any documents are inserted.
+    if test_case.setup:
+        test_case.setup(collection)
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+
+    aggregate_command = {
+        "aggregate": collection.name,
+        "pipeline": test_case.pipeline,
+        "cursor": {},
+    }
+    outcome = execute_command(collection, aggregate_command)
+    assertResult(
+        outcome,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_smoke_match.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_smoke_match.py
new file mode 100644
index 00000000..e60a2369
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_smoke_match.py
@@ -0,0 +1,38 @@
+"""
+Smoke test for $match stage.
+
+Tests basic $match functionality.
+"""
+
+import pytest
+
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+
+
+@pytest.mark.aggregate
+@pytest.mark.smoke
+def test_smoke_match(collection):
+    """Smoke-test $match with an equality predicate; result order is not asserted."""
+    collection.insert_many(
+        [
+            {"_id": 1, "status": "active", "value": 10},
+            {"_id": 2, "status": "inactive", "value": 20},
+            {"_id": 3, "status": "active", "value": 30},
+        ]
+    )
+
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": [{"$match": {"status": "active"}}],
+            "cursor": {},
+        },
+    )
+
+    expected = [
+        {"_id": 1, "status": "active", "value": 10},
+        {"_id": 3, "status": "active", "value": 30},
+    ]
+    assertSuccess(result, expected, msg="Should support $match stage", ignore_doc_order=True)
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py b/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py
new file mode 100644
index 00000000..a06d47b7
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py
@@ -0,0 +1,17 @@
+"""
+Shared test case for pipeline stage tests.
+"""
+
+from dataclasses import dataclass
+from typing import Any, Callable, Optional
+
+from documentdb_tests.framework.test_case import BaseTestCase
+
+
+@dataclass(frozen=True)
+class StageTestCase(BaseTestCase):
+    """Immutable parametrized case for pipeline stage tests; extends BaseTestCase."""
+
+    docs: Optional[list[dict[str, Any]]] = None  # inserted into the collection before the run
+    pipeline: Optional[list[dict[str, Any]]] = None  # stages sent as the aggregate pipeline
+    setup: Optional[Callable] = None  # optional hook; runners call it with the collection first
diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py
index c285b5aa..0471e128 100644
--- a/documentdb_tests/framework/error_codes.py
+++ b/documentdb_tests/framework/error_codes.py
@@ -5,10 +5,12 @@
 
 DIVIDE_BY_ZERO_ERROR = 2
 TYPE_MISMATCH_ERROR = 14
+MATCH_FILTER_NOT_OBJECT_ERROR = 15959
 EXPRESSION_TYPE_MISMATCH_ERROR = 16020
 MODULO_ZERO_REMAINDER_ERROR = 16610
 MODULO_NON_NUMERIC_ERROR = 16611
 MORE_THAN_ONE_DATE_ERROR = 16612
+MATCH_TEXT_NOT_FIRST_STAGE_ERROR = 17313
 ABS_OVERFLOW_ERROR = 28680
 LOG_NON_NUMERIC_VALUE_ERROR = 28756
 LOG_NON_NUMERIC_BASE_ERROR = 28757
@@ -20,4 +22,5 @@
 POW_BASE_ZERO_EXP_NEGATIVE_ERROR = 28764
 NON_NUMERIC_TYPE_MISMATCH_ERROR = 28765
 LN_NON_POSITIVE_INPUT_ERROR = 28766
+NEAR_NOT_ALLOWED_ERROR = 5626500
 MODULO_DECIMAL128_ZERO_REMAINDER_ERROR = 5733415

From 0d037a49bfa7d11d120c23ab32fa5066993e3b31 Mon Sep 17 00:00:00 2001
From: Daniel Frankcom <frankcom@amazon.com>
Date: Wed, 8 Apr 2026 11:16:05 -0700
Subject: [PATCH 2/6] Add dot notation array index vs object key test cases

Signed-off-by: Daniel Frankcom <frankcom@amazon.com>
---
 .../operator/stages/match/test_match_core.py  | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py
index 811e51e6..0f2ea360 100644
--- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py
@@ -163,6 +163,43 @@
         expected=[{"_id": 1, "a": {"b": 10}}],
         msg="$match should support dot notation to match nested fields",
     ),
+    StageTestCase(
+        "predicate_dot_notation_array_index",
+        docs=[
+            {"_id": 1, "a": [{"b": 10}, {"b": 20}]},
+            {"_id": 2, "a": [{"b": 30}, {"b": 40}]},
+            {"_id": 3, "a": {"0": {"b": 99}}},
+        ],
+        pipeline=[{"$match": {"a.0.b": 10}}],
+        expected=[{"_id": 1, "a": [{"b": 10}, {"b": 20}]}],
+        msg="$match with numeric dot path should resolve as array index",
+    ),
+    StageTestCase(
+        "predicate_dot_notation_object_key",
+        docs=[
+            {"_id": 1, "a": [{"b": 10}, {"b": 20}]},
+            {"_id": 2, "a": [{"b": 30}, {"b": 40}]},
+            {"_id": 3, "a": {"0": {"b": 99}}},
+        ],
+        pipeline=[{"$match": {"a.0.b": 99}}],
+        expected=[{"_id": 3, "a": {"0": {"b": 99}}}],
+        msg="$match with numeric dot path should also match object keys",
+    ),
+    StageTestCase(
+        "predicate_dot_notation_array_index_and_object_key",
+        docs=[
+            {"_id": 1, "a": [{"b": 10}, {"b": 20}]},
+            {"_id": 2, "a": [{"b": 10}, {"b": 40}]},
+            {"_id": 3, "a": {"0": {"b": 10}}},
+        ],
+        pipeline=[{"$match": {"a.0.b": 10}}],
+        expected=[
+            {"_id": 1, "a": [{"b": 10}, {"b": 20}]},
+            {"_id": 2, "a": [{"b": 10}, {"b": 40}]},
+            {"_id": 3, "a": {"0": {"b": 10}}},
+        ],
+        msg="$match with numeric dot path should match both array index and object key",
+    ),
 ]
 
 # Property [Empty Predicate]: {$match: {}} returns all documents, and an

From 05c70834e32901ab132001e833591db4ffb29ff7 Mon Sep 17 00:00:00 2001
From: Daniel Frankcom <frankcom@amazon.com>
Date: Wed, 8 Apr 2026 11:16:05 -0700
Subject: [PATCH 3/6] Add ignore_doc_order to assertResult

Signed-off-by: Daniel Frankcom <frankcom@amazon.com>
---
 documentdb_tests/framework/assertions.py | 31 +++++++++++++-----------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/documentdb_tests/framework/assertions.py b/documentdb_tests/framework/assertions.py
index 2fe70e43..7e40331b 100644
--- a/documentdb_tests/framework/assertions.py
+++ b/documentdb_tests/framework/assertions.py
@@ -17,6 +17,11 @@
 _NUMERIC_BSON_TYPES = (int, float, Int64, Decimal128)
 
 
+def _sort_if_list(value: Any) -> Any:
+    """Return a copy of a list ordered by each element's str(); return non-lists unchanged."""
+    return sorted(value, key=lambda x: str(x)) if isinstance(value, list) else value
+
+
 def _strict_equal(a: Any, b: Any) -> bool:
     """Equality with stricter semantics for BSON numeric types.
 
@@ -86,7 +91,7 @@ def assertSuccess(
         raw_res: If asserting raw result. False by default,
             only compare content of ["cursor"]["firstBatch"]
         transform: Optional callback to transform result before comparison
-        ignore_doc_order: If True, compare lists as sets (order-independent)
+        ignore_doc_order: If True, compare lists ignoring order (duplicates still matter)
     """
     if isinstance(result, Exception):
         if isinstance(result, _INFRA_TYPES):
@@ -105,13 +110,11 @@ def assertSuccess(
     error_text += f"\n\nExpected:\n{pprint.pformat(expected, width=100)}"
     error_text += f"\n\nActual:\n{pprint.pformat(result, width=100)}\n"
 
-    if ignore_doc_order and isinstance(result, list) and isinstance(expected, list):
-        assert _strict_equal(
-            sorted(result, key=lambda x: str(x)),
-            sorted(expected, key=lambda x: str(x)),
-        ), error_text
-    else:
-        assert _strict_equal(result, expected), error_text
+    if ignore_doc_order:
+        result = _sort_if_list(result)
+        expected = _sort_if_list(expected)
+
+    assert _strict_equal(result, expected), error_text
 
 
 def assertSuccessPartial(
@@ -194,23 +197,23 @@ def assertResult(
     expected: Any = None,
     error_code: Optional[int] = None,
     msg: Optional[str] = None,
+    ignore_doc_order: bool = False,
 ):
     """
     Universal assertion that handles success and error cases.
 
     Args:
         result: Result from execute_command
-        expected: Expected result value.
-        error_code: Expected error code (mutually exclusive with expected)
+        expected: Expected result documents (for success cases)
+        error_code: Expected error code (for error cases)
         msg: Custom assertion message (optional)
+        ignore_doc_order: If True, compare lists ignoring order (duplicates still matter)
 
     Usage:
-        assertResult(result, expected=5)  # Success case
+        assertResult(result, expected=[{"_id": 1}])  # Success case
         assertResult(result, error_code=16555)  # Error case
     """
     if error_code is not None:
-        # Error case
         assertFailureCode(result, error_code, msg)
     else:
-        # Success case
-        assertSuccess(result, [{"result": expected}], msg)
+        assertSuccess(result, expected, msg, ignore_doc_order=ignore_doc_order)

From 6e8bb78640a607147e28f6ab0cc362d7bc904a9e Mon Sep 17 00:00:00 2001
From: Daniel Frankcom <frankcom@amazon.com>
Date: Wed, 8 Apr 2026 11:16:05 -0700
Subject: [PATCH 4/6] Use ignore_doc_order for match tests and remove insertion
 order test

Signed-off-by: Daniel Frankcom <frankcom@amazon.com>
---
 .../operator/stages/match/test_match_core.py     | 16 +---------------
 .../stages/match/test_match_query_operators.py   |  1 +
 .../stages/match/test_match_stage_position.py    |  1 +
 3 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py
index 0f2ea360..f40dc4bd 100644
--- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py
@@ -74,21 +74,6 @@
         ],
         msg="$match should filter documents to those where the field equals the value",
     ),
-    StageTestCase(
-        "core_insertion_order",
-        docs=[
-            {"_id": 3, "a": 1},
-            {"_id": 1, "a": 1},
-            {"_id": 2, "a": 1},
-        ],
-        pipeline=[{"$match": {"a": 1}}],
-        expected=[
-            {"_id": 3, "a": 1},
-            {"_id": 1, "a": 1},
-            {"_id": 2, "a": 1},
-        ],
-        msg="$match should return documents in insertion order, not sorted by _id",
-    ),
     StageTestCase(
         "core_empty_collection",
         docs=[],
@@ -275,4 +260,5 @@ def test_match_core_cases(collection, test_case: StageTestCase):
         expected=test_case.expected,
         error_code=test_case.error_code,
         msg=test_case.msg,
+        ignore_doc_order=True,
     )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py
index 34ed842c..9f76a5ea 100644
--- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py
@@ -126,4 +126,5 @@ def test_match_query_operator_cases(collection, test_case: StageTestCase):
         expected=test_case.expected,
         error_code=test_case.error_code,
         msg=test_case.msg,
+        ignore_doc_order=True,
     )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py
index ef7b43ec..43f3e5d9 100644
--- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py
@@ -177,4 +177,5 @@ def test_match_stage_position_cases(collection, test_case: StageTestCase):
         expected=test_case.expected,
         error_code=test_case.error_code,
         msg=test_case.msg,
+        ignore_doc_order=True,
     )

From 3558fcdff95f4e4460d743033119aa1c9c0ffd43 Mon Sep 17 00:00:00 2001
From: Daniel Frankcom <frankcom@amazon.com>
Date: Wed, 8 Apr 2026 11:16:05 -0700
Subject: [PATCH 5/6] Move multi-stage integration tests from match/ to stages/

Move tests that combine $match with other stages ($project, $group,
$replaceRoot) to stages/test_stages_position_match.py per FOLDER_STRUCTURE.md
rule that integration tests of same-level features belong in the parent folder.

Rename remaining pure $match tests to test_match_pipeline_semantics.py.

Signed-off-by: Daniel Frankcom <frankcom@amazon.com>
---
 .../tests/core/operator/stages/__init__.py    |  0
 .../match/test_match_pipeline_semantics.py    | 85 +++++++++++++++++++
 ...ition.py => test_stages_position_match.py} | 58 ++-----------
 3 files changed, 91 insertions(+), 52 deletions(-)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/__init__.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_pipeline_semantics.py
 rename documentdb_tests/compatibility/tests/core/operator/stages/{match/test_match_stage_position.py => test_stages_position_match.py} (66%)

diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_pipeline_semantics.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_pipeline_semantics.py
new file mode 100644
index 00000000..514446d8
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_pipeline_semantics.py
@@ -0,0 +1,85 @@
+"""Tests for $match pipeline semantics."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Pipeline Semantics]: $match works as a standalone pipeline
+# participant and composes correctly with other $match stages.
+MATCH_PIPELINE_SEMANTICS_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "pipeline_first_stage",
+        docs=[
+            {"_id": 1, "a": 10},
+            {"_id": 2, "a": 20},
+            {"_id": 3, "a": 10},
+        ],
+        pipeline=[{"$match": {"a": 10}}],
+        expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}],
+        msg="$match should work as the first stage of a pipeline",
+    ),
+    StageTestCase(
+        "pipeline_consecutive_match",
+        docs=[
+            {"_id": 1, "a": 10, "b": 1},
+            {"_id": 2, "a": 20, "b": 2},
+            {"_id": 3, "a": 10, "b": 3},
+        ],
+        pipeline=[
+            {"$match": {"a": 10}},
+            {"$match": {"b": 3}},
+        ],
+        expected=[{"_id": 3, "a": 10, "b": 3}],
+        msg="$match consecutive stages should compose like AND of all predicates",
+    ),
+]
+
+# Property [$text First-Stage Behavior]: $text search works inside $match when
+# $match is the first stage of the pipeline.
+MATCH_TEXT_FIRST_STAGE_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "text_first_stage",
+        docs=[
+            {"_id": 1, "content": "hello world"},
+            {"_id": 2, "content": "goodbye world"},
+        ],
+        setup=lambda collection: collection.create_index([("content", "text")]),
+        pipeline=[{"$match": {"$text": {"$search": "goodbye"}}}],
+        expected=[{"_id": 2, "content": "goodbye world"}],
+        msg="$match with $text should work when it is the first pipeline stage",
+    ),
+]
+
+MATCH_PIPELINE_SEMANTICS_TESTS_ALL = MATCH_PIPELINE_SEMANTICS_TESTS + MATCH_TEXT_FIRST_STAGE_TESTS
+
+
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(MATCH_PIPELINE_SEMANTICS_TESTS_ALL))
+def test_match_pipeline_semantics_cases(collection, test_case: StageTestCase):
+    """Run one $match pipeline-semantics case and check its result regardless of order."""
+    if test_case.setup:
+        test_case.setup(collection)  # runs before the insert (the $text case creates its index)
+    if test_case.docs:  # None or [] means there is nothing to insert
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+        ignore_doc_order=True,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_match.py
similarity index 66%
rename from documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py
rename to documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_match.py
index 43f3e5d9..61f17382 100644
--- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_match.py
@@ -1,4 +1,4 @@
-"""Tests for $match pipeline position behavior."""
+"""Tests for $match composing with other stages at different pipeline positions."""
 
 from __future__ import annotations
 
@@ -11,21 +11,9 @@
 from documentdb_tests.framework.executor import execute_command
 from documentdb_tests.framework.parametrize import pytest_params
 
-# Property [Pipeline Position]: $match filters correctly regardless of its
-# position in the pipeline and composes with preceding stages that reshape
-# documents.
+# Property [Pipeline Position]: $match filters correctly when composed with
+# preceding stages that reshape documents.
 MATCH_PIPELINE_POSITION_TESTS: list[StageTestCase] = [
-    StageTestCase(
-        "pipeline_first_stage",
-        docs=[
-            {"_id": 1, "a": 10},
-            {"_id": 2, "a": 20},
-            {"_id": 3, "a": 10},
-        ],
-        pipeline=[{"$match": {"a": 10}}],
-        expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}],
-        msg="$match should work as the first stage of a pipeline",
-    ),
     StageTestCase(
         "pipeline_middle_stage",
         docs=[
@@ -55,20 +43,6 @@
         expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}],
         msg="$match should work as the last stage of a pipeline",
     ),
-    StageTestCase(
-        "pipeline_consecutive_match",
-        docs=[
-            {"_id": 1, "a": 10, "b": 1},
-            {"_id": 2, "a": 20, "b": 2},
-            {"_id": 3, "a": 10, "b": 3},
-        ],
-        pipeline=[
-            {"$match": {"a": 10}},
-            {"$match": {"b": 3}},
-        ],
-        expected=[{"_id": 3, "a": 10, "b": 3}],
-        msg="$match consecutive stages should compose like $and of all predicates",
-    ),
     StageTestCase(
         "pipeline_after_reshape_dropped_field",
         docs=[
@@ -137,31 +111,11 @@
     ),
 ]
 
-# Property [$text First-Stage Behavior]: $text search works inside $match when
-# $match is the first stage of the pipeline.
-MATCH_TEXT_FIRST_STAGE_TESTS: list[StageTestCase] = [
-    StageTestCase(
-        "text_first_stage",
-        docs=[
-            {"_id": 1, "content": "hello world"},
-            {"_id": 2, "content": "goodbye world"},
-        ],
-        setup=lambda collection: collection.create_index([("content", "text")]),
-        pipeline=[{"$match": {"$text": {"$search": "goodbye"}}}],
-        expected=[{"_id": 2, "content": "goodbye world"}],
-        msg="$match with $text should work when it is the first pipeline stage",
-    ),
-]
-
-MATCH_STAGE_POSITION_TESTS_ALL = MATCH_PIPELINE_POSITION_TESTS + MATCH_TEXT_FIRST_STAGE_TESTS
-
 
 @pytest.mark.aggregate
-@pytest.mark.parametrize("test_case", pytest_params(MATCH_STAGE_POSITION_TESTS_ALL))
-def test_match_stage_position_cases(collection, test_case: StageTestCase):
-    """Test $match pipeline position behavior."""
-    if test_case.setup:
-        test_case.setup(collection)
+@pytest.mark.parametrize("test_case", pytest_params(MATCH_PIPELINE_POSITION_TESTS))
+def test_stage_position_match_cases(collection, test_case: StageTestCase):
+    """Test $match composing with other stages at different pipeline positions."""
     if test_case.docs:
         collection.insert_many(test_case.docs)
     result = execute_command(

From bc5096c975aaea12854e2e00b3468f5f3dd84531 Mon Sep 17 00:00:00 2001
From: Daniel Frankcom <frankcom@amazon.com>
Date: Wed, 8 Apr 2026 11:16:05 -0700
Subject: [PATCH 6/6] Add all query operator tests to $match

Signed-off-by: Daniel Frankcom <frankcom@amazon.com>
---
 .../match/test_match_query_operators.py       | 222 +++++++++++++++++-
 1 file changed, 209 insertions(+), 13 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py
index 9f76a5ea..61fbab19 100644
--- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py
@@ -11,10 +11,24 @@
 from documentdb_tests.framework.executor import execute_command
 from documentdb_tests.framework.parametrize import pytest_params
 
-# Property [Query Operator Categories]: one representative per query operator
-# category functions correctly inside $match as a container.
+# Property [Query Operator Support]: each query operator functions correctly
+# inside $match as a container.
 MATCH_QUERY_OPERATOR_TESTS: list[StageTestCase] = [
     # Comparison operators.
+    StageTestCase(
+        "query_comparison_eq",
+        docs=[{"_id": 1, "a": 5}, {"_id": 2, "a": 10}],
+        pipeline=[{"$match": {"a": {"$eq": 5}}}],
+        expected=[{"_id": 1, "a": 5}],
+        msg="$match should support $eq",
+    ),
+    StageTestCase(
+        "query_comparison_ne",
+        docs=[{"_id": 1, "a": 5}, {"_id": 2, "a": 10}],
+        pipeline=[{"$match": {"a": {"$ne": 5}}}],
+        expected=[{"_id": 2, "a": 10}],
+        msg="$match should support $ne",
+    ),
     StageTestCase(
         "query_comparison_gt",
         docs=[
@@ -24,9 +38,58 @@
         ],
         pipeline=[{"$match": {"a": {"$gt": 5}}}],
         expected=[{"_id": 2, "a": 7}, {"_id": 3, "a": 10}],
-        msg="$match should support comparison query operators",
+        msg="$match should support $gt",
+    ),
+    StageTestCase(
+        "query_comparison_gte",
+        docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 5}, {"_id": 3, "a": 10}],
+        pipeline=[{"$match": {"a": {"$gte": 5}}}],
+        expected=[{"_id": 2, "a": 5}, {"_id": 3, "a": 10}],
+        msg="$match should support $gte",
+    ),
+    StageTestCase(
+        "query_comparison_lt",
+        docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 7}, {"_id": 3, "a": 10}],
+        pipeline=[{"$match": {"a": {"$lt": 5}}}],
+        expected=[{"_id": 1, "a": 3}],
+        msg="$match should support $lt",
+    ),
+    StageTestCase(
+        "query_comparison_lte",
+        docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 5}, {"_id": 3, "a": 10}],
+        pipeline=[{"$match": {"a": {"$lte": 5}}}],
+        expected=[{"_id": 1, "a": 3}, {"_id": 2, "a": 5}],
+        msg="$match should support $lte",
+    ),
+    StageTestCase(
+        "query_comparison_in",
+        docs=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}],
+        pipeline=[{"$match": {"a": {"$in": [1, 3]}}}],
+        expected=[{"_id": 1, "a": 1}, {"_id": 3, "a": 3}],
+        msg="$match should support $in",
+    ),
+    StageTestCase(
+        "query_comparison_nin",
+        docs=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}],
+        pipeline=[{"$match": {"a": {"$nin": [1, 3]}}}],
+        expected=[{"_id": 2, "a": 2}],
+        msg="$match should support $nin",
     ),
     # Logical operators.
+    StageTestCase(
+        "query_logical_and",
+        docs=[
+            {"_id": 1, "a": 1, "b": 10},
+            {"_id": 2, "a": 2, "b": 20},
+            {"_id": 3, "a": 1, "b": 20},
+        ],
+        pipeline=[{"$match": {"$and": [{"a": 1}, {"b": {"$gte": 10}}]}}],
+        expected=[
+            {"_id": 1, "a": 1, "b": 10},
+            {"_id": 3, "a": 1, "b": 20},
+        ],
+        msg="$match should support $and",
+    ),
     StageTestCase(
         "query_logical_or",
         docs=[
@@ -39,19 +102,44 @@
             {"_id": 1, "a": 1, "b": 10},
             {"_id": 3, "a": 3, "b": 30},
         ],
-        msg="$match should support logical query operators",
+        msg="$match should support $or",
+    ),
+    StageTestCase(
+        "query_logical_not",
+        docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 7}, {"_id": 3, "a": 10}],
+        pipeline=[{"$match": {"a": {"$not": {"$gt": 5}}}}],
+        expected=[{"_id": 1, "a": 3}],
+        msg="$match should support $not",
     ),
-    # Element operators.
     StageTestCase(
-        "query_element_exists",
+        "query_logical_nor",
+        docs=[
+            {"_id": 1, "a": 1},
+            {"_id": 2, "a": 2},
+            {"_id": 3, "a": 3},
+        ],
+        pipeline=[{"$match": {"$nor": [{"a": 1}, {"a": 3}]}}],
+        expected=[{"_id": 2, "a": 2}],
+        msg="$match should support $nor",
+    ),
+    # Data type operators.
+    StageTestCase(
+        "query_datatype_exists",
         docs=[{"_id": 1, "a": 10}, {"_id": 2, "b": 20}, {"_id": 3, "a": 30}],
         pipeline=[{"$match": {"a": {"$exists": True}}}],
         expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 30}],
-        msg="$match should support element query operators",
+        msg="$match should support $exists",
+    ),
+    StageTestCase(
+        "query_datatype_type",
+        docs=[{"_id": 1, "a": 10}, {"_id": 2, "a": "hello"}, {"_id": 3, "a": 3.14}],
+        pipeline=[{"$match": {"a": {"$type": "string"}}}],
+        expected=[{"_id": 2, "a": "hello"}],
+        msg="$match should support $type",
     ),
-    # Evaluation operators.
+    # Miscellaneous operators.
     StageTestCase(
-        "query_eval_expr",
+        "query_misc_expr",
         docs=[
             {"_id": 1, "a": 10, "b": 10},
             {"_id": 2, "a": 20, "b": 30},
@@ -62,9 +150,47 @@
             {"_id": 1, "a": 10, "b": 10},
             {"_id": 3, "a": 5, "b": 5},
         ],
-        msg="$match should support evaluation query operators",
+        msg="$match should support $expr",
+    ),
+    StageTestCase(
+        "query_misc_jsonschema",
+        docs=[{"_id": 1, "a": 10}, {"_id": 2, "a": "hello"}, {"_id": 3}],
+        pipeline=[
+            {
+                "$match": {
+                    "$jsonSchema": {"required": ["a"], "properties": {"a": {"bsonType": "int"}}}
+                }
+            }
+        ],
+        expected=[{"_id": 1, "a": 10}],
+        msg="$match should support $jsonSchema",
+    ),
+    StageTestCase(
+        "query_misc_mod",
+        docs=[{"_id": 1, "a": 4}, {"_id": 2, "a": 6}, {"_id": 3, "a": 10}],
+        pipeline=[{"$match": {"a": {"$mod": [3, 1]}}}],
+        expected=[{"_id": 1, "a": 4}, {"_id": 3, "a": 10}],
+        msg="$match should support $mod",
+    ),
+    StageTestCase(
+        "query_misc_regex",
+        docs=[{"_id": 1, "s": "abc"}, {"_id": 2, "s": "xyz"}, {"_id": 3, "s": "abz"}],
+        pipeline=[{"$match": {"s": {"$regex": "^ab"}}}],
+        expected=[{"_id": 1, "s": "abc"}, {"_id": 3, "s": "abz"}],
+        msg="$match should support $regex",
     ),
     # Array operators.
+    StageTestCase(
+        "query_array_all",
+        docs=[
+            {"_id": 1, "arr": [1, 2, 3]},
+            {"_id": 2, "arr": [1, 3]},
+            {"_id": 3, "arr": [2, 3]},
+        ],
+        pipeline=[{"$match": {"arr": {"$all": [1, 3]}}}],
+        expected=[{"_id": 1, "arr": [1, 2, 3]}, {"_id": 2, "arr": [1, 3]}],
+        msg="$match should support $all",
+    ),
     StageTestCase(
         "query_array_elemmatch",
         docs=[
@@ -77,9 +203,32 @@
             {"_id": 1, "arr": [0.5, 0.8, 0.95]},
             {"_id": 3, "arr": [0.9, 1.0]},
         ],
-        msg="$match should support array query operators",
+        msg="$match should support $elemMatch",
+    ),
+    StageTestCase(
+        "query_array_size",
+        docs=[
+            {"_id": 1, "arr": [1, 2, 3]},
+            {"_id": 2, "arr": [1]},
+            {"_id": 3, "arr": [1, 2]},
+        ],
+        pipeline=[{"$match": {"arr": {"$size": 2}}}],
+        expected=[{"_id": 3, "arr": [1, 2]}],
+        msg="$match should support $size",
     ),
     # Bitwise operators.
+    StageTestCase(
+        "query_bitwise_bitsallclear",
+        docs=[
+            {"_id": 1, "flags": 7},
+            {"_id": 2, "flags": 8},
+            {"_id": 3, "flags": 15},
+        ],
+        # Bitmask 7 (binary 0111): only flags 8 (1000) has every masked bit clear.
+        pipeline=[{"$match": {"flags": {"$bitsAllClear": 7}}}],
+        expected=[{"_id": 2, "flags": 8}],
+        msg="$match should support $bitsAllClear",
+    ),
     StageTestCase(
         "query_bitwise_bitsallset",
         docs=[
@@ -90,9 +239,56 @@
         # Bitmask 5 (binary 0101): flags 7 (0111) and 15 (1111) match.
         pipeline=[{"$match": {"flags": {"$bitsAllSet": 5}}}],
         expected=[{"_id": 1, "flags": 7}, {"_id": 3, "flags": 15}],
-        msg="$match should support bitwise query operators",
+        msg="$match should support $bitsAllSet",
+    ),
+    StageTestCase(
+        "query_bitwise_bitsanyclear",
+        docs=[
+            {"_id": 1, "flags": 7},
+            {"_id": 2, "flags": 3},
+            {"_id": 3, "flags": 15},
+        ],
+        # Bitmask 12 (binary 1100): flags 7 (0111) and 3 (0011) each have a masked bit clear.
+        pipeline=[{"$match": {"flags": {"$bitsAnyClear": 12}}}],
+        expected=[{"_id": 1, "flags": 7}, {"_id": 2, "flags": 3}],
+        msg="$match should support $bitsAnyClear",
+    ),
+    StageTestCase(
+        "query_bitwise_bitsanyset",
+        docs=[
+            {"_id": 1, "flags": 4},
+            {"_id": 2, "flags": 8},
+            {"_id": 3, "flags": 16},
+        ],
+        # Bitmask 6 (binary 0110): only flags 4 (0100) has a masked bit set (bit 2).
+        pipeline=[{"$match": {"flags": {"$bitsAnySet": 6}}}],
+        expected=[{"_id": 1, "flags": 4}],
+        msg="$match should support $bitsAnySet",
     ),
     # Geospatial operators.
+    StageTestCase(
+        "query_geo_geointersects",
+        docs=[
+            {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}},
+            {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}},
+        ],
+        pipeline=[
+            {
+                "$match": {
+                    "loc": {
+                        "$geoIntersects": {
+                            "$geometry": {
+                                "type": "Polygon",
+                                "coordinates": [[[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]]],
+                            }
+                        }
+                    }
+                }
+            }
+        ],
+        expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}],
+        msg="$match should support $geoIntersects",
+    ),
     StageTestCase(
         "query_geo_geowithin",
         docs=[
@@ -102,7 +298,7 @@
         ],
         pipeline=[{"$match": {"loc": {"$geoWithin": {"$center": [[0, 0], 10]}}}}],
         expected=[{"_id": 1, "loc": [0, 0]}, {"_id": 3, "loc": [1, 1]}],
-        msg="$match should support geospatial query operators",
+        msg="$match should support $geoWithin",
     ),
 ]