From 7fb2db7a66435cf29294e88ca7c14f86e0312ea6 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 8 Apr 2026 11:16:05 -0700 Subject: [PATCH 1/6] Add $match stage tests Add parametrized test cases for the $match aggregation stage, organized into files by property group: - test_match_core.py: null/missing, equality, predicates, empty/large - test_match_query_operators.py: one representative per query category - test_match_stage_position.py: pipeline position and $text first-stage - test_match_errors.py: argument validation and restricted operators Replace test_match_stage.py with test_smoke_match.py. Add StageTestCase dataclass and new error codes used by the tests. Signed-off-by: Daniel Frankcom --- .../core/operator/stages/match/__init__.py | 0 .../operator/stages/match/test_match_core.py | 241 ++++++++++++++++++ .../stages/match/test_match_errors.py | 203 +++++++++++++++ .../match/test_match_query_operators.py | 129 ++++++++++ .../operator/stages/match/test_match_stage.py | 92 ------- .../stages/match/test_match_stage_position.py | 180 +++++++++++++ .../operator/stages/match/test_smoke_match.py | 38 +++ .../core/operator/stages/utils/__init__.py | 0 .../operator/stages/utils/stage_test_case.py | 17 ++ documentdb_tests/framework/error_codes.py | 3 + 10 files changed, 811 insertions(+), 92 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py delete mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py create mode 100644 
documentdb_tests/compatibility/tests/core/operator/stages/match/test_smoke_match.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py new file mode 100644 index 00000000..811e51e6 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py @@ -0,0 +1,241 @@ +"""Tests for $match core matching behavior.""" + +from __future__ import annotations + +import pytest +from bson.son import SON + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Null and Missing Field Matching]: {field: null} matches both +# documents where the field is null-valued and documents where the field is +# missing entirely. 
+MATCH_NULL_MISSING_TESTS: list[StageTestCase] = [ + StageTestCase( + "null_matches_null_valued", + docs=[{"_id": 1, "a": None}, {"_id": 2, "a": "x"}], + pipeline=[{"$match": {"a": None}}], + expected=[{"_id": 1, "a": None}], + msg="$match with {field: null} should match documents where the field is null", + ), + StageTestCase( + "null_matches_missing_field", + docs=[{"_id": 1, "a": "x"}, {"_id": 2}], + pipeline=[{"$match": {"a": None}}], + expected=[{"_id": 2}], + msg="$match with {field: null} should match documents where the field is missing", + ), + StageTestCase( + "null_matches_both_null_and_missing", + docs=[ + {"_id": 1, "a": None}, + {"_id": 2, "a": "x"}, + {"_id": 3}, + {"_id": 4, "a": 0}, + ], + pipeline=[{"$match": {"a": None}}], + expected=[{"_id": 1, "a": None}, {"_id": 3}], + msg="$match with {field: null} should match both null-valued and missing-field documents", + ), + StageTestCase( + "null_excludes_falsy_values", + docs=[ + {"_id": 1, "a": None}, + {"_id": 2, "a": 0}, + {"_id": 3, "a": False}, + {"_id": 4, "a": ""}, + ], + pipeline=[{"$match": {"a": None}}], + expected=[{"_id": 1, "a": None}], + msg="$match with {field: null} should not match falsy non-null values", + ), +] + +# Property [Core Matching Behavior]: simple equality filtering, insertion +# order preservation, and matching against an empty collection all work +# correctly. 
+MATCH_CORE_TESTS: list[StageTestCase] = [ + StageTestCase( + "core_equality_single_match", + docs=[ + {"_id": 1, "a": 10, "b": "x"}, + {"_id": 2, "a": 20, "b": "y"}, + {"_id": 3, "a": 10, "b": "z"}, + ], + pipeline=[{"$match": {"a": 10}}], + expected=[ + {"_id": 1, "a": 10, "b": "x"}, + {"_id": 3, "a": 10, "b": "z"}, + ], + msg="$match should filter documents to those where the field equals the value", + ), + StageTestCase( + "core_insertion_order", + docs=[ + {"_id": 3, "a": 1}, + {"_id": 1, "a": 1}, + {"_id": 2, "a": 1}, + ], + pipeline=[{"$match": {"a": 1}}], + expected=[ + {"_id": 3, "a": 1}, + {"_id": 1, "a": 1}, + {"_id": 2, "a": 1}, + ], + msg="$match should return documents in insertion order, not sorted by _id", + ), + StageTestCase( + "core_empty_collection", + docs=[], + pipeline=[{"$match": {"a": 1}}], + expected=[], + msg="$match on empty collection should return empty result", + ), +] + +# Property [Predicate Semantics]: $match correctly handles non-obvious +# predicate edge cases that could differ between compatible engines. 
+MATCH_PREDICATE_TESTS: list[StageTestCase] = [ + StageTestCase( + "predicate_comment_ignored", + docs=[ + {"_id": 1, "a": 10}, + {"_id": 2, "a": 20}, + {"_id": 3, "a": 10}, + ], + pipeline=[{"$match": {"a": 10, "$comment": "this is a comment"}}], + expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}], + msg="$match should ignore $comment in the predicate and filter normally", + ), + StageTestCase( + "predicate_contradictory_empty", + docs=[ + {"_id": 1, "a": 3}, + {"_id": 2, "a": 7}, + {"_id": 3, "a": 15}, + ], + pipeline=[{"$match": {"a": {"$gt": 10, "$lt": 5}}}], + expected=[], + msg="$match with contradictory conditions should return empty result without error", + ), + StageTestCase( + "predicate_dollar_string_literal", + docs=[ + {"_id": 1, "a": "$notAFieldRef"}, + {"_id": 2, "a": "hello"}, + ], + pipeline=[{"$match": {"a": "$notAFieldRef"}}], + expected=[{"_id": 1, "a": "$notAFieldRef"}], + msg="$match should treat $-prefixed strings as literal values, not field references", + ), + StageTestCase( + "predicate_duplicate_field_last_wins_equality", + docs=[ + {"_id": 1, "a": 10}, + {"_id": 2, "a": 20}, + ], + pipeline=[{"$match": SON([("a", 10), ("a", 20)])}], + expected=[{"_id": 2, "a": 20}], + msg="$match with duplicate field names should use last-value-wins semantics", + ), + StageTestCase( + "predicate_duplicate_field_last_wins_operator", + docs=[ + {"_id": 1, "a": 10}, + {"_id": 2, "a": 20}, + ], + pipeline=[{"$match": SON([("a", {"$gt": 15}), ("a", {"$lt": 15})])}], + expected=[{"_id": 1, "a": 10}], + msg="$match with duplicate operator predicates should use last-value-wins, not AND", + ), + StageTestCase( + "predicate_dot_notation", + docs=[ + {"_id": 1, "a": {"b": 10}}, + {"_id": 2, "a": {"b": 20}}, + ], + pipeline=[{"$match": {"a.b": 10}}], + expected=[{"_id": 1, "a": {"b": 10}}], + msg="$match should support dot notation to match nested fields", + ), +] + +# Property [Empty Predicate]: {$match: {}} returns all documents, and an +# empty collection 
always returns an empty result. +MATCH_EMPTY_PREDICATE_TESTS: list[StageTestCase] = [ + StageTestCase( + "empty_predicate_returns_all", + docs=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}], + pipeline=[{"$match": {}}], + expected=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}], + msg="$match with empty predicate should return all documents", + ), + StageTestCase( + "empty_collection_empty_predicate", + docs=[], + pipeline=[{"$match": {}}], + expected=[], + msg="$match on empty collection with empty predicate should return empty result", + ), +] + +# Property [Large Predicate]: $match handles predicates with many conditions +# without error. +MATCH_LARGE_PREDICATE_TESTS: list[StageTestCase] = [ + StageTestCase( + "large_many_field_conditions", + docs=[ + {"_id": 1, **{f"f{i}": i for i in range(500)}}, + {"_id": 2, "f0": 0}, + ], + pipeline=[{"$match": {f"f{i}": i for i in range(500)}}], + expected=[{"_id": 1, **{f"f{i}": i for i in range(500)}}], + msg="$match should handle a predicate with 500 field conditions", + ), + StageTestCase( + "large_many_or_branches", + docs=[ + {"_id": 1, "a": 0}, + {"_id": 2, "a": 499}, + {"_id": 3, "a": 999}, + ], + pipeline=[{"$match": {"$or": [{"a": i} for i in range(500)]}}], + expected=[{"_id": 1, "a": 0}, {"_id": 2, "a": 499}], + msg="$match should handle an $or predicate with 500 branches", + ), +] + +MATCH_CORE_TESTS_ALL = ( + MATCH_NULL_MISSING_TESTS + + MATCH_CORE_TESTS + + MATCH_PREDICATE_TESTS + + MATCH_EMPTY_PREDICATE_TESTS + + MATCH_LARGE_PREDICATE_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(MATCH_CORE_TESTS_ALL)) +def test_match_core_cases(collection, test_case: StageTestCase): + """Test $match core matching behavior.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + 
expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_errors.py new file mode 100644 index 00000000..2be8a321 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_errors.py @@ -0,0 +1,203 @@ +"""Tests for $match argument validation and restricted operator errors.""" + +from __future__ import annotations + +from datetime import datetime + +import pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + MATCH_FILTER_NOT_OBJECT_ERROR, + MATCH_TEXT_NOT_FIRST_STAGE_ERROR, + NEAR_NOT_ALLOWED_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Argument Validation Errors]: non-document arguments to $match +# produce an error. 
+MATCH_ARGUMENT_VALIDATION_TESTS: list[StageTestCase] = [ + StageTestCase( + "arg_error_string", + pipeline=[{"$match": "hello"}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a string argument", + ), + StageTestCase( + "arg_error_int", + pipeline=[{"$match": 42}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject an integer argument", + ), + StageTestCase( + "arg_error_float", + pipeline=[{"$match": 3.14}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a float argument", + ), + StageTestCase( + "arg_error_bool", + pipeline=[{"$match": True}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a boolean argument", + ), + StageTestCase( + "arg_error_null", + pipeline=[{"$match": None}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a null argument", + ), + StageTestCase( + "arg_error_array", + pipeline=[{"$match": [1, 2]}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject an array argument", + ), + StageTestCase( + "arg_error_int64", + pipeline=[{"$match": Int64(42)}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject an int64 argument", + ), + StageTestCase( + "arg_error_decimal128", + pipeline=[{"$match": Decimal128("3.14")}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a decimal128 argument", + ), + StageTestCase( + "arg_error_objectid", + pipeline=[{"$match": ObjectId()}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject an ObjectId argument", + ), + StageTestCase( + "arg_error_datetime", + pipeline=[{"$match": datetime(2024, 1, 1)}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a datetime argument", + ), + StageTestCase( + "arg_error_binary", + pipeline=[{"$match": Binary(b"\x00\x01")}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a binary argument", + ), + StageTestCase( + "arg_error_regex", 
+ pipeline=[{"$match": Regex("^abc")}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a regex argument", + ), + StageTestCase( + "arg_error_timestamp", + pipeline=[{"$match": Timestamp(0, 0)}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a timestamp argument", + ), + StageTestCase( + "arg_error_minkey", + pipeline=[{"$match": MinKey()}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a MinKey argument", + ), + StageTestCase( + "arg_error_maxkey", + pipeline=[{"$match": MaxKey()}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a MaxKey argument", + ), + StageTestCase( + "arg_error_code", + pipeline=[{"$match": Code("function(){}")}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a JavaScript code argument", + ), + StageTestCase( + "arg_error_code_with_scope", + pipeline=[{"$match": Code("function(){}", {"x": 1})}], + error_code=MATCH_FILTER_NOT_OBJECT_ERROR, + msg="$match should reject a JavaScript code with scope argument", + ), +] + +# Property [Restricted Operator Errors]: $where, $near, $nearSphere, $geoNear, +# and unknown $-prefixed top-level operators are rejected inside $match; $text +# is rejected when $match is not the first pipeline stage. 
+MATCH_RESTRICTED_OPERATOR_TESTS: list[StageTestCase] = [ + StageTestCase( + "restricted_where", + docs=[{"_id": 1, "a": 10}], + pipeline=[{"$match": {"$where": "true"}}], + error_code=BAD_VALUE_ERROR, + msg="$match should reject $where in the predicate", + ), + StageTestCase( + "restricted_near", + docs=[{"_id": 1, "loc": [0, 0]}], + pipeline=[{"$match": {"loc": {"$near": [0, 0]}}}], + error_code=NEAR_NOT_ALLOWED_ERROR, + msg="$match should reject $near in the predicate", + ), + StageTestCase( + "restricted_near_sphere", + docs=[{"_id": 1, "loc": [0, 0]}], + pipeline=[{"$match": {"loc": {"$nearSphere": [0, 0]}}}], + error_code=NEAR_NOT_ALLOWED_ERROR, + msg="$match should reject $nearSphere in the predicate", + ), + StageTestCase( + "restricted_geo_near", + docs=[{"_id": 1, "loc": [0, 0]}], + pipeline=[{"$match": {"loc": {"$geoNear": {"near": [0, 0]}}}}], + error_code=NEAR_NOT_ALLOWED_ERROR, + msg="$match should reject $geoNear in the predicate", + ), + StageTestCase( + "restricted_text_non_first_stage", + docs=[{"_id": 1, "a": 10}], + pipeline=[ + {"$project": {"a": 1}}, + {"$match": {"$text": {"$search": "hello"}}}, + ], + error_code=MATCH_TEXT_NOT_FIRST_STAGE_ERROR, + msg="$match should reject $text when it is not the first pipeline stage", + ), + StageTestCase( + "restricted_unknown_dollar_operator", + docs=[{"_id": 1, "a": 10}], + pipeline=[{"$match": {"$fakeOperator": 1}}], + error_code=BAD_VALUE_ERROR, + msg="$match should reject an unknown $-prefixed top-level operator", + ), +] + +MATCH_ERROR_TESTS_ALL = MATCH_ARGUMENT_VALIDATION_TESTS + MATCH_RESTRICTED_OPERATOR_TESTS + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(MATCH_ERROR_TESTS_ALL)) +def test_match_error_cases(collection, test_case: StageTestCase): + """Test $match argument validation and restricted operator errors.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + 
"pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py new file mode 100644 index 00000000..34ed842c --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py @@ -0,0 +1,129 @@ +"""Tests for $match query operator categories.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Query Operator Categories]: one representative per query operator +# category functions correctly inside $match as a container. +MATCH_QUERY_OPERATOR_TESTS: list[StageTestCase] = [ + # Comparison operators. + StageTestCase( + "query_comparison_gt", + docs=[ + {"_id": 1, "a": 3}, + {"_id": 2, "a": 7}, + {"_id": 3, "a": 10}, + ], + pipeline=[{"$match": {"a": {"$gt": 5}}}], + expected=[{"_id": 2, "a": 7}, {"_id": 3, "a": 10}], + msg="$match should support comparison query operators", + ), + # Logical operators. + StageTestCase( + "query_logical_or", + docs=[ + {"_id": 1, "a": 1, "b": 10}, + {"_id": 2, "a": 2, "b": 20}, + {"_id": 3, "a": 3, "b": 30}, + ], + pipeline=[{"$match": {"$or": [{"a": 1}, {"b": 30}]}}], + expected=[ + {"_id": 1, "a": 1, "b": 10}, + {"_id": 3, "a": 3, "b": 30}, + ], + msg="$match should support logical query operators", + ), + # Element operators. 
+ StageTestCase( + "query_element_exists", + docs=[{"_id": 1, "a": 10}, {"_id": 2, "b": 20}, {"_id": 3, "a": 30}], + pipeline=[{"$match": {"a": {"$exists": True}}}], + expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 30}], + msg="$match should support element query operators", + ), + # Evaluation operators. + StageTestCase( + "query_eval_expr", + docs=[ + {"_id": 1, "a": 10, "b": 10}, + {"_id": 2, "a": 20, "b": 30}, + {"_id": 3, "a": 5, "b": 5}, + ], + pipeline=[{"$match": {"$expr": {"$eq": ["$a", "$b"]}}}], + expected=[ + {"_id": 1, "a": 10, "b": 10}, + {"_id": 3, "a": 5, "b": 5}, + ], + msg="$match should support evaluation query operators", + ), + # Array operators. + StageTestCase( + "query_array_elemmatch", + docs=[ + {"_id": 1, "arr": [0.5, 0.8, 0.95]}, + {"_id": 2, "arr": [0.1, 0.3]}, + {"_id": 3, "arr": [0.9, 1.0]}, + ], + pipeline=[{"$match": {"arr": {"$elemMatch": {"$gte": 0.9}}}}], + expected=[ + {"_id": 1, "arr": [0.5, 0.8, 0.95]}, + {"_id": 3, "arr": [0.9, 1.0]}, + ], + msg="$match should support array query operators", + ), + # Bitwise operators. + StageTestCase( + "query_bitwise_bitsallset", + docs=[ + {"_id": 1, "flags": 7}, + {"_id": 2, "flags": 3}, + {"_id": 3, "flags": 15}, + ], + # Bitmask 5 (binary 0101): flags 7 (0111) and 15 (1111) match. + pipeline=[{"$match": {"flags": {"$bitsAllSet": 5}}}], + expected=[{"_id": 1, "flags": 7}, {"_id": 3, "flags": 15}], + msg="$match should support bitwise query operators", + ), + # Geospatial operators. 
+ StageTestCase( + "query_geo_geowithin", + docs=[ + {"_id": 1, "loc": [0, 0]}, + {"_id": 2, "loc": [50, 50]}, + {"_id": 3, "loc": [1, 1]}, + ], + pipeline=[{"$match": {"loc": {"$geoWithin": {"$center": [[0, 0], 10]}}}}], + expected=[{"_id": 1, "loc": [0, 0]}, {"_id": 3, "loc": [1, 1]}], + msg="$match should support geospatial query operators", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(MATCH_QUERY_OPERATOR_TESTS)) +def test_match_query_operator_cases(collection, test_case: StageTestCase): + """Test $match query operator categories.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage.py deleted file mode 100644 index 256a62fc..00000000 --- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -Aggregation $match stage tests. - -Tests for the $match stage in aggregation pipelines. 
-""" - -import pytest - -from documentdb_tests.framework.assertions import assertSuccess -from documentdb_tests.framework.executor import execute_command - - -@pytest.mark.aggregate -@pytest.mark.smoke -def test_match_simple_filter(collection): - """Test $match stage with simple equality filter.""" - collection.insert_many( - [ - {"_id": 0, "a": "A", "b": 30, "c": "active"}, - {"_id": 1, "a": "B", "b": 25, "c": "active"}, - {"_id": 2, "a": "C", "b": 35, "c": "inactive"}, - ] - ) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": [{"$match": {"c": "active"}}], "cursor": {}}, - ) - - expected = [ - {"_id": 0, "a": "A", "b": 30, "c": "active"}, - {"_id": 1, "a": "B", "b": 25, "c": "active"}, - ] - assertSuccess(result, expected, "Should match active documents") - - -@pytest.mark.aggregate -def test_match_with_comparison_operator(collection): - """Test $match stage with comparison operators.""" - collection.insert_many( - [ - {"_id": 0, "a": "A", "b": 30}, - {"_id": 1, "a": "B", "b": 25}, - {"_id": 2, "a": "C", "b": 35}, - ] - ) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": [{"$match": {"b": {"$gt": 25}}}], "cursor": {}}, - ) - - expected = [{"_id": 0, "a": "A", "b": 30}, {"_id": 2, "a": "C", "b": 35}] - assertSuccess(result, expected, "Should match documents with b > 25") - - -@pytest.mark.aggregate -def test_match_multiple_conditions(collection): - """Test $match stage with multiple filter conditions.""" - collection.insert_many( - [ - {"_id": 0, "a": "A", "b": 30, "c": "NYC"}, - {"_id": 1, "a": "B", "b": 25, "c": "SF"}, - {"_id": 2, "a": "C", "b": 35, "c": "SF"}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [{"$match": {"c": "NYC", "b": {"$gte": 30}}}], - "cursor": {}, - }, - ) - - expected = [{"_id": 0, "a": "A", "b": 30, "c": "NYC"}] - assertSuccess(result, expected, "Should match multiple conditions") - - -@pytest.mark.aggregate -def 
test_match_empty_result(collection): - """Test $match stage that matches no documents.""" - collection.insert_many( - [ - {"_id": 0, "a": "A", "b": "active"}, - {"_id": 1, "a": "B", "b": "active"}, - ] - ) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": [{"$match": {"b": "inactive"}}], "cursor": {}}, - ) - - assertSuccess(result, [], "Should return empty result when no match") diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py new file mode 100644 index 00000000..ef7b43ec --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py @@ -0,0 +1,180 @@ +"""Tests for $match pipeline position behavior.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Pipeline Position]: $match filters correctly regardless of its +# position in the pipeline and composes with preceding stages that reshape +# documents. 
+MATCH_PIPELINE_POSITION_TESTS: list[StageTestCase] = [ + StageTestCase( + "pipeline_first_stage", + docs=[ + {"_id": 1, "a": 10}, + {"_id": 2, "a": 20}, + {"_id": 3, "a": 10}, + ], + pipeline=[{"$match": {"a": 10}}], + expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}], + msg="$match should work as the first stage of a pipeline", + ), + StageTestCase( + "pipeline_middle_stage", + docs=[ + {"_id": 1, "a": 10, "b": "x"}, + {"_id": 2, "a": 20, "b": "y"}, + {"_id": 3, "a": 10, "b": "z"}, + ], + pipeline=[ + {"$project": {"a": 1}}, + {"$match": {"a": 10}}, + {"$project": {"a": 1}}, + ], + expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}], + msg="$match should work as a middle stage of a pipeline", + ), + StageTestCase( + "pipeline_last_stage", + docs=[ + {"_id": 1, "a": 10}, + {"_id": 2, "a": 20}, + {"_id": 3, "a": 10}, + ], + pipeline=[ + {"$project": {"a": 1}}, + {"$match": {"a": 10}}, + ], + expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}], + msg="$match should work as the last stage of a pipeline", + ), + StageTestCase( + "pipeline_consecutive_match", + docs=[ + {"_id": 1, "a": 10, "b": 1}, + {"_id": 2, "a": 20, "b": 2}, + {"_id": 3, "a": 10, "b": 3}, + ], + pipeline=[ + {"$match": {"a": 10}}, + {"$match": {"b": 3}}, + ], + expected=[{"_id": 3, "a": 10, "b": 3}], + msg="$match consecutive stages should compose like $and of all predicates", + ), + StageTestCase( + "pipeline_after_reshape_dropped_field", + docs=[ + {"_id": 1, "a": 10, "b": "x"}, + {"_id": 2, "a": 20, "b": "y"}, + ], + pipeline=[ + {"$project": {"b": 1}}, + {"$match": {"a": 10}}, + ], + expected=[], + msg="$match on a field dropped by a preceding stage should return empty", + ), + StageTestCase( + "pipeline_after_reshape_computed_field", + docs=[ + {"_id": 1, "a": 10}, + {"_id": 2, "a": 20}, + ], + pipeline=[ + {"$project": {"doubled": {"$multiply": ["$a", 2]}}}, + {"$match": {"doubled": 40}}, + ], + expected=[{"_id": 2, "doubled": 40}], + msg="$match should filter on fields computed by a 
preceding stage", + ), + StageTestCase( + "pipeline_after_aggregation_computed_field", + docs=[ + {"_id": 1, "cat": "a", "val": 5}, + {"_id": 2, "cat": "b", "val": 3}, + {"_id": 3, "cat": "a", "val": 7}, + ], + pipeline=[ + {"$group": {"_id": "$cat", "total": {"$sum": "$val"}}}, + {"$match": {"total": 12}}, + ], + expected=[{"_id": "a", "total": 12}], + msg="$match should filter on fields produced by an aggregation stage", + ), + StageTestCase( + "pipeline_after_aggregation_dropped_field", + docs=[ + {"_id": 1, "cat": "a", "val": 5}, + {"_id": 2, "cat": "b", "val": 3}, + ], + pipeline=[ + {"$group": {"_id": "$cat", "total": {"$sum": "$val"}}}, + {"$match": {"val": 5}}, + ], + expected=[], + msg="$match on a field absent from aggregation output should return empty", + ), + StageTestCase( + "pipeline_after_root_replacement", + docs=[ + {"_id": 1, "inner": {"x": 10}}, + {"_id": 2, "inner": {"x": 20}}, + ], + pipeline=[ + {"$replaceRoot": {"newRoot": "$inner"}}, + {"$match": {"x": 10}}, + ], + expected=[{"x": 10}], + msg="$match should filter on the document shape produced by a root replacement stage", + ), +] + +# Property [$text First-Stage Behavior]: $text search works inside $match when +# $match is the first stage of the pipeline. 
+MATCH_TEXT_FIRST_STAGE_TESTS: list[StageTestCase] = [ + StageTestCase( + "text_first_stage", + docs=[ + {"_id": 1, "content": "hello world"}, + {"_id": 2, "content": "goodbye world"}, + ], + setup=lambda collection: collection.create_index([("content", "text")]), + pipeline=[{"$match": {"$text": {"$search": "goodbye"}}}], + expected=[{"_id": 2, "content": "goodbye world"}], + msg="$match with $text should work when it is the first pipeline stage", + ), +] + +MATCH_STAGE_POSITION_TESTS_ALL = MATCH_PIPELINE_POSITION_TESTS + MATCH_TEXT_FIRST_STAGE_TESTS + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(MATCH_STAGE_POSITION_TESTS_ALL)) +def test_match_stage_position_cases(collection, test_case: StageTestCase): + """Test $match pipeline position behavior.""" + if test_case.setup: + test_case.setup(collection) + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_smoke_match.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_smoke_match.py new file mode 100644 index 00000000..e60a2369 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_smoke_match.py @@ -0,0 +1,38 @@ +""" +Smoke test for $match stage. + +Tests basic $match functionality. 
+""" + +import pytest + +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command + + +@pytest.mark.aggregate +@pytest.mark.smoke +def test_smoke_match(collection): + """Test basic $match behavior.""" + collection.insert_many( + [ + {"_id": 1, "status": "active", "value": 10}, + {"_id": 2, "status": "inactive", "value": 20}, + {"_id": 3, "status": "active", "value": 30}, + ] + ) + + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$match": {"status": "active"}}], + "cursor": {}, + }, + ) + + expected = [ + {"_id": 1, "status": "active", "value": 10}, + {"_id": 3, "status": "active", "value": 30}, + ] + assertSuccess(result, expected, msg="Should support $match stage") diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py b/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py new file mode 100644 index 00000000..a06d47b7 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py @@ -0,0 +1,17 @@ +""" +Shared test case for pipeline stage tests. 
+""" + +from dataclasses import dataclass +from typing import Any, Callable, Optional + +from documentdb_tests.framework.test_case import BaseTestCase + + +@dataclass(frozen=True) +class StageTestCase(BaseTestCase): + """Test case for pipeline stage tests.""" + + docs: Optional[list[dict[str, Any]]] = None + pipeline: Optional[list[dict[str, Any]]] = None + setup: Optional[Callable] = None diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index c285b5aa..0471e128 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -5,10 +5,12 @@ DIVIDE_BY_ZERO_ERROR = 2 TYPE_MISMATCH_ERROR = 14 +MATCH_FILTER_NOT_OBJECT_ERROR = 15959 EXPRESSION_TYPE_MISMATCH_ERROR = 16020 MODULO_ZERO_REMAINDER_ERROR = 16610 MODULO_NON_NUMERIC_ERROR = 16611 MORE_THAN_ONE_DATE_ERROR = 16612 +MATCH_TEXT_NOT_FIRST_STAGE_ERROR = 17313 ABS_OVERFLOW_ERROR = 28680 LOG_NON_NUMERIC_VALUE_ERROR = 28756 LOG_NON_NUMERIC_BASE_ERROR = 28757 @@ -20,4 +22,5 @@ POW_BASE_ZERO_EXP_NEGATIVE_ERROR = 28764 NON_NUMERIC_TYPE_MISMATCH_ERROR = 28765 LN_NON_POSITIVE_INPUT_ERROR = 28766 +NEAR_NOT_ALLOWED_ERROR = 5626500 MODULO_DECIMAL128_ZERO_REMAINDER_ERROR = 5733415 From 0d037a49bfa7d11d120c23ab32fa5066993e3b31 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 8 Apr 2026 11:16:05 -0700 Subject: [PATCH 2/6] Add dot notation array index vs object key test cases Signed-off-by: Daniel Frankcom --- .../operator/stages/match/test_match_core.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py index 811e51e6..0f2ea360 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py @@ -163,6 +163,43 @@ expected=[{"_id": 
1, "a": {"b": 10}}], msg="$match should support dot notation to match nested fields", ), + StageTestCase( + "predicate_dot_notation_array_index", + docs=[ + {"_id": 1, "a": [{"b": 10}, {"b": 20}]}, + {"_id": 2, "a": [{"b": 30}, {"b": 40}]}, + {"_id": 3, "a": {"0": {"b": 99}}}, + ], + pipeline=[{"$match": {"a.0.b": 10}}], + expected=[{"_id": 1, "a": [{"b": 10}, {"b": 20}]}], + msg="$match with numeric dot path should resolve as array index", + ), + StageTestCase( + "predicate_dot_notation_object_key", + docs=[ + {"_id": 1, "a": [{"b": 10}, {"b": 20}]}, + {"_id": 2, "a": [{"b": 30}, {"b": 40}]}, + {"_id": 3, "a": {"0": {"b": 99}}}, + ], + pipeline=[{"$match": {"a.0.b": 99}}], + expected=[{"_id": 3, "a": {"0": {"b": 99}}}], + msg="$match with numeric dot path should also match object keys", + ), + StageTestCase( + "predicate_dot_notation_array_index_and_object_key", + docs=[ + {"_id": 1, "a": [{"b": 10}, {"b": 20}]}, + {"_id": 2, "a": [{"b": 10}, {"b": 40}]}, + {"_id": 3, "a": {"0": {"b": 10}}}, + ], + pipeline=[{"$match": {"a.0.b": 10}}], + expected=[ + {"_id": 1, "a": [{"b": 10}, {"b": 20}]}, + {"_id": 2, "a": [{"b": 10}, {"b": 40}]}, + {"_id": 3, "a": {"0": {"b": 10}}}, + ], + msg="$match with numeric dot path should match both array index and object key", + ), ] # Property [Empty Predicate]: {$match: {}} returns all documents, and an From 05c70834e32901ab132001e833591db4ffb29ff7 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 8 Apr 2026 11:16:05 -0700 Subject: [PATCH 3/6] Add ignore_doc_order to assertResult Signed-off-by: Daniel Frankcom --- documentdb_tests/framework/assertions.py | 31 +++++++++++++----------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/documentdb_tests/framework/assertions.py b/documentdb_tests/framework/assertions.py index 2fe70e43..7e40331b 100644 --- a/documentdb_tests/framework/assertions.py +++ b/documentdb_tests/framework/assertions.py @@ -17,6 +17,11 @@ _NUMERIC_BSON_TYPES = (int, float, Int64, Decimal128) 
+def _sort_if_list(value: Any) -> Any: + """Return a sorted copy if value is a list, otherwise return it unchanged.""" + return sorted(value, key=lambda x: str(x)) if isinstance(value, list) else value + + def _strict_equal(a: Any, b: Any) -> bool: """Equality with stricter semantics for BSON numeric types. @@ -86,7 +91,7 @@ def assertSuccess( raw_res: If asserting raw result. False by default, only compare content of ["cursor"]["firstBatch"] transform: Optional callback to transform result before comparison - ignore_doc_order: If True, compare lists as sets (order-independent) + ignore_doc_order: If True, compare lists ignoring order (duplicates still matter) """ if isinstance(result, Exception): if isinstance(result, _INFRA_TYPES): @@ -105,13 +110,11 @@ def assertSuccess( error_text += f"\n\nExpected:\n{pprint.pformat(expected, width=100)}" error_text += f"\n\nActual:\n{pprint.pformat(result, width=100)}\n" - if ignore_doc_order and isinstance(result, list) and isinstance(expected, list): - assert _strict_equal( - sorted(result, key=lambda x: str(x)), - sorted(expected, key=lambda x: str(x)), - ), error_text - else: - assert _strict_equal(result, expected), error_text + if ignore_doc_order: + result = _sort_if_list(result) + expected = _sort_if_list(expected) + + assert _strict_equal(result, expected), error_text def assertSuccessPartial( @@ -194,23 +197,23 @@ def assertResult( expected: Any = None, error_code: Optional[int] = None, msg: Optional[str] = None, + ignore_doc_order: bool = False, ): """ Universal assertion that handles success and error cases. Args: result: Result from execute_command - expected: Expected result value. 
- error_code: Expected error code (mutually exclusive with expected) + expected: Expected result documents (for success cases) + error_code: Expected error code (for error cases) msg: Custom assertion message (optional) + ignore_doc_order: If True, compare lists ignoring order (duplicates still matter) Usage: - assertResult(result, expected=5) # Success case + assertResult(result, expected=[{"_id": 1}]) # Success case assertResult(result, error_code=16555) # Error case """ if error_code is not None: - # Error case assertFailureCode(result, error_code, msg) else: - # Success case - assertSuccess(result, [{"result": expected}], msg) + assertSuccess(result, expected, msg, ignore_doc_order=ignore_doc_order) From 6e8bb78640a607147e28f6ab0cc362d7bc904a9e Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 8 Apr 2026 11:16:05 -0700 Subject: [PATCH 4/6] Use ignore_doc_order for match tests and remove insertion order test Signed-off-by: Daniel Frankcom --- .../operator/stages/match/test_match_core.py | 16 +--------------- .../stages/match/test_match_query_operators.py | 1 + .../stages/match/test_match_stage_position.py | 1 + 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py index 0f2ea360..f40dc4bd 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py @@ -74,21 +74,6 @@ ], msg="$match should filter documents to those where the field equals the value", ), - StageTestCase( - "core_insertion_order", - docs=[ - {"_id": 3, "a": 1}, - {"_id": 1, "a": 1}, - {"_id": 2, "a": 1}, - ], - pipeline=[{"$match": {"a": 1}}], - expected=[ - {"_id": 3, "a": 1}, - {"_id": 1, "a": 1}, - {"_id": 2, "a": 1}, - ], - msg="$match should return documents in insertion order, not sorted by 
_id", - ), StageTestCase( "core_empty_collection", docs=[], @@ -275,4 +260,5 @@ def test_match_core_cases(collection, test_case: StageTestCase): expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg, + ignore_doc_order=True, ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py index 34ed842c..9f76a5ea 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py @@ -126,4 +126,5 @@ def test_match_query_operator_cases(collection, test_case: StageTestCase): expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg, + ignore_doc_order=True, ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py index ef7b43ec..43f3e5d9 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py @@ -177,4 +177,5 @@ def test_match_stage_position_cases(collection, test_case: StageTestCase): expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg, + ignore_doc_order=True, ) From 3558fcdff95f4e4460d743033119aa1c9c0ffd43 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 8 Apr 2026 11:16:05 -0700 Subject: [PATCH 5/6] Move multi-stage integration tests from match/ to stages/ Move tests that combine $match with other stages ($project, $group, $replaceRoot) to stages/test_stages_position_match.py per FOLDER_STRUCTURE.md rule that integration tests of same-level features belong in the parent folder. 
Rename remaining pure $match tests to test_match_pipeline_semantics.py. Signed-off-by: Daniel Frankcom --- .../tests/core/operator/stages/__init__.py | 0 .../match/test_match_pipeline_semantics.py | 85 +++++++++++++++++++ ...ition.py => test_stages_position_match.py} | 58 ++----------- 3 files changed, 91 insertions(+), 52 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_pipeline_semantics.py rename documentdb_tests/compatibility/tests/core/operator/stages/{match/test_match_stage_position.py => test_stages_position_match.py} (66%) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_pipeline_semantics.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_pipeline_semantics.py new file mode 100644 index 00000000..514446d8 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_pipeline_semantics.py @@ -0,0 +1,85 @@ +"""Tests for $match pipeline semantics.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Pipeline Semantics]: $match works as a standalone pipeline +# participant and composes correctly with other $match stages. 
+MATCH_PIPELINE_SEMANTICS_TESTS: list[StageTestCase] = [ + StageTestCase( + "pipeline_first_stage", + docs=[ + {"_id": 1, "a": 10}, + {"_id": 2, "a": 20}, + {"_id": 3, "a": 10}, + ], + pipeline=[{"$match": {"a": 10}}], + expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}], + msg="$match should work as the first stage of a pipeline", + ), + StageTestCase( + "pipeline_consecutive_match", + docs=[ + {"_id": 1, "a": 10, "b": 1}, + {"_id": 2, "a": 20, "b": 2}, + {"_id": 3, "a": 10, "b": 3}, + ], + pipeline=[ + {"$match": {"a": 10}}, + {"$match": {"b": 3}}, + ], + expected=[{"_id": 3, "a": 10, "b": 3}], + msg="$match consecutive stages should compose like AND of all predicates", + ), +] + +# Property [$text First-Stage Behavior]: $text search works inside $match when +# $match is the first stage of the pipeline. +MATCH_TEXT_FIRST_STAGE_TESTS: list[StageTestCase] = [ + StageTestCase( + "text_first_stage", + docs=[ + {"_id": 1, "content": "hello world"}, + {"_id": 2, "content": "goodbye world"}, + ], + setup=lambda collection: collection.create_index([("content", "text")]), + pipeline=[{"$match": {"$text": {"$search": "goodbye"}}}], + expected=[{"_id": 2, "content": "goodbye world"}], + msg="$match with $text should work when it is the first pipeline stage", + ), +] + +MATCH_PIPELINE_SEMANTICS_TESTS_ALL = MATCH_PIPELINE_SEMANTICS_TESTS + MATCH_TEXT_FIRST_STAGE_TESTS + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(MATCH_PIPELINE_SEMANTICS_TESTS_ALL)) +def test_match_pipeline_semantics_cases(collection, test_case: StageTestCase): + """Test $match pipeline semantics.""" + if test_case.setup: + test_case.setup(collection) + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + 
ignore_doc_order=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_match.py similarity index 66% rename from documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py rename to documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_match.py index 43f3e5d9..61f17382 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage_position.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_match.py @@ -1,4 +1,4 @@ -"""Tests for $match pipeline position behavior.""" +"""Tests for $match composing with other stages at different pipeline positions.""" from __future__ import annotations @@ -11,21 +11,9 @@ from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params -# Property [Pipeline Position]: $match filters correctly regardless of its -# position in the pipeline and composes with preceding stages that reshape -# documents. +# Property [Pipeline Position]: $match filters correctly when composed with +# preceding stages that reshape documents. 
MATCH_PIPELINE_POSITION_TESTS: list[StageTestCase] = [ - StageTestCase( - "pipeline_first_stage", - docs=[ - {"_id": 1, "a": 10}, - {"_id": 2, "a": 20}, - {"_id": 3, "a": 10}, - ], - pipeline=[{"$match": {"a": 10}}], - expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}], - msg="$match should work as the first stage of a pipeline", - ), StageTestCase( "pipeline_middle_stage", docs=[ @@ -55,20 +43,6 @@ expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}], msg="$match should work as the last stage of a pipeline", ), - StageTestCase( - "pipeline_consecutive_match", - docs=[ - {"_id": 1, "a": 10, "b": 1}, - {"_id": 2, "a": 20, "b": 2}, - {"_id": 3, "a": 10, "b": 3}, - ], - pipeline=[ - {"$match": {"a": 10}}, - {"$match": {"b": 3}}, - ], - expected=[{"_id": 3, "a": 10, "b": 3}], - msg="$match consecutive stages should compose like $and of all predicates", - ), StageTestCase( "pipeline_after_reshape_dropped_field", docs=[ @@ -137,31 +111,11 @@ ), ] -# Property [$text First-Stage Behavior]: $text search works inside $match when -# $match is the first stage of the pipeline. 
-MATCH_TEXT_FIRST_STAGE_TESTS: list[StageTestCase] = [ - StageTestCase( - "text_first_stage", - docs=[ - {"_id": 1, "content": "hello world"}, - {"_id": 2, "content": "goodbye world"}, - ], - setup=lambda collection: collection.create_index([("content", "text")]), - pipeline=[{"$match": {"$text": {"$search": "goodbye"}}}], - expected=[{"_id": 2, "content": "goodbye world"}], - msg="$match with $text should work when it is the first pipeline stage", - ), -] - -MATCH_STAGE_POSITION_TESTS_ALL = MATCH_PIPELINE_POSITION_TESTS + MATCH_TEXT_FIRST_STAGE_TESTS - @pytest.mark.aggregate -@pytest.mark.parametrize("test_case", pytest_params(MATCH_STAGE_POSITION_TESTS_ALL)) -def test_match_stage_position_cases(collection, test_case: StageTestCase): - """Test $match pipeline position behavior.""" - if test_case.setup: - test_case.setup(collection) +@pytest.mark.parametrize("test_case", pytest_params(MATCH_PIPELINE_POSITION_TESTS)) +def test_stage_position_match_cases(collection, test_case: StageTestCase): + """Test $match composing with other stages at different pipeline positions.""" if test_case.docs: collection.insert_many(test_case.docs) result = execute_command( From bc5096c975aaea12854e2e00b3468f5f3dd84531 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 8 Apr 2026 11:16:05 -0700 Subject: [PATCH 6/6] Add all query operator tests to $match Signed-off-by: Daniel Frankcom --- .../match/test_match_query_operators.py | 222 +++++++++++++++++- 1 file changed, 209 insertions(+), 13 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py index 9f76a5ea..61fbab19 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py @@ -11,10 +11,24 @@ from 
documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params -# Property [Query Operator Categories]: one representative per query operator -# category functions correctly inside $match as a container. +# Property [Query Operator Support]: each query operator functions correctly +# inside $match as a container. MATCH_QUERY_OPERATOR_TESTS: list[StageTestCase] = [ # Comparison operators. + StageTestCase( + "query_comparison_eq", + docs=[{"_id": 1, "a": 5}, {"_id": 2, "a": 10}], + pipeline=[{"$match": {"a": {"$eq": 5}}}], + expected=[{"_id": 1, "a": 5}], + msg="$match should support $eq", + ), + StageTestCase( + "query_comparison_ne", + docs=[{"_id": 1, "a": 5}, {"_id": 2, "a": 10}], + pipeline=[{"$match": {"a": {"$ne": 5}}}], + expected=[{"_id": 2, "a": 10}], + msg="$match should support $ne", + ), StageTestCase( "query_comparison_gt", docs=[ @@ -24,9 +38,58 @@ ], pipeline=[{"$match": {"a": {"$gt": 5}}}], expected=[{"_id": 2, "a": 7}, {"_id": 3, "a": 10}], - msg="$match should support comparison query operators", + msg="$match should support $gt", + ), + StageTestCase( + "query_comparison_gte", + docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 5}, {"_id": 3, "a": 10}], + pipeline=[{"$match": {"a": {"$gte": 5}}}], + expected=[{"_id": 2, "a": 5}, {"_id": 3, "a": 10}], + msg="$match should support $gte", + ), + StageTestCase( + "query_comparison_lt", + docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 7}, {"_id": 3, "a": 10}], + pipeline=[{"$match": {"a": {"$lt": 5}}}], + expected=[{"_id": 1, "a": 3}], + msg="$match should support $lt", + ), + StageTestCase( + "query_comparison_lte", + docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 5}, {"_id": 3, "a": 10}], + pipeline=[{"$match": {"a": {"$lte": 5}}}], + expected=[{"_id": 1, "a": 3}, {"_id": 2, "a": 5}], + msg="$match should support $lte", + ), + StageTestCase( + "query_comparison_in", + docs=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}], + pipeline=[{"$match": 
{"a": {"$in": [1, 3]}}}], + expected=[{"_id": 1, "a": 1}, {"_id": 3, "a": 3}], + msg="$match should support $in", + ), + StageTestCase( + "query_comparison_nin", + docs=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}], + pipeline=[{"$match": {"a": {"$nin": [1, 3]}}}], + expected=[{"_id": 2, "a": 2}], + msg="$match should support $nin", ), # Logical operators. + StageTestCase( + "query_logical_and", + docs=[ + {"_id": 1, "a": 1, "b": 10}, + {"_id": 2, "a": 2, "b": 20}, + {"_id": 3, "a": 1, "b": 20}, + ], + pipeline=[{"$match": {"$and": [{"a": 1}, {"b": {"$gte": 10}}]}}], + expected=[ + {"_id": 1, "a": 1, "b": 10}, + {"_id": 3, "a": 1, "b": 20}, + ], + msg="$match should support $and", + ), StageTestCase( "query_logical_or", docs=[ @@ -39,19 +102,44 @@ {"_id": 1, "a": 1, "b": 10}, {"_id": 3, "a": 3, "b": 30}, ], - msg="$match should support logical query operators", + msg="$match should support $or", + ), + StageTestCase( + "query_logical_not", + docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 7}, {"_id": 3, "a": 10}], + pipeline=[{"$match": {"a": {"$not": {"$gt": 5}}}}], + expected=[{"_id": 1, "a": 3}], + msg="$match should support $not", ), - # Element operators. StageTestCase( - "query_element_exists", + "query_logical_nor", + docs=[ + {"_id": 1, "a": 1}, + {"_id": 2, "a": 2}, + {"_id": 3, "a": 3}, + ], + pipeline=[{"$match": {"$nor": [{"a": 1}, {"a": 3}]}}], + expected=[{"_id": 2, "a": 2}], + msg="$match should support $nor", + ), + # Data type operators. 
+ StageTestCase( + "query_datatype_exists", docs=[{"_id": 1, "a": 10}, {"_id": 2, "b": 20}, {"_id": 3, "a": 30}], pipeline=[{"$match": {"a": {"$exists": True}}}], expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 30}], - msg="$match should support element query operators", + msg="$match should support $exists", + ), + StageTestCase( + "query_datatype_type", + docs=[{"_id": 1, "a": 10}, {"_id": 2, "a": "hello"}, {"_id": 3, "a": 3.14}], + pipeline=[{"$match": {"a": {"$type": "string"}}}], + expected=[{"_id": 2, "a": "hello"}], + msg="$match should support $type", ), - # Evaluation operators. + # Miscellaneous operators. StageTestCase( - "query_eval_expr", + "query_misc_expr", docs=[ {"_id": 1, "a": 10, "b": 10}, {"_id": 2, "a": 20, "b": 30}, @@ -62,9 +150,47 @@ {"_id": 1, "a": 10, "b": 10}, {"_id": 3, "a": 5, "b": 5}, ], - msg="$match should support evaluation query operators", + msg="$match should support $expr", + ), + StageTestCase( + "query_misc_jsonschema", + docs=[{"_id": 1, "a": 10}, {"_id": 2, "a": "hello"}, {"_id": 3}], + pipeline=[ + { + "$match": { + "$jsonSchema": {"required": ["a"], "properties": {"a": {"bsonType": "int"}}} + } + } + ], + expected=[{"_id": 1, "a": 10}], + msg="$match should support $jsonSchema", + ), + StageTestCase( + "query_misc_mod", + docs=[{"_id": 1, "a": 4}, {"_id": 2, "a": 6}, {"_id": 3, "a": 10}], + pipeline=[{"$match": {"a": {"$mod": [3, 1]}}}], + expected=[{"_id": 1, "a": 4}, {"_id": 3, "a": 10}], + msg="$match should support $mod", + ), + StageTestCase( + "query_misc_regex", + docs=[{"_id": 1, "s": "abc"}, {"_id": 2, "s": "xyz"}, {"_id": 3, "s": "abz"}], + pipeline=[{"$match": {"s": {"$regex": "^ab"}}}], + expected=[{"_id": 1, "s": "abc"}, {"_id": 3, "s": "abz"}], + msg="$match should support $regex", ), # Array operators. 
+ StageTestCase( + "query_array_all", + docs=[ + {"_id": 1, "arr": [1, 2, 3]}, + {"_id": 2, "arr": [1, 3]}, + {"_id": 3, "arr": [2, 3]}, + ], + pipeline=[{"$match": {"arr": {"$all": [1, 3]}}}], + expected=[{"_id": 1, "arr": [1, 2, 3]}, {"_id": 2, "arr": [1, 3]}], + msg="$match should support $all", + ), StageTestCase( "query_array_elemmatch", docs=[ @@ -77,9 +203,32 @@ {"_id": 1, "arr": [0.5, 0.8, 0.95]}, {"_id": 3, "arr": [0.9, 1.0]}, ], - msg="$match should support array query operators", + msg="$match should support $elemMatch", + ), + StageTestCase( + "query_array_size", + docs=[ + {"_id": 1, "arr": [1, 2, 3]}, + {"_id": 2, "arr": [1]}, + {"_id": 3, "arr": [1, 2]}, + ], + pipeline=[{"$match": {"arr": {"$size": 2}}}], + expected=[{"_id": 3, "arr": [1, 2]}], + msg="$match should support $size", ), # Bitwise operators. + StageTestCase( + "query_bitwise_bitsallclear", + docs=[ + {"_id": 1, "flags": 7}, + {"_id": 2, "flags": 8}, + {"_id": 3, "flags": 15}, + ], + # Bitmask 7 (binary 0111): flags 8 (1000) has all bits clear. + pipeline=[{"$match": {"flags": {"$bitsAllClear": 7}}}], + expected=[{"_id": 2, "flags": 8}], + msg="$match should support $bitsAllClear", + ), StageTestCase( "query_bitwise_bitsallset", docs=[ @@ -90,9 +239,56 @@ # Bitmask 5 (binary 0101): flags 7 (0111) and 15 (1111) match. pipeline=[{"$match": {"flags": {"$bitsAllSet": 5}}}], expected=[{"_id": 1, "flags": 7}, {"_id": 3, "flags": 15}], - msg="$match should support bitwise query operators", + msg="$match should support $bitsAllSet", + ), + StageTestCase( + "query_bitwise_bitsanyclear", + docs=[ + {"_id": 1, "flags": 7}, + {"_id": 2, "flags": 3}, + {"_id": 3, "flags": 15}, + ], + # Bitmask 12 (binary 1100): flags 7 (0111) and 3 (0011) have at least one clear. 
+ pipeline=[{"$match": {"flags": {"$bitsAnyClear": 12}}}], + expected=[{"_id": 1, "flags": 7}, {"_id": 2, "flags": 3}], + msg="$match should support $bitsAnyClear", + ), + StageTestCase( + "query_bitwise_bitsanyset", + docs=[ + {"_id": 1, "flags": 4}, + {"_id": 2, "flags": 8}, + {"_id": 3, "flags": 16}, + ], + # Bitmask 6 (binary 0110): flags 4 (0100) has bit 2 set. + pipeline=[{"$match": {"flags": {"$bitsAnySet": 6}}}], + expected=[{"_id": 1, "flags": 4}], + msg="$match should support $bitsAnySet", ), # Geospatial operators. + StageTestCase( + "query_geo_geointersects", + docs=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + pipeline=[ + { + "$match": { + "loc": { + "$geoIntersects": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]]], + } + } + } + } + } + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}], + msg="$match should support $geoIntersects", + ), StageTestCase( "query_geo_geowithin", docs=[ @@ -102,7 +298,7 @@ ], pipeline=[{"$match": {"loc": {"$geoWithin": {"$center": [[0, 0], 10]}}}}], expected=[{"_id": 1, "loc": [0, 0]}, {"_id": 3, "loc": [1, 1]}], - msg="$match should support geospatial query operators", + msg="$match should support $geoWithin", ), ]