diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py new file mode 100644 index 00000000..f40dc4bd --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_core.py @@ -0,0 +1,264 @@ +"""Tests for $match core matching behavior.""" + +from __future__ import annotations + +import pytest +from bson.son import SON + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Null and Missing Field Matching]: {field: null} matches both +# documents where the field is null-valued and documents where the field is +# missing entirely. 
MATCH_NULL_MISSING_TESTS: list[StageTestCase] = [
    StageTestCase(
        "null_matches_null_valued",
        docs=[{"_id": 1, "a": None}, {"_id": 2, "a": "x"}],
        pipeline=[{"$match": {"a": None}}],
        expected=[{"_id": 1, "a": None}],
        msg="$match with {field: null} should match documents where the field is null",
    ),
    StageTestCase(
        "null_matches_missing_field",
        docs=[{"_id": 1, "a": "x"}, {"_id": 2}],
        pipeline=[{"$match": {"a": None}}],
        expected=[{"_id": 2}],
        msg="$match with {field: null} should match documents where the field is missing",
    ),
    StageTestCase(
        "null_matches_both_null_and_missing",
        docs=[
            {"_id": 1, "a": None},
            {"_id": 2, "a": "x"},
            {"_id": 3},
            {"_id": 4, "a": 0},
        ],
        pipeline=[{"$match": {"a": None}}],
        expected=[{"_id": 1, "a": None}, {"_id": 3}],
        msg="$match with {field: null} should match both null-valued and missing-field documents",
    ),
    StageTestCase(
        # 0, False and "" are falsy in Python but are real BSON values, so a
        # null predicate must not match them.
        "null_excludes_falsy_values",
        docs=[
            {"_id": 1, "a": None},
            {"_id": 2, "a": 0},
            {"_id": 3, "a": False},
            {"_id": 4, "a": ""},
        ],
        pipeline=[{"$match": {"a": None}}],
        expected=[{"_id": 1, "a": None}],
        msg="$match with {field: null} should not match falsy non-null values",
    ),
]

# Property [Core Matching Behavior]: simple equality filtering selects exactly
# the matching documents, and matching against an empty collection yields an
# empty result.
MATCH_CORE_TESTS: list[StageTestCase] = [
    StageTestCase(
        "core_equality_single_match",
        docs=[
            {"_id": 1, "a": 10, "b": "x"},
            {"_id": 2, "a": 20, "b": "y"},
            {"_id": 3, "a": 10, "b": "z"},
        ],
        pipeline=[{"$match": {"a": 10}}],
        expected=[
            {"_id": 1, "a": 10, "b": "x"},
            {"_id": 3, "a": 10, "b": "z"},
        ],
        msg="$match should filter documents to those where the field equals the value",
    ),
    StageTestCase(
        "core_empty_collection",
        docs=[],
        pipeline=[{"$match": {"a": 1}}],
        expected=[],
        msg="$match on empty collection should return empty result",
    ),
]

# Property [Predicate Semantics]: $match correctly handles non-obvious
# predicate edge cases that could differ between compatible engines.
MATCH_PREDICATE_TESTS: list[StageTestCase] = [
    StageTestCase(
        "predicate_comment_ignored",
        docs=[
            {"_id": 1, "a": 10},
            {"_id": 2, "a": 20},
            {"_id": 3, "a": 10},
        ],
        pipeline=[{"$match": {"a": 10, "$comment": "this is a comment"}}],
        expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}],
        msg="$match should ignore $comment in the predicate and filter normally",
    ),
    StageTestCase(
        "predicate_contradictory_empty",
        docs=[
            {"_id": 1, "a": 3},
            {"_id": 2, "a": 7},
            {"_id": 3, "a": 15},
        ],
        pipeline=[{"$match": {"a": {"$gt": 10, "$lt": 5}}}],
        expected=[],
        msg="$match with contradictory conditions should return empty result without error",
    ),
    StageTestCase(
        # In a query (unlike an aggregation expression) a $-prefixed string
        # on the value side is a plain literal, not a field reference.
        "predicate_dollar_string_literal",
        docs=[
            {"_id": 1, "a": "$notAFieldRef"},
            {"_id": 2, "a": "hello"},
        ],
        pipeline=[{"$match": {"a": "$notAFieldRef"}}],
        expected=[{"_id": 1, "a": "$notAFieldRef"}],
        msg="$match should treat $-prefixed strings as literal values, not field references",
    ),
    StageTestCase(
        # SON preserves duplicate keys on the wire, which a plain dict cannot.
        "predicate_duplicate_field_last_wins_equality",
        docs=[
            {"_id": 1, "a": 10},
            {"_id": 2, "a": 20},
        ],
        pipeline=[{"$match": SON([("a", 10), ("a", 20)])}],
        expected=[{"_id": 2, "a": 20}],
        msg="$match with duplicate field names should use last-value-wins semantics",
    ),
    StageTestCase(
        "predicate_duplicate_field_last_wins_operator",
        docs=[
            {"_id": 1, "a": 10},
            {"_id": 2, "a": 20},
        ],
        pipeline=[{"$match": SON([("a", {"$gt": 15}), ("a", {"$lt": 15})])}],
        expected=[{"_id": 1, "a": 10}],
        msg="$match with duplicate operator predicates should use last-value-wins, not AND",
    ),
    StageTestCase(
        "predicate_dot_notation",
        docs=[
            {"_id": 1, "a": {"b": 10}},
            {"_id": 2, "a": {"b": 20}},
        ],
        pipeline=[{"$match": {"a.b": 10}}],
        expected=[{"_id": 1, "a": {"b": 10}}],
        msg="$match should support dot notation to match nested fields",
    ),
    StageTestCase(
        "predicate_dot_notation_array_index",
        docs=[
            {"_id": 1, "a": [{"b": 10}, {"b": 20}]},
            {"_id": 2, "a": [{"b": 30}, {"b": 40}]},
            {"_id": 3, "a": {"0": {"b": 99}}},
        ],
        pipeline=[{"$match": {"a.0.b": 10}}],
        expected=[{"_id": 1, "a": [{"b": 10}, {"b": 20}]}],
        msg="$match with numeric dot path should resolve as array index",
    ),
    StageTestCase(
        # The same numeric path component must also match a literal object
        # key named "0" when the value is a sub-document rather than an array.
        "predicate_dot_notation_object_key",
        docs=[
            {"_id": 1, "a": [{"b": 10}, {"b": 20}]},
            {"_id": 2, "a": [{"b": 30}, {"b": 40}]},
            {"_id": 3, "a": {"0": {"b": 99}}},
        ],
        pipeline=[{"$match": {"a.0.b": 99}}],
        expected=[{"_id": 3, "a": {"0": {"b": 99}}}],
        msg="$match with numeric dot path should also match object keys",
    ),
    StageTestCase(
        "predicate_dot_notation_array_index_and_object_key",
        docs=[
            {"_id": 1, "a": [{"b": 10}, {"b": 20}]},
            {"_id": 2, "a": [{"b": 10}, {"b": 40}]},
            {"_id": 3, "a": {"0": {"b": 10}}},
        ],
        pipeline=[{"$match": {"a.0.b": 10}}],
        expected=[
            {"_id": 1, "a": [{"b": 10}, {"b": 20}]},
            {"_id": 2, "a": [{"b": 10}, {"b": 40}]},
            {"_id": 3, "a": {"0": {"b": 10}}},
        ],
        msg="$match with numeric dot path should match both array index and object key",
    ),
]

# Property [Empty Predicate]: {$match: {}} returns all documents, and an
# empty collection always returns an empty result.
+MATCH_EMPTY_PREDICATE_TESTS: list[StageTestCase] = [ + StageTestCase( + "empty_predicate_returns_all", + docs=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}], + pipeline=[{"$match": {}}], + expected=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}], + msg="$match with empty predicate should return all documents", + ), + StageTestCase( + "empty_collection_empty_predicate", + docs=[], + pipeline=[{"$match": {}}], + expected=[], + msg="$match on empty collection with empty predicate should return empty result", + ), +] + +# Property [Large Predicate]: $match handles predicates with many conditions +# without error. +MATCH_LARGE_PREDICATE_TESTS: list[StageTestCase] = [ + StageTestCase( + "large_many_field_conditions", + docs=[ + {"_id": 1, **{f"f{i}": i for i in range(500)}}, + {"_id": 2, "f0": 0}, + ], + pipeline=[{"$match": {f"f{i}": i for i in range(500)}}], + expected=[{"_id": 1, **{f"f{i}": i for i in range(500)}}], + msg="$match should handle a predicate with 500 field conditions", + ), + StageTestCase( + "large_many_or_branches", + docs=[ + {"_id": 1, "a": 0}, + {"_id": 2, "a": 499}, + {"_id": 3, "a": 999}, + ], + pipeline=[{"$match": {"$or": [{"a": i} for i in range(500)]}}], + expected=[{"_id": 1, "a": 0}, {"_id": 2, "a": 499}], + msg="$match should handle an $or predicate with 500 branches", + ), +] + +MATCH_CORE_TESTS_ALL = ( + MATCH_NULL_MISSING_TESTS + + MATCH_CORE_TESTS + + MATCH_PREDICATE_TESTS + + MATCH_EMPTY_PREDICATE_TESTS + + MATCH_LARGE_PREDICATE_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(MATCH_CORE_TESTS_ALL)) +def test_match_core_cases(collection, test_case: StageTestCase): + """Test $match core matching behavior.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + 
error_code=test_case.error_code, + msg=test_case.msg, + ignore_doc_order=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_errors.py new file mode 100644 index 00000000..2be8a321 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_errors.py @@ -0,0 +1,203 @@ +"""Tests for $match argument validation and restricted operator errors.""" + +from __future__ import annotations + +from datetime import datetime + +import pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + MATCH_FILTER_NOT_OBJECT_ERROR, + MATCH_TEXT_NOT_FIRST_STAGE_ERROR, + NEAR_NOT_ALLOWED_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Argument Validation Errors]: non-document arguments to $match +# produce an error. 
# Every non-document BSON type handed to $match must fail with the same
# error code, so the cases are generated from one (name, value, phrase)
# table instead of being spelled out as seventeen near-identical literals.
_NON_DOCUMENT_MATCH_ARGS = [
    ("arg_error_string", "hello", "a string"),
    ("arg_error_int", 42, "an integer"),
    ("arg_error_float", 3.14, "a float"),
    ("arg_error_bool", True, "a boolean"),
    ("arg_error_null", None, "a null"),
    ("arg_error_array", [1, 2], "an array"),
    ("arg_error_int64", Int64(42), "an int64"),
    ("arg_error_decimal128", Decimal128("3.14"), "a decimal128"),
    ("arg_error_objectid", ObjectId(), "an ObjectId"),
    ("arg_error_datetime", datetime(2024, 1, 1), "a datetime"),
    ("arg_error_binary", Binary(b"\x00\x01"), "a binary"),
    ("arg_error_regex", Regex("^abc"), "a regex"),
    ("arg_error_timestamp", Timestamp(0, 0), "a timestamp"),
    ("arg_error_minkey", MinKey(), "a MinKey"),
    ("arg_error_maxkey", MaxKey(), "a MaxKey"),
    ("arg_error_code", Code("function(){}"), "a JavaScript code"),
    ("arg_error_code_with_scope", Code("function(){}", {"x": 1}), "a JavaScript code with scope"),
]

MATCH_ARGUMENT_VALIDATION_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_name,
        pipeline=[{"$match": bad_argument}],
        error_code=MATCH_FILTER_NOT_OBJECT_ERROR,
        msg=f"$match should reject {type_phrase} argument",
    )
    for case_name, bad_argument, type_phrase in _NON_DOCUMENT_MATCH_ARGS
]

# Property [Restricted Operator Errors]: $where, $near, $nearSphere, and
# unknown $-prefixed top-level operators are rejected inside $match; $text is
# rejected when $match is not the first pipeline stage.
+MATCH_RESTRICTED_OPERATOR_TESTS: list[StageTestCase] = [ + StageTestCase( + "restricted_where", + docs=[{"_id": 1, "a": 10}], + pipeline=[{"$match": {"$where": "true"}}], + error_code=BAD_VALUE_ERROR, + msg="$match should reject $where in the predicate", + ), + StageTestCase( + "restricted_near", + docs=[{"_id": 1, "loc": [0, 0]}], + pipeline=[{"$match": {"loc": {"$near": [0, 0]}}}], + error_code=NEAR_NOT_ALLOWED_ERROR, + msg="$match should reject $near in the predicate", + ), + StageTestCase( + "restricted_near_sphere", + docs=[{"_id": 1, "loc": [0, 0]}], + pipeline=[{"$match": {"loc": {"$nearSphere": [0, 0]}}}], + error_code=NEAR_NOT_ALLOWED_ERROR, + msg="$match should reject $nearSphere in the predicate", + ), + StageTestCase( + "restricted_geo_near", + docs=[{"_id": 1, "loc": [0, 0]}], + pipeline=[{"$match": {"loc": {"$geoNear": {"near": [0, 0]}}}}], + error_code=NEAR_NOT_ALLOWED_ERROR, + msg="$match should reject $geoNear in the predicate", + ), + StageTestCase( + "restricted_text_non_first_stage", + docs=[{"_id": 1, "a": 10}], + pipeline=[ + {"$project": {"a": 1}}, + {"$match": {"$text": {"$search": "hello"}}}, + ], + error_code=MATCH_TEXT_NOT_FIRST_STAGE_ERROR, + msg="$match should reject $text when it is not the first pipeline stage", + ), + StageTestCase( + "restricted_unknown_dollar_operator", + docs=[{"_id": 1, "a": 10}], + pipeline=[{"$match": {"$fakeOperator": 1}}], + error_code=BAD_VALUE_ERROR, + msg="$match should reject an unknown $-prefixed top-level operator", + ), +] + +MATCH_ERROR_TESTS_ALL = MATCH_ARGUMENT_VALIDATION_TESTS + MATCH_RESTRICTED_OPERATOR_TESTS + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(MATCH_ERROR_TESTS_ALL)) +def test_match_error_cases(collection, test_case: StageTestCase): + """Test $match argument validation and restricted operator errors.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + 
"pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_pipeline_semantics.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_pipeline_semantics.py new file mode 100644 index 00000000..514446d8 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_pipeline_semantics.py @@ -0,0 +1,85 @@ +"""Tests for $match pipeline semantics.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Pipeline Semantics]: $match works as a standalone pipeline +# participant and composes correctly with other $match stages. +MATCH_PIPELINE_SEMANTICS_TESTS: list[StageTestCase] = [ + StageTestCase( + "pipeline_first_stage", + docs=[ + {"_id": 1, "a": 10}, + {"_id": 2, "a": 20}, + {"_id": 3, "a": 10}, + ], + pipeline=[{"$match": {"a": 10}}], + expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}], + msg="$match should work as the first stage of a pipeline", + ), + StageTestCase( + "pipeline_consecutive_match", + docs=[ + {"_id": 1, "a": 10, "b": 1}, + {"_id": 2, "a": 20, "b": 2}, + {"_id": 3, "a": 10, "b": 3}, + ], + pipeline=[ + {"$match": {"a": 10}}, + {"$match": {"b": 3}}, + ], + expected=[{"_id": 3, "a": 10, "b": 3}], + msg="$match consecutive stages should compose like AND of all predicates", + ), +] + +# Property [$text First-Stage Behavior]: $text search works inside $match when +# $match is the first stage of the pipeline. 
+MATCH_TEXT_FIRST_STAGE_TESTS: list[StageTestCase] = [ + StageTestCase( + "text_first_stage", + docs=[ + {"_id": 1, "content": "hello world"}, + {"_id": 2, "content": "goodbye world"}, + ], + setup=lambda collection: collection.create_index([("content", "text")]), + pipeline=[{"$match": {"$text": {"$search": "goodbye"}}}], + expected=[{"_id": 2, "content": "goodbye world"}], + msg="$match with $text should work when it is the first pipeline stage", + ), +] + +MATCH_PIPELINE_SEMANTICS_TESTS_ALL = MATCH_PIPELINE_SEMANTICS_TESTS + MATCH_TEXT_FIRST_STAGE_TESTS + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(MATCH_PIPELINE_SEMANTICS_TESTS_ALL)) +def test_match_pipeline_semantics_cases(collection, test_case: StageTestCase): + """Test $match pipeline semantics.""" + if test_case.setup: + test_case.setup(collection) + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ignore_doc_order=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py new file mode 100644 index 00000000..61fbab19 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_query_operators.py @@ -0,0 +1,326 @@ +"""Tests for $match query operator categories.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# 
# Property [Query Operator Support]: each query operator functions correctly
# inside $match as a container.
MATCH_QUERY_OPERATOR_TESTS: list[StageTestCase] = [
    # Comparison operators.
    StageTestCase(
        "query_comparison_eq",
        docs=[{"_id": 1, "a": 5}, {"_id": 2, "a": 10}],
        pipeline=[{"$match": {"a": {"$eq": 5}}}],
        expected=[{"_id": 1, "a": 5}],
        msg="$match should support $eq",
    ),
    StageTestCase(
        "query_comparison_ne",
        docs=[{"_id": 1, "a": 5}, {"_id": 2, "a": 10}],
        pipeline=[{"$match": {"a": {"$ne": 5}}}],
        expected=[{"_id": 2, "a": 10}],
        msg="$match should support $ne",
    ),
    StageTestCase(
        "query_comparison_gt",
        docs=[
            {"_id": 1, "a": 3},
            {"_id": 2, "a": 7},
            {"_id": 3, "a": 10},
        ],
        pipeline=[{"$match": {"a": {"$gt": 5}}}],
        expected=[{"_id": 2, "a": 7}, {"_id": 3, "a": 10}],
        msg="$match should support $gt",
    ),
    StageTestCase(
        "query_comparison_gte",
        docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 5}, {"_id": 3, "a": 10}],
        pipeline=[{"$match": {"a": {"$gte": 5}}}],
        expected=[{"_id": 2, "a": 5}, {"_id": 3, "a": 10}],
        msg="$match should support $gte",
    ),
    StageTestCase(
        "query_comparison_lt",
        docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 7}, {"_id": 3, "a": 10}],
        pipeline=[{"$match": {"a": {"$lt": 5}}}],
        expected=[{"_id": 1, "a": 3}],
        msg="$match should support $lt",
    ),
    StageTestCase(
        "query_comparison_lte",
        docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 5}, {"_id": 3, "a": 10}],
        pipeline=[{"$match": {"a": {"$lte": 5}}}],
        expected=[{"_id": 1, "a": 3}, {"_id": 2, "a": 5}],
        msg="$match should support $lte",
    ),
    StageTestCase(
        "query_comparison_in",
        docs=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}],
        pipeline=[{"$match": {"a": {"$in": [1, 3]}}}],
        expected=[{"_id": 1, "a": 1}, {"_id": 3, "a": 3}],
        msg="$match should support $in",
    ),
    StageTestCase(
        "query_comparison_nin",
        docs=[{"_id": 1, "a": 1}, {"_id": 2, "a": 2}, {"_id": 3, "a": 3}],
        pipeline=[{"$match": {"a": {"$nin": [1, 3]}}}],
        expected=[{"_id": 2, "a": 2}],
        msg="$match should support $nin",
    ),
    # Logical operators.
    StageTestCase(
        "query_logical_and",
        docs=[
            {"_id": 1, "a": 1, "b": 10},
            {"_id": 2, "a": 2, "b": 20},
            {"_id": 3, "a": 1, "b": 20},
        ],
        pipeline=[{"$match": {"$and": [{"a": 1}, {"b": {"$gte": 10}}]}}],
        expected=[
            {"_id": 1, "a": 1, "b": 10},
            {"_id": 3, "a": 1, "b": 20},
        ],
        msg="$match should support $and",
    ),
    StageTestCase(
        "query_logical_or",
        docs=[
            {"_id": 1, "a": 1, "b": 10},
            {"_id": 2, "a": 2, "b": 20},
            {"_id": 3, "a": 3, "b": 30},
        ],
        pipeline=[{"$match": {"$or": [{"a": 1}, {"b": 30}]}}],
        expected=[
            {"_id": 1, "a": 1, "b": 10},
            {"_id": 3, "a": 3, "b": 30},
        ],
        msg="$match should support $or",
    ),
    StageTestCase(
        "query_logical_not",
        docs=[{"_id": 1, "a": 3}, {"_id": 2, "a": 7}, {"_id": 3, "a": 10}],
        pipeline=[{"$match": {"a": {"$not": {"$gt": 5}}}}],
        expected=[{"_id": 1, "a": 3}],
        msg="$match should support $not",
    ),
    StageTestCase(
        "query_logical_nor",
        docs=[
            {"_id": 1, "a": 1},
            {"_id": 2, "a": 2},
            {"_id": 3, "a": 3},
        ],
        pipeline=[{"$match": {"$nor": [{"a": 1}, {"a": 3}]}}],
        expected=[{"_id": 2, "a": 2}],
        msg="$match should support $nor",
    ),
    # Data type operators.
    StageTestCase(
        "query_datatype_exists",
        docs=[{"_id": 1, "a": 10}, {"_id": 2, "b": 20}, {"_id": 3, "a": 30}],
        pipeline=[{"$match": {"a": {"$exists": True}}}],
        expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 30}],
        msg="$match should support $exists",
    ),
    StageTestCase(
        "query_datatype_type",
        docs=[{"_id": 1, "a": 10}, {"_id": 2, "a": "hello"}, {"_id": 3, "a": 3.14}],
        pipeline=[{"$match": {"a": {"$type": "string"}}}],
        expected=[{"_id": 2, "a": "hello"}],
        msg="$match should support $type",
    ),
    # Miscellaneous operators.
    StageTestCase(
        "query_misc_expr",
        docs=[
            {"_id": 1, "a": 10, "b": 10},
            {"_id": 2, "a": 20, "b": 30},
            {"_id": 3, "a": 5, "b": 5},
        ],
        pipeline=[{"$match": {"$expr": {"$eq": ["$a", "$b"]}}}],
        expected=[
            {"_id": 1, "a": 10, "b": 10},
            {"_id": 3, "a": 5, "b": 5},
        ],
        msg="$match should support $expr",
    ),
    StageTestCase(
        # Doc 3 has no "a" at all, so "required" must exclude it.
        "query_misc_jsonschema",
        docs=[{"_id": 1, "a": 10}, {"_id": 2, "a": "hello"}, {"_id": 3}],
        pipeline=[
            {
                "$match": {
                    "$jsonSchema": {"required": ["a"], "properties": {"a": {"bsonType": "int"}}}
                }
            }
        ],
        expected=[{"_id": 1, "a": 10}],
        msg="$match should support $jsonSchema",
    ),
    StageTestCase(
        # $mod takes [divisor, remainder]: 4 % 3 == 1 and 10 % 3 == 1 match.
        "query_misc_mod",
        docs=[{"_id": 1, "a": 4}, {"_id": 2, "a": 6}, {"_id": 3, "a": 10}],
        pipeline=[{"$match": {"a": {"$mod": [3, 1]}}}],
        expected=[{"_id": 1, "a": 4}, {"_id": 3, "a": 10}],
        msg="$match should support $mod",
    ),
    StageTestCase(
        "query_misc_regex",
        docs=[{"_id": 1, "s": "abc"}, {"_id": 2, "s": "xyz"}, {"_id": 3, "s": "abz"}],
        pipeline=[{"$match": {"s": {"$regex": "^ab"}}}],
        expected=[{"_id": 1, "s": "abc"}, {"_id": 3, "s": "abz"}],
        msg="$match should support $regex",
    ),
    # Array operators.
    StageTestCase(
        "query_array_all",
        docs=[
            {"_id": 1, "arr": [1, 2, 3]},
            {"_id": 2, "arr": [1, 3]},
            {"_id": 3, "arr": [2, 3]},
        ],
        pipeline=[{"$match": {"arr": {"$all": [1, 3]}}}],
        expected=[{"_id": 1, "arr": [1, 2, 3]}, {"_id": 2, "arr": [1, 3]}],
        msg="$match should support $all",
    ),
    StageTestCase(
        # A single array element satisfying the sub-predicate is enough.
        "query_array_elemmatch",
        docs=[
            {"_id": 1, "arr": [0.5, 0.8, 0.95]},
            {"_id": 2, "arr": [0.1, 0.3]},
            {"_id": 3, "arr": [0.9, 1.0]},
        ],
        pipeline=[{"$match": {"arr": {"$elemMatch": {"$gte": 0.9}}}}],
        expected=[
            {"_id": 1, "arr": [0.5, 0.8, 0.95]},
            {"_id": 3, "arr": [0.9, 1.0]},
        ],
        msg="$match should support $elemMatch",
    ),
    StageTestCase(
        "query_array_size",
        docs=[
            {"_id": 1, "arr": [1, 2, 3]},
            {"_id": 2, "arr": [1]},
            {"_id": 3, "arr": [1, 2]},
        ],
        pipeline=[{"$match": {"arr": {"$size": 2}}}],
        expected=[{"_id": 3, "arr": [1, 2]}],
        msg="$match should support $size",
    ),
    # Bitwise operators.
    StageTestCase(
        "query_bitwise_bitsallclear",
        docs=[
            {"_id": 1, "flags": 7},
            {"_id": 2, "flags": 8},
            {"_id": 3, "flags": 15},
        ],
        # Bitmask 7 (binary 0111): flags 8 (1000) has all bits clear.
        pipeline=[{"$match": {"flags": {"$bitsAllClear": 7}}}],
        expected=[{"_id": 2, "flags": 8}],
        msg="$match should support $bitsAllClear",
    ),
    StageTestCase(
        "query_bitwise_bitsallset",
        docs=[
            {"_id": 1, "flags": 7},
            {"_id": 2, "flags": 3},
            {"_id": 3, "flags": 15},
        ],
        # Bitmask 5 (binary 0101): flags 7 (0111) and 15 (1111) match.
        pipeline=[{"$match": {"flags": {"$bitsAllSet": 5}}}],
        expected=[{"_id": 1, "flags": 7}, {"_id": 3, "flags": 15}],
        msg="$match should support $bitsAllSet",
    ),
    StageTestCase(
        "query_bitwise_bitsanyclear",
        docs=[
            {"_id": 1, "flags": 7},
            {"_id": 2, "flags": 3},
            {"_id": 3, "flags": 15},
        ],
        # Bitmask 12 (binary 1100): flags 7 (0111) and 3 (0011) have at least one clear.
        pipeline=[{"$match": {"flags": {"$bitsAnyClear": 12}}}],
        expected=[{"_id": 1, "flags": 7}, {"_id": 2, "flags": 3}],
        msg="$match should support $bitsAnyClear",
    ),
    StageTestCase(
        "query_bitwise_bitsanyset",
        docs=[
            {"_id": 1, "flags": 4},
            {"_id": 2, "flags": 8},
            {"_id": 3, "flags": 16},
        ],
        # Bitmask 6 (binary 0110): flags 4 (0100) has bit 2 set.
        pipeline=[{"$match": {"flags": {"$bitsAnySet": 6}}}],
        expected=[{"_id": 1, "flags": 4}],
        msg="$match should support $bitsAnySet",
    ),
    # Geospatial operators.
    StageTestCase(
        # GeoJSON point (0, 0) falls inside the unit-square polygon;
        # point (50, 50) does not.
        "query_geo_geointersects",
        docs=[
            {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}},
            {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}},
        ],
        pipeline=[
            {
                "$match": {
                    "loc": {
                        "$geoIntersects": {
                            "$geometry": {
                                "type": "Polygon",
                                "coordinates": [[[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]]],
                            }
                        }
                    }
                }
            }
        ],
        expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}],
        msg="$match should support $geoIntersects",
    ),
    StageTestCase(
        # Legacy-coordinate $center circle of radius 10 around the origin.
        "query_geo_geowithin",
        docs=[
            {"_id": 1, "loc": [0, 0]},
            {"_id": 2, "loc": [50, 50]},
            {"_id": 3, "loc": [1, 1]},
        ],
        pipeline=[{"$match": {"loc": {"$geoWithin": {"$center": [[0, 0], 10]}}}}],
        expected=[{"_id": 1, "loc": [0, 0]}, {"_id": 3, "loc": [1, 1]}],
        msg="$match should support $geoWithin",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(MATCH_QUERY_OPERATOR_TESTS))
def test_match_query_operator_cases(collection, test_case: StageTestCase):
    """Test $match query operator categories.

    Inserts the case's fixture documents (when present), runs the aggregate
    command with the case's pipeline, and checks the result against the
    expected documents without regard to document order.
    """
    if test_case.docs:
        collection.insert_many(test_case.docs)
    result = execute_command(
        collection,
        {
            "aggregate": collection.name,
            "pipeline": test_case.pipeline,
            "cursor": {},
        },
    )
    assertResult(
        result,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
        # Result ordering is not part of the property under test.
        ignore_doc_order=True,
    )
a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage.py b/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage.py deleted file mode 100644 index 256a62fc..00000000 --- a/documentdb_tests/compatibility/tests/core/operator/stages/match/test_match_stage.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -Aggregation $match stage tests. - -Tests for the $match stage in aggregation pipelines. -""" - -import pytest - -from documentdb_tests.framework.assertions import assertSuccess -from documentdb_tests.framework.executor import execute_command - - -@pytest.mark.aggregate -@pytest.mark.smoke -def test_match_simple_filter(collection): - """Test $match stage with simple equality filter.""" - collection.insert_many( - [ - {"_id": 0, "a": "A", "b": 30, "c": "active"}, - {"_id": 1, "a": "B", "b": 25, "c": "active"}, - {"_id": 2, "a": "C", "b": 35, "c": "inactive"}, - ] - ) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": [{"$match": {"c": "active"}}], "cursor": {}}, - ) - - expected = [ - {"_id": 0, "a": "A", "b": 30, "c": "active"}, - {"_id": 1, "a": "B", "b": 25, "c": "active"}, - ] - assertSuccess(result, expected, "Should match active documents") - - -@pytest.mark.aggregate -def test_match_with_comparison_operator(collection): - """Test $match stage with comparison operators.""" - collection.insert_many( - [ - {"_id": 0, "a": "A", "b": 30}, - {"_id": 1, "a": "B", "b": 25}, - {"_id": 2, "a": "C", "b": 35}, - ] - ) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": [{"$match": {"b": {"$gt": 25}}}], "cursor": {}}, - ) - - expected = [{"_id": 0, "a": "A", "b": 30}, {"_id": 2, "a": "C", "b": 35}] - assertSuccess(result, expected, "Should match documents with b > 25") - - -@pytest.mark.aggregate -def test_match_multiple_conditions(collection): - """Test $match stage with multiple filter conditions.""" - collection.insert_many( - [ - {"_id": 0, "a": "A", "b": 
30, "c": "NYC"}, - {"_id": 1, "a": "B", "b": 25, "c": "SF"}, - {"_id": 2, "a": "C", "b": 35, "c": "SF"}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [{"$match": {"c": "NYC", "b": {"$gte": 30}}}], - "cursor": {}, - }, - ) - - expected = [{"_id": 0, "a": "A", "b": 30, "c": "NYC"}] - assertSuccess(result, expected, "Should match multiple conditions") - - -@pytest.mark.aggregate -def test_match_empty_result(collection): - """Test $match stage that matches no documents.""" - collection.insert_many( - [ - {"_id": 0, "a": "A", "b": "active"}, - {"_id": 1, "a": "B", "b": "active"}, - ] - ) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": [{"$match": {"b": "inactive"}}], "cursor": {}}, - ) - - assertSuccess(result, [], "Should return empty result when no match") diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_match.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_match.py new file mode 100644 index 00000000..61f17382 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_match.py @@ -0,0 +1,135 @@ +"""Tests for $match composing with other stages at different pipeline positions.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Pipeline Position]: $match filters correctly when composed with +# preceding stages that reshape documents. 
+MATCH_PIPELINE_POSITION_TESTS: list[StageTestCase] = [ + StageTestCase( + "pipeline_middle_stage", + docs=[ + {"_id": 1, "a": 10, "b": "x"}, + {"_id": 2, "a": 20, "b": "y"}, + {"_id": 3, "a": 10, "b": "z"}, + ], + pipeline=[ + {"$project": {"a": 1}}, + {"$match": {"a": 10}}, + {"$project": {"a": 1}}, + ], + expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}], + msg="$match should work as a middle stage of a pipeline", + ), + StageTestCase( + "pipeline_last_stage", + docs=[ + {"_id": 1, "a": 10}, + {"_id": 2, "a": 20}, + {"_id": 3, "a": 10}, + ], + pipeline=[ + {"$project": {"a": 1}}, + {"$match": {"a": 10}}, + ], + expected=[{"_id": 1, "a": 10}, {"_id": 3, "a": 10}], + msg="$match should work as the last stage of a pipeline", + ), + StageTestCase( + "pipeline_after_reshape_dropped_field", + docs=[ + {"_id": 1, "a": 10, "b": "x"}, + {"_id": 2, "a": 20, "b": "y"}, + ], + pipeline=[ + {"$project": {"b": 1}}, + {"$match": {"a": 10}}, + ], + expected=[], + msg="$match on a field dropped by a preceding stage should return empty", + ), + StageTestCase( + "pipeline_after_reshape_computed_field", + docs=[ + {"_id": 1, "a": 10}, + {"_id": 2, "a": 20}, + ], + pipeline=[ + {"$project": {"doubled": {"$multiply": ["$a", 2]}}}, + {"$match": {"doubled": 40}}, + ], + expected=[{"_id": 2, "doubled": 40}], + msg="$match should filter on fields computed by a preceding stage", + ), + StageTestCase( + "pipeline_after_aggregation_computed_field", + docs=[ + {"_id": 1, "cat": "a", "val": 5}, + {"_id": 2, "cat": "b", "val": 3}, + {"_id": 3, "cat": "a", "val": 7}, + ], + pipeline=[ + {"$group": {"_id": "$cat", "total": {"$sum": "$val"}}}, + {"$match": {"total": 12}}, + ], + expected=[{"_id": "a", "total": 12}], + msg="$match should filter on fields produced by an aggregation stage", + ), + StageTestCase( + "pipeline_after_aggregation_dropped_field", + docs=[ + {"_id": 1, "cat": "a", "val": 5}, + {"_id": 2, "cat": "b", "val": 3}, + ], + pipeline=[ + {"$group": {"_id": "$cat", "total": 
{"$sum": "$val"}}}, + {"$match": {"val": 5}}, + ], + expected=[], + msg="$match on a field absent from aggregation output should return empty", + ), + StageTestCase( + "pipeline_after_root_replacement", + docs=[ + {"_id": 1, "inner": {"x": 10}}, + {"_id": 2, "inner": {"x": 20}}, + ], + pipeline=[ + {"$replaceRoot": {"newRoot": "$inner"}}, + {"$match": {"x": 10}}, + ], + expected=[{"x": 10}], + msg="$match should filter on the document shape produced by a root replacement stage", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(MATCH_PIPELINE_POSITION_TESTS)) +def test_stage_position_match_cases(collection, test_case: StageTestCase): + """Test $match composing with other stages at different pipeline positions.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ignore_doc_order=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py b/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py new file mode 100644 index 00000000..a06d47b7 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py @@ -0,0 +1,17 @@ +""" +Shared test case for pipeline stage tests. 
+""" + +from dataclasses import dataclass +from typing import Any, Callable, Optional + +from documentdb_tests.framework.test_case import BaseTestCase + + +@dataclass(frozen=True) +class StageTestCase(BaseTestCase): + """Test case for pipeline stage tests.""" + + docs: Optional[list[dict[str, Any]]] = None + pipeline: Optional[list[dict[str, Any]]] = None + setup: Optional[Callable] = None diff --git a/documentdb_tests/framework/assertions.py b/documentdb_tests/framework/assertions.py index 630232c9..7e40331b 100644 --- a/documentdb_tests/framework/assertions.py +++ b/documentdb_tests/framework/assertions.py @@ -17,6 +17,11 @@ _NUMERIC_BSON_TYPES = (int, float, Int64, Decimal128) +def _sort_if_list(value: Any) -> Any: + """Return a sorted copy if value is a list, otherwise return it unchanged.""" + return sorted(value, key=lambda x: str(x)) if isinstance(value, list) else value + + def _strict_equal(a: Any, b: Any) -> bool: """Equality with stricter semantics for BSON numeric types. @@ -86,7 +91,7 @@ def assertSuccess( raw_res: If asserting raw result. 
False by default, only compare content of ["cursor"]["firstBatch"] transform: Optional callback to transform result before comparison - ignore_doc_order: If True, compare lists as sets (order-independent) + ignore_doc_order: If True, compare lists ignoring order (duplicates still matter) """ if isinstance(result, Exception): if isinstance(result, _INFRA_TYPES): @@ -105,13 +110,11 @@ def assertSuccess( error_text += f"\n\nExpected:\n{pprint.pformat(expected, width=100)}" error_text += f"\n\nActual:\n{pprint.pformat(result, width=100)}\n" - if ignore_doc_order and isinstance(result, list) and isinstance(expected, list): - assert _strict_equal( - sorted(result, key=lambda x: str(x)), - sorted(expected, key=lambda x: str(x)), - ), error_text - else: - assert _strict_equal(result, expected), error_text + if ignore_doc_order: + result = _sort_if_list(result) + expected = _sort_if_list(expected) + + assert _strict_equal(result, expected), error_text def assertSuccessPartial( @@ -194,6 +197,7 @@ def assertResult( expected: Any = None, error_code: Optional[int] = None, msg: Optional[str] = None, + ignore_doc_order: bool = False, ): """ Universal assertion that handles success and error cases. @@ -203,6 +207,7 @@ def assertResult( expected: Expected result documents (for success cases) error_code: Expected error code (for error cases) msg: Custom assertion message (optional) + ignore_doc_order: If True, compare lists ignoring order (duplicates still matter) Usage: assertResult(result, expected=[{"_id": 1}]) # Success case @@ -211,4 +216,4 @@ def assertResult( if error_code is not None: assertFailureCode(result, error_code, msg) else: - assertSuccess(result, expected, msg) + assertSuccess(result, expected, msg, ignore_doc_order=ignore_doc_order)