From 7444540960d05818d397106cfb7068e6a493e000 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 8 Apr 2026 14:13:21 -0700 Subject: [PATCH 1/6] Add $sort stage tests Signed-off-by: Daniel Frankcom --- .../tests/core/operator/stages/__init__.py | 0 .../core/operator/stages/sort/__init__.py | 0 .../operator/stages/sort/test_sort_arrays.py | 225 ++++++++++ .../stages/sort/test_sort_basic_ordering.py | 408 ++++++++++++++++++ .../stages/sort/test_sort_direction_values.py | 161 +++++++ .../stages/sort/test_sort_field_paths.py | 175 ++++++++ .../stages/sort/test_sort_key_resolution.py | 161 +++++++ .../stages/sort/test_sort_meta_errors.py | 252 +++++++++++ .../operator/stages/sort/test_sort_numeric.py | 317 ++++++++++++++ .../stages/sort/test_sort_order_errors.py | 350 +++++++++++++++ .../stages/sort/test_sort_spec_errors.py | 317 ++++++++++++++ .../stages/sort/test_sort_type_comparison.py | 281 ++++++++++++ .../stages/test_stages_position_sort.py | 221 ++++++++++ .../core/operator/stages/utils/__init__.py | 0 .../operator/stages/utils/stage_test_case.py | 20 + documentdb_tests/framework/bson_helpers.py | 35 ++ documentdb_tests/framework/error_codes.py | 22 + documentdb_tests/framework/test_constants.py | 7 + 18 files changed, 2952 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/sort/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_arrays.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_basic_ordering.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_direction_values.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_field_paths.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_key_resolution.py create 
mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_meta_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_numeric.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_order_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_spec_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_type_comparison.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_sort.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py create mode 100644 documentdb_tests/framework/bson_helpers.py diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_arrays.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_arrays.py new file mode 100644 index 00000000..1dba46a6 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_arrays.py @@ -0,0 +1,225 @@ +from __future__ import annotations + +import pytest +from bson import MaxKey, MinKey + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from 
documentdb_tests.framework.parametrize import pytest_params + +# Property [Array Sort Key Extraction]: in ascending sort the sort key for an +# array field is the minimum element by BSON comparison order, and in +# descending sort it is the maximum element. +SORT_ARRAY_KEY_TESTS: list[StageTestCase] = [ + StageTestCase( + "array_key_asc_min_element", + docs=[ + {"_id": 1, "v": [10, 30]}, + {"_id": 2, "v": 5}, + {"_id": 3, "v": [25, 20, 15]}, + {"_id": 4, "v": 12}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 2, "v": 5}, + {"_id": 1, "v": [10, 30]}, + {"_id": 4, "v": 12}, + {"_id": 3, "v": [25, 20, 15]}, + ], + msg="$sort ascending should use the minimum array element as the sort key", + ), + StageTestCase( + "array_key_desc_max_element", + docs=[ + {"_id": 1, "v": [10, 30]}, + {"_id": 2, "v": 5}, + {"_id": 3, "v": [25, 20, 15]}, + {"_id": 4, "v": 12}, + ], + pipeline=[{"$sort": {"v": -1, "_id": 1}}], + expected=[ + {"_id": 1, "v": [10, 30]}, + {"_id": 3, "v": [25, 20, 15]}, + {"_id": 4, "v": 12}, + {"_id": 2, "v": 5}, + ], + msg="$sort descending should use the maximum array element as the sort key", + ), + StageTestCase( + "array_key_singleton_equiv_scalar", + docs=[ + {"_id": 1, "v": [5]}, + {"_id": 2, "v": 5}, + {"_id": 3, "v": 3}, + {"_id": 4, "v": [3]}, + {"_id": 5, "v": 8}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 3, "v": 3}, + {"_id": 4, "v": [3]}, + {"_id": 1, "v": [5]}, + {"_id": 2, "v": 5}, + {"_id": 5, "v": 8}, + ], + msg="$sort should sort singleton arrays equivalently to their scalar value", + ), + StageTestCase( + "array_key_heterogeneous_asc", + docs=[ + # Min element is None (null < number < string < boolean). 
+ {"_id": 5, "v": [1, "hello", None, True]}, + {"_id": 4, "v": None}, + {"_id": 3, "v": 0}, + {"_id": 2, "v": "abc"}, + {"_id": 1, "v": True}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 4, "v": None}, + {"_id": 5, "v": [1, "hello", None, True]}, + {"_id": 3, "v": 0}, + {"_id": 2, "v": "abc"}, + {"_id": 1, "v": True}, + ], + msg=( + "$sort ascending should extract null as min from a heterogeneous" + " array using BSON type ordering" + ), + ), + StageTestCase( + "array_key_heterogeneous_desc", + docs=[ + # Max element is True (boolean > string > number > null). + {"_id": 5, "v": [1, "hello", None, True]}, + {"_id": 4, "v": None}, + {"_id": 3, "v": 0}, + {"_id": 2, "v": "abc"}, + {"_id": 1, "v": True}, + ], + pipeline=[{"$sort": {"v": -1, "_id": 1}}], + expected=[ + {"_id": 1, "v": True}, + {"_id": 5, "v": [1, "hello", None, True]}, + {"_id": 2, "v": "abc"}, + {"_id": 3, "v": 0}, + {"_id": 4, "v": None}, + ], + msg=( + "$sort descending should extract True as max from a heterogeneous" + " array using BSON type ordering" + ), + ), + StageTestCase( + "array_key_large_array_asc", + docs=[ + {"_id": 1, "v": list(range(50, 150))}, + {"_id": 2, "v": 49}, + {"_id": 3, "v": 51}, + {"_id": 4, "v": 150}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 2, "v": 49}, + {"_id": 1, "v": list(range(50, 150))}, + {"_id": 3, "v": 51}, + {"_id": 4, "v": 150}, + ], + msg="$sort ascending should correctly extract min from a 100-element array", + ), + StageTestCase( + "array_key_large_array_desc", + docs=[ + {"_id": 1, "v": list(range(50, 150))}, + {"_id": 2, "v": 49}, + {"_id": 3, "v": 51}, + {"_id": 4, "v": 150}, + ], + pipeline=[{"$sort": {"v": -1, "_id": 1}}], + expected=[ + {"_id": 4, "v": 150}, + {"_id": 1, "v": list(range(50, 150))}, + {"_id": 3, "v": 51}, + {"_id": 2, "v": 49}, + ], + msg="$sort descending should correctly extract max from a 100-element array", + ), + StageTestCase( + "array_key_nested_array_not_unwrapped", + 
docs=[ + {"_id": 1, "v": [[1, 2]]}, + {"_id": 2, "v": {"a": 1}}, + {"_id": 3, "v": MaxKey()}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": {"a": 1}}, + {"_id": 1, "v": [[1, 2]]}, + {"_id": 3, "v": MaxKey()}, + ], + msg=( + "$sort should compare nested arrays as array type rather than" + " unwrapping to scalar elements" + ), + ), + StageTestCase( + "array_key_single_bool_true", + docs=[ + {"_id": 1, "v": [True]}, + {"_id": 2, "v": "abc"}, + {"_id": 3, "v": 5}, + {"_id": 4, "v": True}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 3, "v": 5}, + {"_id": 2, "v": "abc"}, + {"_id": 1, "v": [True]}, + {"_id": 4, "v": True}, + ], + msg="$sort should place [true] at the boolean BSON type position", + ), + StageTestCase( + "array_key_empty_array_before_null", + docs=[ + {"_id": 1, "v": None}, + {"_id": 2, "v": []}, + {"_id": 3}, + {"_id": 4, "v": MinKey()}, + {"_id": 5, "v": 1}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 4, "v": MinKey()}, + {"_id": 2, "v": []}, + {"_id": 1, "v": None}, + {"_id": 3}, + {"_id": 5, "v": 1}, + ], + msg="$sort should place empty array before null and missing in ascending order", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(SORT_ARRAY_KEY_TESTS)) +def test_sort_arrays(collection, test_case: StageTestCase): + """Test $sort array key extraction.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_basic_ordering.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_basic_ordering.py new file mode 100644 index 00000000..0334e155 --- /dev/null +++ 
b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_basic_ordering.py @@ -0,0 +1,408 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Ascending and Descending Sort]: sort order 1 sorts documents in +# ascending order and sort order -1 sorts documents in descending order by the +# specified field, for all BSON types that support ordering. +SORT_ASC_DESC_TESTS: list[StageTestCase] = [ + StageTestCase( + "asc_int", + docs=[{"_id": 1, "v": 10}, {"_id": 2, "v": 20}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": 10}, {"_id": 2, "v": 20}], + msg="$sort with order 1 should sort int32 in ascending order", + ), + StageTestCase( + "desc_int", + docs=[{"_id": 1, "v": 10}, {"_id": 2, "v": 20}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": 20}, {"_id": 1, "v": 10}], + msg="$sort with order -1 should sort int32 in descending order", + ), + StageTestCase( + "asc_int64", + docs=[{"_id": 1, "v": Int64(10)}, {"_id": 2, "v": Int64(20)}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": Int64(10)}, {"_id": 2, "v": Int64(20)}], + msg="$sort with order 1 should sort Int64 in ascending order", + ), + StageTestCase( + "desc_int64", + docs=[{"_id": 1, "v": Int64(10)}, {"_id": 2, "v": Int64(20)}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": Int64(20)}, {"_id": 1, "v": Int64(10)}], + msg="$sort with order -1 should sort Int64 in descending order", + ), + StageTestCase( + "asc_double", + docs=[{"_id": 1, "v": 1.5}, 
{"_id": 2, "v": 2.5}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": 1.5}, {"_id": 2, "v": 2.5}], + msg="$sort with order 1 should sort doubles in ascending order", + ), + StageTestCase( + "desc_double", + docs=[{"_id": 1, "v": 1.5}, {"_id": 2, "v": 2.5}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": 2.5}, {"_id": 1, "v": 1.5}], + msg="$sort with order -1 should sort doubles in descending order", + ), + StageTestCase( + "asc_decimal128", + docs=[{"_id": 1, "v": Decimal128("10")}, {"_id": 2, "v": Decimal128("20")}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": Decimal128("10")}, {"_id": 2, "v": Decimal128("20")}], + msg="$sort with order 1 should sort Decimal128 in ascending order", + ), + StageTestCase( + "desc_decimal128", + docs=[{"_id": 1, "v": Decimal128("10")}, {"_id": 2, "v": Decimal128("20")}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": Decimal128("20")}, {"_id": 1, "v": Decimal128("10")}], + msg="$sort with order -1 should sort Decimal128 in descending order", + ), + StageTestCase( + "asc_string", + docs=[{"_id": 1, "v": "apple"}, {"_id": 2, "v": "banana"}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": "apple"}, {"_id": 2, "v": "banana"}], + msg="$sort with order 1 should sort strings in ascending order", + ), + StageTestCase( + "desc_string", + docs=[{"_id": 1, "v": "apple"}, {"_id": 2, "v": "banana"}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": "banana"}, {"_id": 1, "v": "apple"}], + msg="$sort with order -1 should sort strings in descending order", + ), + StageTestCase( + "asc_objectid", + docs=[ + {"_id": 1, "v": ObjectId("aaaaaaaaaaaaaaaaaaaaaaaa")}, + {"_id": 2, "v": ObjectId("bbbbbbbbbbbbbbbbbbbbbbbb")}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 1, "v": ObjectId("aaaaaaaaaaaaaaaaaaaaaaaa")}, + {"_id": 2, "v": ObjectId("bbbbbbbbbbbbbbbbbbbbbbbb")}, + ], + msg="$sort with order 1 should sort ObjectId in ascending order", + ), 
+ StageTestCase( + "desc_objectid", + docs=[ + {"_id": 1, "v": ObjectId("aaaaaaaaaaaaaaaaaaaaaaaa")}, + {"_id": 2, "v": ObjectId("bbbbbbbbbbbbbbbbbbbbbbbb")}, + ], + pipeline=[{"$sort": {"v": -1}}], + expected=[ + {"_id": 2, "v": ObjectId("bbbbbbbbbbbbbbbbbbbbbbbb")}, + {"_id": 1, "v": ObjectId("aaaaaaaaaaaaaaaaaaaaaaaa")}, + ], + msg="$sort with order -1 should sort ObjectId in descending order", + ), + StageTestCase( + "asc_boolean", + docs=[{"_id": 1, "v": False}, {"_id": 2, "v": True}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": False}, {"_id": 2, "v": True}], + msg="$sort with order 1 should sort booleans in ascending order", + ), + StageTestCase( + "desc_boolean", + docs=[{"_id": 1, "v": False}, {"_id": 2, "v": True}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": True}, {"_id": 1, "v": False}], + msg="$sort with order -1 should sort booleans in descending order", + ), + StageTestCase( + "asc_datetime", + docs=[ + {"_id": 1, "v": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + {"_id": 2, "v": datetime(2025, 1, 1, tzinfo=timezone.utc)}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 1, "v": datetime(2024, 1, 1)}, + {"_id": 2, "v": datetime(2025, 1, 1)}, + ], + msg="$sort with order 1 should sort datetimes in ascending order", + ), + StageTestCase( + "desc_datetime", + docs=[ + {"_id": 1, "v": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + {"_id": 2, "v": datetime(2025, 1, 1, tzinfo=timezone.utc)}, + ], + pipeline=[{"$sort": {"v": -1}}], + expected=[ + {"_id": 2, "v": datetime(2025, 1, 1)}, + {"_id": 1, "v": datetime(2024, 1, 1)}, + ], + msg="$sort with order -1 should sort datetimes in descending order", + ), + StageTestCase( + "asc_timestamp", + docs=[{"_id": 1, "v": Timestamp(100, 1)}, {"_id": 2, "v": Timestamp(200, 1)}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": Timestamp(100, 1)}, {"_id": 2, "v": Timestamp(200, 1)}], + msg="$sort with order 1 should sort timestamps in ascending order", + 
), + StageTestCase( + "desc_timestamp", + docs=[{"_id": 1, "v": Timestamp(100, 1)}, {"_id": 2, "v": Timestamp(200, 1)}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": Timestamp(200, 1)}, {"_id": 1, "v": Timestamp(100, 1)}], + msg="$sort with order -1 should sort timestamps in descending order", + ), + StageTestCase( + "asc_embedded_doc", + docs=[{"_id": 1, "v": {"a": 1}}, {"_id": 2, "v": {"a": 2}}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": {"a": 1}}, {"_id": 2, "v": {"a": 2}}], + msg="$sort with order 1 should sort embedded documents in ascending order", + ), + StageTestCase( + "desc_embedded_doc", + docs=[{"_id": 1, "v": {"a": 1}}, {"_id": 2, "v": {"a": 2}}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": {"a": 2}}, {"_id": 1, "v": {"a": 1}}], + msg="$sort with order -1 should sort embedded documents in descending order", + ), + StageTestCase( + "asc_binary", + docs=[{"_id": 1, "v": Binary(b"\x01")}, {"_id": 2, "v": Binary(b"\x02")}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": b"\x01"}, {"_id": 2, "v": b"\x02"}], + msg="$sort with order 1 should sort binary in ascending order", + ), + StageTestCase( + "desc_binary", + docs=[{"_id": 1, "v": Binary(b"\x01")}, {"_id": 2, "v": Binary(b"\x02")}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": b"\x02"}, {"_id": 1, "v": b"\x01"}], + msg="$sort with order -1 should sort binary in descending order", + ), + StageTestCase( + "asc_regex", + docs=[{"_id": 1, "v": Regex("a")}, {"_id": 2, "v": Regex("b")}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": Regex("a")}, {"_id": 2, "v": Regex("b")}], + msg="$sort with order 1 should sort regex in ascending order", + ), + StageTestCase( + "desc_regex", + docs=[{"_id": 1, "v": Regex("a")}, {"_id": 2, "v": Regex("b")}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": Regex("b")}, {"_id": 1, "v": Regex("a")}], + msg="$sort with order -1 should sort regex in descending 
order", + ), + StageTestCase( + "asc_code", + docs=[{"_id": 1, "v": Code("a")}, {"_id": 2, "v": Code("b")}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": Code("a")}, {"_id": 2, "v": Code("b")}], + msg="$sort with order 1 should sort code in ascending order", + ), + StageTestCase( + "desc_code", + docs=[{"_id": 1, "v": Code("a")}, {"_id": 2, "v": Code("b")}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": Code("b")}, {"_id": 1, "v": Code("a")}], + msg="$sort with order -1 should sort code in descending order", + ), + StageTestCase( + "asc_codewithscope", + docs=[{"_id": 1, "v": Code("a", {})}, {"_id": 2, "v": Code("b", {})}], + pipeline=[{"$sort": {"v": 1}}], + expected=[{"_id": 1, "v": Code("a", {})}, {"_id": 2, "v": Code("b", {})}], + msg="$sort with order 1 should sort code with scope in ascending order", + ), + StageTestCase( + "desc_codewithscope", + docs=[{"_id": 1, "v": Code("a", {})}, {"_id": 2, "v": Code("b", {})}], + pipeline=[{"$sort": {"v": -1}}], + expected=[{"_id": 2, "v": Code("b", {})}, {"_id": 1, "v": Code("a", {})}], + msg="$sort with order -1 should sort code with scope in descending order", + ), +] + +# Property [Equal Value Sort]: sorting documents with identical sort-field +# values succeeds without error for every BSON type, with _id as tiebreaker. 
+SORT_EQUAL_VALUE_TESTS: list[StageTestCase] = [ + StageTestCase( + "equal_null", + docs=[{"_id": 2, "v": None}, {"_id": 1, "v": None}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": None}, {"_id": 2, "v": None}], + msg="$sort should handle equal null values with _id tiebreaker", + ), + StageTestCase( + "equal_int", + docs=[{"_id": 2, "v": 5}, {"_id": 1, "v": 5}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": 5}, {"_id": 2, "v": 5}], + msg="$sort should handle equal int32 values with _id tiebreaker", + ), + StageTestCase( + "equal_int64", + docs=[{"_id": 2, "v": Int64(5)}, {"_id": 1, "v": Int64(5)}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": Int64(5)}, {"_id": 2, "v": Int64(5)}], + msg="$sort should handle equal Int64 values with _id tiebreaker", + ), + StageTestCase( + "equal_double", + docs=[{"_id": 2, "v": 1.5}, {"_id": 1, "v": 1.5}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": 1.5}, {"_id": 2, "v": 1.5}], + msg="$sort should handle equal double values with _id tiebreaker", + ), + StageTestCase( + "equal_decimal128", + docs=[{"_id": 2, "v": Decimal128("5")}, {"_id": 1, "v": Decimal128("5")}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": Decimal128("5")}, {"_id": 2, "v": Decimal128("5")}], + msg="$sort should handle equal Decimal128 values with _id tiebreaker", + ), + StageTestCase( + "equal_string", + docs=[{"_id": 2, "v": "abc"}, {"_id": 1, "v": "abc"}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": "abc"}, {"_id": 2, "v": "abc"}], + msg="$sort should handle equal string values with _id tiebreaker", + ), + StageTestCase( + "equal_objectid", + docs=[ + {"_id": 2, "v": ObjectId("aaaaaaaaaaaaaaaaaaaaaaaa")}, + {"_id": 1, "v": ObjectId("aaaaaaaaaaaaaaaaaaaaaaaa")}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 1, "v": ObjectId("aaaaaaaaaaaaaaaaaaaaaaaa")}, + {"_id": 2, "v": 
ObjectId("aaaaaaaaaaaaaaaaaaaaaaaa")}, + ], + msg="$sort should handle equal ObjectId values with _id tiebreaker", + ), + StageTestCase( + "equal_boolean", + docs=[{"_id": 2, "v": True}, {"_id": 1, "v": True}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": True}, {"_id": 2, "v": True}], + msg="$sort should handle equal boolean values with _id tiebreaker", + ), + StageTestCase( + "equal_datetime", + docs=[ + {"_id": 2, "v": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + {"_id": 1, "v": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 1, "v": datetime(2024, 1, 1)}, + {"_id": 2, "v": datetime(2024, 1, 1)}, + ], + msg="$sort should handle equal datetime values with _id tiebreaker", + ), + StageTestCase( + "equal_timestamp", + docs=[{"_id": 2, "v": Timestamp(100, 1)}, {"_id": 1, "v": Timestamp(100, 1)}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": Timestamp(100, 1)}, {"_id": 2, "v": Timestamp(100, 1)}], + msg="$sort should handle equal Timestamp values with _id tiebreaker", + ), + StageTestCase( + "equal_embedded_doc", + docs=[{"_id": 2, "v": {"a": 1}}, {"_id": 1, "v": {"a": 1}}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": {"a": 1}}, {"_id": 2, "v": {"a": 1}}], + msg="$sort should handle equal embedded document values with _id tiebreaker", + ), + StageTestCase( + "equal_binary", + docs=[{"_id": 2, "v": Binary(b"\x01")}, {"_id": 1, "v": Binary(b"\x01")}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": b"\x01"}, {"_id": 2, "v": b"\x01"}], + msg="$sort should handle equal Binary values with _id tiebreaker", + ), + StageTestCase( + "equal_regex", + docs=[{"_id": 2, "v": Regex("a")}, {"_id": 1, "v": Regex("a")}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": Regex("a")}, {"_id": 2, "v": Regex("a")}], + msg="$sort should handle equal Regex values with _id tiebreaker", + ), + 
StageTestCase( + "equal_code", + docs=[{"_id": 2, "v": Code("f")}, {"_id": 1, "v": Code("f")}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": Code("f")}, {"_id": 2, "v": Code("f")}], + msg="$sort should handle equal Code values with _id tiebreaker", + ), + StageTestCase( + "equal_codewithscope", + docs=[{"_id": 2, "v": Code("f", {})}, {"_id": 1, "v": Code("f", {})}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": Code("f", {})}, {"_id": 2, "v": Code("f", {})}], + msg="$sort should handle equal CodeWithScope values with _id tiebreaker", + ), + StageTestCase( + "equal_minkey", + docs=[{"_id": 2, "v": MinKey()}, {"_id": 1, "v": MinKey()}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": MinKey()}, {"_id": 2, "v": MinKey()}], + msg="$sort should handle equal MinKey values with _id tiebreaker", + ), + StageTestCase( + "equal_maxkey", + docs=[{"_id": 2, "v": MaxKey()}, {"_id": 1, "v": MaxKey()}], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[{"_id": 1, "v": MaxKey()}, {"_id": 2, "v": MaxKey()}], + msg="$sort should handle equal MaxKey values with _id tiebreaker", + ), +] + +SORT_BASIC_ORDERING_TESTS = SORT_ASC_DESC_TESTS + SORT_EQUAL_VALUE_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(SORT_BASIC_ORDERING_TESTS)) +def test_sort_basic_ordering(collection, test_case: StageTestCase): + """Test $sort ascending, descending, and equal-value ordering.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_direction_values.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_direction_values.py new file mode 
100644 index 00000000..450c4063 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_direction_values.py @@ -0,0 +1,161 @@ +from __future__ import annotations + +import pytest +from bson import Decimal128, Int64 + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import QUERY_METADATA_NOT_AVAILABLE_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Sort Order Value Acceptance]: int32, int64, double (truncation +# toward zero yields 1 or -1), and Decimal128 (banker's rounding of abs +# yields 1) are accepted as sort direction values. +SORT_ORDER_VALUE_ACCEPTANCE_TESTS: list[StageTestCase] = [ + StageTestCase( + "accept_int64_asc", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": Int64(1)}}], + expected=[{"_id": 2, "v": 10}, {"_id": 1, "v": 20}], + msg="$sort should accept Int64(1) as ascending sort direction", + ), + StageTestCase( + "accept_int64_desc", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": Int64(-1)}}], + expected=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + msg="$sort should accept Int64(-1) as descending sort direction", + ), + StageTestCase( + "accept_double_truncated_to_asc", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": 1.999}}], + expected=[{"_id": 2, "v": 10}, {"_id": 1, "v": 20}], + msg="$sort should accept double 1.999 truncated toward zero to 1", + ), + StageTestCase( + "accept_double_truncated_to_desc", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": -1.999}}], + expected=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + msg="$sort should accept double -1.999 truncated toward zero to -1", + ), + StageTestCase( + 
"accept_decimal128_asc", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": Decimal128("1")}}], + expected=[{"_id": 2, "v": 10}, {"_id": 1, "v": 20}], + msg="$sort should accept Decimal128('1') as ascending sort direction", + ), + StageTestCase( + "accept_decimal128_desc", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": Decimal128("-1")}}], + expected=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + msg="$sort should accept Decimal128('-1') as descending sort direction", + ), + StageTestCase( + "accept_decimal128_0_51_rounds_to_asc", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": Decimal128("0.51")}}], + expected=[{"_id": 2, "v": 10}, {"_id": 1, "v": 20}], + msg="$sort should accept Decimal128('0.51') banker's rounded to 1", + ), + StageTestCase( + "accept_decimal128_1_49_rounds_to_asc", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": Decimal128("1.49")}}], + expected=[{"_id": 2, "v": 10}, {"_id": 1, "v": 20}], + msg="$sort should accept Decimal128('1.49') banker's rounded to 1", + ), + StageTestCase( + "accept_decimal128_neg_0_51_rounds_to_desc", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": Decimal128("-0.51")}}], + expected=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + msg="$sort should accept Decimal128('-0.51') banker's rounded to -1", + ), + StageTestCase( + "accept_decimal128_neg_1_49_rounds_to_desc", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": Decimal128("-1.49")}}], + expected=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + msg="$sort should accept Decimal128('-1.49') banker's rounded to -1", + ), +] + +# Property [$meta Sort Acceptance]: valid $meta values parse as sort order +# values (textScore/geoNearDistance error only when metadata is unavailable), +# and $meta keys do not count toward the 32-key compound sort limit.
+SORT_META_ACCEPTANCE_TESTS: list[StageTestCase] = [ + StageTestCase( + "accept_meta_randval", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": 1, "r": {"$meta": "randVal"}}}], + expected=[{"_id": 2, "v": 10}, {"_id": 1, "v": 20}], + msg="$sort should accept {$meta: 'randVal'} as a sort order value", + ), + StageTestCase( + "accept_meta_searchscore", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": 1, "s": {"$meta": "searchScore"}}}], + expected=[{"_id": 2, "v": 10}, {"_id": 1, "v": 20}], + msg="$sort should accept {$meta: 'searchScore'} as a sort order value", + ), + StageTestCase( + "accept_meta_textscore", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": 1, "t": {"$meta": "textScore"}}}], + error_code=QUERY_METADATA_NOT_AVAILABLE_ERROR, + msg="$sort should accept {$meta: 'textScore'} but fail without text score metadata", + ), + StageTestCase( + "accept_meta_geoneardistance", + docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], + pipeline=[{"$sort": {"v": 1, "d": {"$meta": "geoNearDistance"}}}], + error_code=QUERY_METADATA_NOT_AVAILABLE_ERROR, + msg="$sort should accept {$meta: 'geoNearDistance'} but fail without $geoNear metadata", + ), + # $meta keys do not count toward the 32-key compound sort limit.
+ StageTestCase( + "accept_meta_beyond_32_key_limit", + docs=[{"_id": 1, "f0": 20}, {"_id": 2, "f0": 10}], + pipeline=[ + { + "$sort": { + **{f"f{i}": 1 for i in range(32)}, + "meta1": {"$meta": "randVal"}, + "meta2": {"$meta": "searchScore"}, + } + }, + ], + expected=[{"_id": 2, "f0": 10}, {"_id": 1, "f0": 20}], + msg="$sort should not count $meta keys toward the 32-key compound sort limit", + ), +] + +SORT_DIRECTION_VALUE_TESTS = SORT_ORDER_VALUE_ACCEPTANCE_TESTS + SORT_META_ACCEPTANCE_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(SORT_DIRECTION_VALUE_TESTS)) +def test_sort_direction_values(collection, test_case: StageTestCase): + """Test $sort accepted direction values.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_field_paths.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_field_paths.py new file mode 100644 index 00000000..f85b7240 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_field_paths.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +from functools import reduce +from typing import Any, cast + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Nested Field Paths]: dot notation traverses into embedded +# documents and arrays of objects for sort key extraction.
+SORT_NESTED_FIELD_TESTS: list[StageTestCase] = [ + StageTestCase( + "nested_dot_notation", + docs=[ + {"_id": 1, "a": {"b": 30}}, + {"_id": 2, "a": {"b": 10}}, + ], + pipeline=[{"$sort": {"a.b": 1}}], + expected=[ + {"_id": 2, "a": {"b": 10}}, + {"_id": 1, "a": {"b": 30}}, + ], + msg="$sort should traverse embedded documents via dot notation", + ), + StageTestCase( + "nested_array_of_objects_asc", + docs=[ + {"_id": 3, "a": [{"b": 20}, {"b": 15}]}, + {"_id": 2, "a": [{"b": 25}, {"b": 5}]}, + {"_id": 1, "a": [{"b": 30}, {"b": 10}]}, + ], + pipeline=[{"$sort": {"a.b": 1, "_id": 1}}], + expected=[ + {"_id": 2, "a": [{"b": 25}, {"b": 5}]}, + {"_id": 1, "a": [{"b": 30}, {"b": 10}]}, + {"_id": 3, "a": [{"b": 20}, {"b": 15}]}, + ], + msg=( + "$sort ascending should use min of nested field values" + " extracted from array of objects" + ), + ), + StageTestCase( + "nested_array_of_objects_desc", + docs=[ + {"_id": 3, "a": [{"b": 20}, {"b": 15}]}, + {"_id": 2, "a": [{"b": 25}, {"b": 5}]}, + {"_id": 1, "a": [{"b": 30}, {"b": 10}]}, + ], + pipeline=[{"$sort": {"a.b": -1, "_id": 1}}], + expected=[ + {"_id": 1, "a": [{"b": 30}, {"b": 10}]}, + {"_id": 2, "a": [{"b": 25}, {"b": 5}]}, + {"_id": 3, "a": [{"b": 20}, {"b": 15}]}, + ], + msg=( + "$sort descending should use max of nested field values" + " extracted from array of objects" + ), + ), + StageTestCase( + "nested_non_traversable_intermediate_treated_as_missing", + docs=[ + {"_id": 1, "a": 42}, + {"_id": 2, "a": None}, + {"_id": 3, "a": {"b": 10}}, + {"_id": 4}, + {"_id": 5, "a": {"b": 5}}, + ], + pipeline=[{"$sort": {"a.b": 1, "_id": 1}}], + expected=[ + {"_id": 1, "a": 42}, + {"_id": 2, "a": None}, + {"_id": 4}, + {"_id": 5, "a": {"b": 5}}, + {"_id": 3, "a": {"b": 10}}, + ], + msg="$sort should treat scalar and null at an intermediate path level as missing", + ), + # The server limits document nesting to 180 levels, so the sort path + # can only be verified up to that depth. 
The 200-component path test + # below only proves acceptance (all values are missing). + StageTestCase( + "nested_max_depth_sort", + docs=[ + {"_id": 1, **reduce(lambda v, k: {k: v}, reversed(["a"] * 180), cast(Any, 20))}, + {"_id": 2, **reduce(lambda v, k: {k: v}, reversed(["a"] * 180), cast(Any, 10))}, + ], + pipeline=[{"$sort": {".".join(["a"] * 180): 1}}], + expected=[ + {"_id": 2, **reduce(lambda v, k: {k: v}, reversed(["a"] * 180), cast(Any, 10))}, + {"_id": 1, **reduce(lambda v, k: {k: v}, reversed(["a"] * 180), cast(Any, 20))}, + ], + msg="$sort should sort by a field path at the maximum document nesting depth of 180", + ), + StageTestCase( + "nested_200_component_path_accepted", + docs=[ + {"_id": 3, "v": 30}, + {"_id": 1, "v": 10}, + {"_id": 2, "v": 20}, + ], + pipeline=[ + {"$sort": {".".join(["a"] * 200): 1, "_id": 1}}, + ], + expected=[ + {"_id": 1, "v": 10}, + {"_id": 2, "v": 20}, + {"_id": 3, "v": 30}, + ], + msg="$sort should accept a 200-component field path without error", + ), +] + +# Property [Field Name Acceptance]: field names with non-leading dollar signs, +# spaces, numeric names, and Unicode characters are accepted. 
+SORT_FIELD_NAME_ACCEPTANCE_TESTS: list[StageTestCase] = [ + StageTestCase( + "field_name_non_leading_dollar", + docs=[{"_id": 1, "a$bc": 20}, {"_id": 2, "a$bc": 10}], + pipeline=[{"$sort": {"a$bc": 1}}], + expected=[{"_id": 2, "a$bc": 10}, {"_id": 1, "a$bc": 20}], + msg="$sort should accept non-leading $ in field names", + ), + StageTestCase( + "field_name_space", + docs=[{"_id": 1, "field name": 20}, {"_id": 2, "field name": 10}], + pipeline=[{"$sort": {"field name": 1}}], + expected=[{"_id": 2, "field name": 10}, {"_id": 1, "field name": 20}], + msg="$sort should accept spaces in field names", + ), + StageTestCase( + "field_name_numeric", + docs=[{"_id": 1, "123": 20}, {"_id": 2, "123": 10}], + pipeline=[{"$sort": {"123": 1}}], + expected=[{"_id": 2, "123": 10}, {"_id": 1, "123": 20}], + msg="$sort should accept numeric field names", + ), + StageTestCase( + "field_name_unicode", + docs=[{"_id": 1, "caf\u00e9": 20}, {"_id": 2, "caf\u00e9": 10}], + pipeline=[{"$sort": {"caf\u00e9": 1}}], + expected=[{"_id": 2, "caf\u00e9": 10}, {"_id": 1, "caf\u00e9": 20}], + msg="$sort should accept Unicode characters in field names", + ), +] + +SORT_FIELD_PATH_TESTS = SORT_NESTED_FIELD_TESTS + SORT_FIELD_NAME_ACCEPTANCE_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(SORT_FIELD_PATH_TESTS)) +def test_sort_field_paths(collection, test_case: StageTestCase): + """Test $sort field path traversal and name acceptance.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_key_resolution.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_key_resolution.py new file mode 100644 index 00000000..bf6c7d23 
--- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_key_resolution.py @@ -0,0 +1,161 @@ +from __future__ import annotations + +import pytest +from bson import Timestamp + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Compound Sort]: when sorting on multiple fields, documents are +# sorted by the first field, then ties are broken by subsequent fields in +# left-to-right order, with each field independently ascending or descending. +SORT_COMPOUND_TESTS: list[StageTestCase] = [ + StageTestCase( + "compound_left_to_right", + docs=[ + {"_id": 1, "a": 2, "b": 30}, + {"_id": 2, "a": 1, "b": 20}, + {"_id": 3, "a": 2, "b": 10}, + {"_id": 4, "a": 1, "b": 40}, + ], + pipeline=[{"$sort": {"a": 1, "b": 1}}], + expected=[ + {"_id": 2, "a": 1, "b": 20}, + {"_id": 4, "a": 1, "b": 40}, + {"_id": 3, "a": 2, "b": 10}, + {"_id": 1, "a": 2, "b": 30}, + ], + msg="$sort should sort by first field then by second field left to right", + ), + StageTestCase( + "compound_non_alphabetical_order", + docs=[ + {"_id": 1, "a": 2, "b": 30}, + {"_id": 2, "a": 1, "b": 20}, + {"_id": 3, "a": 2, "b": 10}, + {"_id": 4, "a": 1, "b": 40}, + ], + pipeline=[{"$sort": {"b": 1, "a": 1}}], + expected=[ + {"_id": 3, "a": 2, "b": 10}, + {"_id": 2, "a": 1, "b": 20}, + {"_id": 1, "a": 2, "b": 30}, + {"_id": 4, "a": 1, "b": 40}, + ], + msg="$sort should use key insertion order, not alphabetical field name order", + ), + StageTestCase( + "compound_mixed_directions", + docs=[ + {"_id": 1, "a": 2, "b": 30}, + {"_id": 2, "a": 1, "b": 20}, + {"_id": 3, "a": 2, "b": 10}, + {"_id": 4, "a": 1, "b": 40}, + ], + pipeline=[{"$sort": {"a": 1, "b": -1}}], + expected=[ + {"_id": 4, "a": 1, "b": 40}, + {"_id": 2, 
"a": 1, "b": 20}, + {"_id": 1, "a": 2, "b": 30}, + {"_id": 3, "a": 2, "b": 10}, + ], + msg="$sort should apply ascending on first field and descending on second field", + ), +] + +# Property [Null and Missing Field Behavior]: documents where the sort field +# is null, missing, or an array containing only null sort equivalently. +SORT_NULL_MISSING_TESTS: list[StageTestCase] = [ + StageTestCase( + "null_missing_array_null_equivalent", + docs=[ + {"_id": 5}, + {"_id": 3, "v": [None]}, + {"_id": 1, "v": None}, + {"_id": 6, "v": [None]}, + {"_id": 4, "v": None}, + {"_id": 2}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 1, "v": None}, + {"_id": 2}, + {"_id": 3, "v": [None]}, + {"_id": 4, "v": None}, + {"_id": 5}, + {"_id": 6, "v": [None]}, + ], + msg="$sort should sort null, missing, and [null] equivalently, interleaved by _id", + ), + StageTestCase( + "null_missing_nonexistent_sort_field", + docs=[ + {"_id": 2, "x": "b"}, + {"_id": 1, "x": "a"}, + {"_id": 3, "x": "c"}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "b"}, + {"_id": 3, "x": "c"}, + ], + msg="$sort on a field absent from all documents should treat all as missing", + ), +] + +SORT_KEY_RESOLUTION_TESTS = SORT_COMPOUND_TESTS + SORT_NULL_MISSING_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(SORT_KEY_RESOLUTION_TESTS)) +def test_sort_key_resolution(collection, test_case: StageTestCase): + """Test $sort compound keys, null, and missing fields.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) + + +def test_sort_key_resolution_timestamp_zero_replaced(collection): + """Test $sort with Timestamp(0, 0) which is replaced by the server on insert.""" + 
collection.insert_many( + [ + {"_id": 1, "v": Timestamp(0, 0)}, + {"_id": 3, "v": Timestamp(1, 1)}, + {"_id": 2, "v": Timestamp(100, 1)}, + ] + ) + # Timestamp(0, 0) is replaced by the server with the current time, which + # is larger than Timestamp(100, 1). Sort ascending and verify _id order. + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$sort": {"v": 1}}, {"$project": {"_id": 1}}], + "cursor": {}, + }, + ) + assertResult( + result, + expected=[{"_id": 3}, {"_id": 2}, {"_id": 1}], + msg=( + "$sort should correctly order Timestamp(0, 0) after server" + " replacement with the current time" + ), + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_meta_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_meta_errors.py new file mode 100644 index 00000000..d56cd29c --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_meta_errors.py @@ -0,0 +1,252 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + FAILED_TO_PARSE_ERROR, + SORT_ILLEGAL_META_ERROR, + SORT_NON_META_OBJECT_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [$meta Non-Meta Object Error]: non-$meta objects as sort order +# values produce an error.
+SORT_META_NON_META_OBJECT_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "meta_non_meta_object", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$foo": 1}}}], + error_code=SORT_NON_META_OBJECT_ERROR, + msg="$sort should reject a non-$meta object as a sort order value", + ), +] + +# Property [$meta Extra Keys Error]: extra keys alongside $meta in a sort +# order object produce an error. +SORT_META_EXTRA_KEYS_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "meta_extra_keys", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": "randVal", "extra": 1}}}], + error_code=FAILED_TO_PARSE_ERROR, + msg="$sort should reject extra keys in a $meta sort specification", + ), +] + +# Property [$meta Invalid Value Error]: invalid $meta values including +# unrecognized strings and non-string types produce an error. +SORT_META_INVALID_VALUE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "meta_invalid_unknown", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": "unknown"}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject $meta value 'unknown'", + ), + StageTestCase( + "meta_invalid_searchhighlights", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": "searchHighlights"}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject $meta value 'searchHighlights'", + ), + StageTestCase( + "meta_invalid_indexkey", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": "indexKey"}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject $meta value 'indexKey'", + ), + StageTestCase( + "meta_invalid_sortkey", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": "sortKey"}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject $meta value 'sortKey'", + ), + StageTestCase( + "meta_invalid_recordid", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": "recordId"}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort 
should reject $meta value 'recordId'", + ), + StageTestCase( + "meta_invalid_geonearpoint", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": "geoNearPoint"}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject $meta value 'geoNearPoint'", + ), + StageTestCase( + "meta_invalid_empty_string", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": ""}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject an empty string as a $meta value", + ), + StageTestCase( + "meta_invalid_type_int", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": 1}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject an int as a $meta value", + ), + StageTestCase( + "meta_invalid_type_null", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": None}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject null as a $meta value", + ), + StageTestCase( + "meta_invalid_type_bool", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": True}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject a boolean as a $meta value", + ), + StageTestCase( + "meta_invalid_type_object", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": {"a": 1}}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject an object as a $meta value", + ), + StageTestCase( + "meta_invalid_type_array", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": [1]}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject an array as a $meta value", + ), + StageTestCase( + "meta_invalid_type_float", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": 1.5}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject a float as a $meta value", + ), + StageTestCase( + "meta_invalid_type_int64", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": Int64(1)}}}], + error_code=SORT_ILLEGAL_META_ERROR, + 
msg="$sort should reject an Int64 as a $meta value", + ), + StageTestCase( + "meta_invalid_type_decimal128", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": Decimal128("1")}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject a Decimal128 as a $meta value", + ), + StageTestCase( + "meta_invalid_type_objectid", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": ObjectId()}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject an ObjectId as a $meta value", + ), + StageTestCase( + "meta_invalid_type_datetime", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": datetime(2024, 1, 1, tzinfo=timezone.utc)}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject a datetime as a $meta value", + ), + StageTestCase( + "meta_invalid_type_binary", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": Binary(b"\x01")}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject a Binary as a $meta value", + ), + StageTestCase( + "meta_invalid_type_regex", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": Regex("a")}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject a Regex as a $meta value", + ), + StageTestCase( + "meta_invalid_type_timestamp", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": Timestamp(1, 1)}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject a Timestamp as a $meta value", + ), + StageTestCase( + "meta_invalid_type_minkey", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": MinKey()}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject a MinKey as a $meta value", + ), + StageTestCase( + "meta_invalid_type_maxkey", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": MaxKey()}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject a MaxKey as a $meta value", + ), + StageTestCase( + "meta_invalid_type_code", + 
docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": Code("f")}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject a Code as a $meta value", + ), + StageTestCase( + "meta_invalid_type_codewithscope", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": {"$meta": Code("f", {"x": 1})}}}], + error_code=SORT_ILLEGAL_META_ERROR, + msg="$sort should reject a CodeWithScope as a $meta value", + ), +] + +SORT_META_ERROR_TESTS = ( + SORT_META_NON_META_OBJECT_ERROR_TESTS + + SORT_META_EXTRA_KEYS_ERROR_TESTS + + SORT_META_INVALID_VALUE_ERROR_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(SORT_META_ERROR_TESTS)) +def test_sort_meta_errors(collection, test_case: StageTestCase): + """Test $sort $meta validation errors.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_numeric.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_numeric.py new file mode 100644 index 00000000..21d0016d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_numeric.py @@ -0,0 +1,317 @@ +from __future__ import annotations + +import math + +import pytest +from bson import Decimal128, Int64 + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_INT64_OVERFLOW, + DECIMAL128_INT64_UNDERFLOW, + 
DECIMAL128_MAX, + DECIMAL128_MAX_NEGATIVE, + DECIMAL128_MIN, + DECIMAL128_MIN_POSITIVE, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_ZERO, + DECIMAL128_TRAILING_ZERO, + DECIMAL128_ZERO, + DOUBLE_MAX, + DOUBLE_MIN, + DOUBLE_MIN_NEGATIVE_SUBNORMAL, + DOUBLE_MIN_NORMAL, + DOUBLE_MIN_SUBNORMAL, + DOUBLE_NEGATIVE_ZERO, + DOUBLE_PRECISION_LOSS, + DOUBLE_ZERO, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + INT32_MAX, + INT32_MAX_MINUS_1, + INT32_OVERFLOW, + INT64_MAX, + INT64_MIN, + INT64_ZERO, +) + +# Property [Numeric Ordering]: all numeric BSON types are interleaved by numeric +# value rather than grouped by subtype, with NaN sorting before negative +# infinity, all zero variants sorting equivalently, and Decimal128 preserving +# exact precision where double and int64 cannot. +SORT_NUMERIC_ORDERING_TESTS: list[StageTestCase] = [ + StageTestCase( + "numeric_interleaved_by_value", + docs=[ + {"_id": 1, "v": 10}, + {"_id": 2, "v": Int64(5)}, + {"_id": 3, "v": 7.5}, + {"_id": 4, "v": Decimal128("3")}, + {"_id": 5, "v": Int64(15)}, + {"_id": 6, "v": 1.5}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 6, "v": 1.5}, + {"_id": 4, "v": Decimal128("3")}, + {"_id": 2, "v": Int64(5)}, + {"_id": 3, "v": 7.5}, + {"_id": 1, "v": 10}, + {"_id": 5, "v": Int64(15)}, + ], + msg="$sort should interleave numeric types by value, not group by subtype", + ), + StageTestCase( + "numeric_int32_int64_boundary", + docs=[ + {"_id": 1, "v": INT32_MAX}, + {"_id": 2, "v": Int64(INT32_OVERFLOW)}, + {"_id": 3, "v": INT32_MAX_MINUS_1}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 3, "v": INT32_MAX_MINUS_1}, + {"_id": 1, "v": INT32_MAX}, + {"_id": 2, "v": Int64(INT32_OVERFLOW)}, + ], + msg="$sort should order int32 max before Int64 value just above int32 range", + ), + StageTestCase( + "numeric_int64_extremes", + docs=[ + {"_id": 1, "v": INT64_MAX}, + {"_id": 2, "v": INT64_MIN}, + {"_id": 3, "v": INT64_ZERO}, + {"_id": 4, 
"v": Int64(-1)}, + {"_id": 5, "v": Int64(1)}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": INT64_MIN}, + {"_id": 4, "v": Int64(-1)}, + {"_id": 3, "v": INT64_ZERO}, + {"_id": 5, "v": Int64(1)}, + {"_id": 1, "v": INT64_MAX}, + ], + msg="$sort should correctly order Int64 extreme boundary values", + ), + StageTestCase( + "numeric_double_extremes", + docs=[ + {"_id": 1, "v": FLOAT_INFINITY}, + {"_id": 2, "v": FLOAT_NEGATIVE_INFINITY}, + {"_id": 3, "v": DOUBLE_MAX}, + {"_id": 4, "v": DOUBLE_MIN}, + {"_id": 5, "v": DOUBLE_MIN_SUBNORMAL}, + {"_id": 6, "v": DOUBLE_MIN_NEGATIVE_SUBNORMAL}, + {"_id": 7, "v": DOUBLE_MIN_NORMAL}, + {"_id": 8, "v": DOUBLE_ZERO}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": FLOAT_NEGATIVE_INFINITY}, + {"_id": 4, "v": DOUBLE_MIN}, + {"_id": 6, "v": DOUBLE_MIN_NEGATIVE_SUBNORMAL}, + {"_id": 8, "v": DOUBLE_ZERO}, + {"_id": 5, "v": DOUBLE_MIN_SUBNORMAL}, + {"_id": 7, "v": DOUBLE_MIN_NORMAL}, + {"_id": 3, "v": DOUBLE_MAX}, + {"_id": 1, "v": FLOAT_INFINITY}, + ], + msg="$sort should correctly order double extreme values", + ), + StageTestCase( + "numeric_decimal128_extremes", + docs=[ + {"_id": 1, "v": DECIMAL128_INFINITY}, + {"_id": 2, "v": DECIMAL128_NEGATIVE_INFINITY}, + {"_id": 3, "v": DECIMAL128_MAX}, + {"_id": 4, "v": DECIMAL128_MIN}, + {"_id": 5, "v": DECIMAL128_MIN_POSITIVE}, + {"_id": 6, "v": DECIMAL128_MAX_NEGATIVE}, + {"_id": 7, "v": DECIMAL128_ZERO}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": DECIMAL128_NEGATIVE_INFINITY}, + {"_id": 4, "v": DECIMAL128_MIN}, + {"_id": 6, "v": DECIMAL128_MAX_NEGATIVE}, + {"_id": 7, "v": DECIMAL128_ZERO}, + {"_id": 5, "v": DECIMAL128_MIN_POSITIVE}, + {"_id": 3, "v": DECIMAL128_MAX}, + {"_id": 1, "v": DECIMAL128_INFINITY}, + ], + msg="$sort should correctly order Decimal128 extreme exponent values", + ), + StageTestCase( + "numeric_nan_and_infinity_cross_type", + docs=[ + {"_id": 2, "v": FLOAT_NAN}, + {"_id": 1, "v": DECIMAL128_NAN}, 
+ {"_id": 4, "v": FLOAT_NEGATIVE_INFINITY}, + {"_id": 3, "v": DECIMAL128_NEGATIVE_INFINITY}, + {"_id": 5, "v": 0}, + {"_id": 7, "v": FLOAT_INFINITY}, + {"_id": 6, "v": DECIMAL128_INFINITY}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 1, "v": DECIMAL128_NAN}, + {"_id": 2, "v": pytest.approx(math.nan, nan_ok=True)}, + {"_id": 3, "v": DECIMAL128_NEGATIVE_INFINITY}, + {"_id": 4, "v": FLOAT_NEGATIVE_INFINITY}, + {"_id": 5, "v": 0}, + {"_id": 6, "v": DECIMAL128_INFINITY}, + {"_id": 7, "v": FLOAT_INFINITY}, + ], + msg=( + "$sort should treat float and Decimal128 NaN as equivalent," + " and float and Decimal128 infinity as equivalent" + ), + ), + StageTestCase( + "numeric_zero_variants_equivalent", + docs=[ + {"_id": 6, "v": 0}, + {"_id": 5, "v": DOUBLE_ZERO}, + {"_id": 4, "v": DOUBLE_NEGATIVE_ZERO}, + {"_id": 3, "v": INT64_ZERO}, + {"_id": 2, "v": DECIMAL128_ZERO}, + {"_id": 1, "v": DECIMAL128_NEGATIVE_ZERO}, + {"_id": 7, "v": -1}, + {"_id": 8, "v": 1}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 7, "v": -1}, + {"_id": 1, "v": DECIMAL128_NEGATIVE_ZERO}, + {"_id": 2, "v": DECIMAL128_ZERO}, + {"_id": 3, "v": INT64_ZERO}, + {"_id": 4, "v": DOUBLE_NEGATIVE_ZERO}, + {"_id": 5, "v": DOUBLE_ZERO}, + {"_id": 6, "v": 0}, + {"_id": 8, "v": 1}, + ], + msg="$sort should sort all zero variants equivalently, interleaved by _id", + ), + StageTestCase( + "cross_int32_decimal128", + docs=[ + {"_id": 2, "v": 5}, + {"_id": 1, "v": Decimal128("5")}, + {"_id": 3, "v": Decimal128("3")}, + {"_id": 4, "v": 10}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 3, "v": Decimal128("3")}, + {"_id": 1, "v": Decimal128("5")}, + {"_id": 2, "v": 5}, + {"_id": 4, "v": 10}, + ], + msg=( + "$sort should interleave int32 and Decimal128 by numeric value" + " with equal values tiebroken by _id" + ), + ), + StageTestCase( + "cross_int64_decimal128_beyond_range", + docs=[ + {"_id": 1, "v": INT64_MAX}, + {"_id": 2, "v": 
DECIMAL128_INT64_OVERFLOW}, + {"_id": 3, "v": INT64_MIN}, + {"_id": 4, "v": DECIMAL128_INT64_UNDERFLOW}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 4, "v": DECIMAL128_INT64_UNDERFLOW}, + {"_id": 3, "v": INT64_MIN}, + {"_id": 1, "v": INT64_MAX}, + {"_id": 2, "v": DECIMAL128_INT64_OVERFLOW}, + ], + msg="$sort should place Decimal128 values beyond int64 range correctly at the extremes", + ), + StageTestCase( + "cross_double_decimal128_precision", + docs=[ + {"_id": 1, "v": 0.1}, + {"_id": 2, "v": Decimal128("0.1")}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 2, "v": Decimal128("0.1")}, + {"_id": 1, "v": 0.1}, + ], + msg=( + "$sort should place double 0.1 after Decimal128 0.1" + " because IEEE 754 double is slightly greater" + ), + ), + StageTestCase( + "cross_int64_double_precision_boundary", + docs=[ + {"_id": 1, "v": Int64(DOUBLE_PRECISION_LOSS)}, + {"_id": 2, "v": float(DOUBLE_PRECISION_LOSS)}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 2, "v": float(DOUBLE_PRECISION_LOSS)}, + {"_id": 1, "v": Int64(DOUBLE_PRECISION_LOSS)}, + ], + msg=( + "$sort should place int64 after its double representation" + " at the precision loss boundary" + ), + ), + StageTestCase( + "cross_decimal128_equivalent_representations", + docs=[ + {"_id": 5, "v": Decimal128("1")}, + {"_id": 3, "v": DECIMAL128_TRAILING_ZERO}, + {"_id": 1, "v": Decimal128("10E-1")}, + {"_id": 4, "v": Decimal128("1E0")}, + {"_id": 2, "v": Decimal128("100E-2")}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 1, "v": Decimal128("10E-1")}, + {"_id": 2, "v": Decimal128("100E-2")}, + {"_id": 3, "v": DECIMAL128_TRAILING_ZERO}, + {"_id": 4, "v": Decimal128("1E0")}, + {"_id": 5, "v": Decimal128("1")}, + ], + msg=( + "$sort should sort Decimal128 equivalent representations" + " of the same value equivalently by _id" + ), + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(SORT_NUMERIC_ORDERING_TESTS)) 
+def test_sort_numeric(collection, test_case: StageTestCase): + """Test $sort numeric type ordering.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_order_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_order_errors.py new file mode 100644 index 00000000..807ee33d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_order_errors.py @@ -0,0 +1,350 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + SORT_ORDER_RANGE_ERROR, + SORT_ORDER_TYPE_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_HALF, + DECIMAL128_INFINITY, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_ZERO, + DECIMAL128_ONE_AND_HALF, + DOUBLE_NEGATIVE_ZERO, + DOUBLE_ZERO, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + INT64_ZERO, +) + +# Property [Sort Order Value Type Errors]: non-numeric, non-object types as +# sort order values produce an error. 
+SORT_ORDER_TYPE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "type_error_string", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": "asc"}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject a string as a sort order value", + ), + StageTestCase( + "type_error_bool", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": True}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject a boolean as a sort order value", + ), + StageTestCase( + "type_error_null", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": None}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject null as a sort order value", + ), + StageTestCase( + "type_error_array", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": [1]}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject an array as a sort order value", + ), + StageTestCase( + "type_error_objectid", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": ObjectId("000000000000000000000001")}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject an ObjectId as a sort order value", + ), + StageTestCase( + "type_error_datetime", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": datetime(2024, 1, 1, tzinfo=timezone.utc)}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject a datetime as a sort order value", + ), + StageTestCase( + "type_error_timestamp", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": Timestamp(1, 1)}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject a Timestamp as a sort order value", + ), + StageTestCase( + "type_error_binary", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": Binary(b"\x01")}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject a Binary as a sort order value", + ), + StageTestCase( + "type_error_regex", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": Regex("a")}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject a Regex as a sort 
order value", + ), + StageTestCase( + "type_error_code", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": Code("f")}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject a Code as a sort order value", + ), + StageTestCase( + "type_error_codewithscope", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": Code("f", {"x": 1})}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject a CodeWithScope as a sort order value", + ), + StageTestCase( + "type_error_minkey", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": MinKey()}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject MinKey as a sort order value", + ), + StageTestCase( + "type_error_maxkey", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": MaxKey()}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should reject MaxKey as a sort order value", + ), +] + +# Property [Sort Order Value Range Errors]: numeric sort order values that +# are not equivalent to 1 or -1 after type-specific rounding produce an error. 
+SORT_ORDER_RANGE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "range_error_int_zero", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": 0}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject integer 0 as a sort order value", + ), + StageTestCase( + "range_error_int_two", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": 2}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject integer 2 as a sort order value", + ), + StageTestCase( + "range_error_int_neg_two", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": -2}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject integer -2 as a sort order value", + ), + StageTestCase( + "range_error_double_zero", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": DOUBLE_ZERO}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject double 0.0 as a sort order value", + ), + StageTestCase( + "range_error_double_neg_zero", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": DOUBLE_NEGATIVE_ZERO}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject double -0.0 as a sort order value", + ), + StageTestCase( + "range_error_double_two", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": 2.0}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject double 2.0 as a sort order value", + ), + StageTestCase( + "range_error_double_0_5", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": 0.5}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject double 0.5 as a sort order value", + ), + StageTestCase( + "range_error_float_nan", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": FLOAT_NAN}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject float NaN as a sort order value", + ), + StageTestCase( + "range_error_float_inf", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": FLOAT_INFINITY}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject float infinity as a 
sort order value", + ), + StageTestCase( + "range_error_float_neg_inf", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": FLOAT_NEGATIVE_INFINITY}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject float negative infinity as a sort order value", + ), + # Decimal128 banker's rounding: abs(1.5) rounds to 2 (rejected). + StageTestCase( + "range_error_decimal128_1_5", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": DECIMAL128_ONE_AND_HALF}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject Decimal128('1.5') which rounds to 2", + ), + # Decimal128 banker's rounding: abs(0.5) rounds to 0 (rejected). + StageTestCase( + "range_error_decimal128_0_5", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": DECIMAL128_HALF}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject Decimal128('0.5') which rounds to 0", + ), + StageTestCase( + "range_error_decimal128_neg_zero", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": DECIMAL128_NEGATIVE_ZERO}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject Decimal128('-0') as a sort order value", + ), + StageTestCase( + "range_error_decimal128_nan", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": DECIMAL128_NAN}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject Decimal128 NaN as a sort order value", + ), + StageTestCase( + "range_error_decimal128_inf", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": DECIMAL128_INFINITY}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject Decimal128 Infinity as a sort order value", + ), + StageTestCase( + "range_error_decimal128_neg_inf", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": DECIMAL128_NEGATIVE_INFINITY}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject Decimal128 negative Infinity as a sort order value", + ), + StageTestCase( + "range_error_int64_zero", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": INT64_ZERO}}], + 
error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject Int64 0 as a sort order value", + ), + StageTestCase( + "range_error_int64_two", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": Int64(2)}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject Int64 2 as a sort order value", + ), + StageTestCase( + "range_error_int64_neg_two", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": Int64(-2)}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject Int64 -2 as a sort order value", + ), + StageTestCase( + "range_error_decimal128_two", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": Decimal128("2")}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject Decimal128 2 as a sort order value", + ), + StageTestCase( + "range_error_decimal128_neg_two", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"v": Decimal128("-2")}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should reject Decimal128 -2 as a sort order value", + ), +] + +# Property [Error Precedence]: sort order value errors take precedence over +# field path errors on the same key, and across multiple keys the first key's +# error is reported. 
+SORT_ERROR_PRECEDENCE_TESTS: list[StageTestCase] = [ + StageTestCase( + "precedence_range_over_field_path", + docs=[{"_id": 1}], + pipeline=[{"$sort": {"$a": 0}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should report range error over field path error on the same key", + ), + StageTestCase( + "precedence_type_over_field_path", + docs=[{"_id": 1}], + pipeline=[{"$sort": {"$a": "asc"}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should report type error over field path error on the same key", + ), + StageTestCase( + "precedence_first_key_range_over_second_type", + docs=[{"_id": 1}], + pipeline=[{"$sort": {"a": 0, "b": "asc"}}], + error_code=SORT_ORDER_RANGE_ERROR, + msg="$sort should report the first key's range error over the second key's type error", + ), + StageTestCase( + "precedence_first_key_type_over_second_range", + docs=[{"_id": 1}], + pipeline=[{"$sort": {"a": "asc", "b": 0}}], + error_code=SORT_ORDER_TYPE_ERROR, + msg="$sort should report the first key's type error over the second key's range error", + ), +] + +SORT_ORDER_ERROR_TESTS = ( + SORT_ORDER_TYPE_ERROR_TESTS + SORT_ORDER_RANGE_ERROR_TESTS + SORT_ERROR_PRECEDENCE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(SORT_ORDER_ERROR_TESTS)) +def test_sort_order_errors(collection, test_case: StageTestCase): + """Test $sort order value type and range errors.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_spec_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_spec_errors.py new file mode 100644 index 00000000..6a4cb9fd --- /dev/null +++ 
b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_spec_errors.py @@ -0,0 +1,317 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) +from bson.raw_bson import RawBSONDocument + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.bson_helpers import build_raw_bson_doc +from documentdb_tests.framework.error_codes import ( + FIELD_PATH_DOLLAR_PREFIX_ERROR, + FIELD_PATH_EMPTY_COMPONENT_ERROR, + FIELD_PATH_EMPTY_ERROR, + FIELD_PATH_TRAILING_DOT_ERROR, + OVERFLOW_ERROR, + SORT_COMPOUND_KEY_LIMIT_ERROR, + SORT_DUPLICATE_KEY_ERROR, + SORT_EMPTY_SPEC_ERROR, + SORT_NON_OBJECT_SPEC_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Sort Specification Validation Errors]: a non-document argument or +# an empty sort specification produces an error; non-document rejection applies +# to all BSON types. 
+SORT_SPEC_VALIDATION_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "spec_non_document_string", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": "not_a_doc"}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a string sort specification", + ), + StageTestCase( + "spec_non_document_int", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": 42}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject an integer sort specification", + ), + StageTestCase( + "spec_non_document_float", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": 3.14}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a float sort specification", + ), + StageTestCase( + "spec_non_document_bool", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": True}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a boolean sort specification", + ), + StageTestCase( + "spec_non_document_null", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": None}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a null sort specification", + ), + StageTestCase( + "spec_non_document_array", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": [1, 2]}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject an array sort specification", + ), + StageTestCase( + "spec_non_document_int64", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": Int64(42)}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject an Int64 sort specification", + ), + StageTestCase( + "spec_non_document_decimal128", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": Decimal128("3.14")}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a Decimal128 sort specification", + ), + StageTestCase( + "spec_non_document_objectid", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": ObjectId()}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject an ObjectId sort specification", + ), + StageTestCase( + 
"spec_non_document_datetime", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a datetime sort specification", + ), + StageTestCase( + "spec_non_document_binary", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": Binary(b"\x01")}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a Binary sort specification", + ), + StageTestCase( + "spec_non_document_regex", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": Regex("^abc")}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a Regex sort specification", + ), + StageTestCase( + "spec_non_document_timestamp", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": Timestamp(1, 1)}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a Timestamp sort specification", + ), + StageTestCase( + "spec_non_document_minkey", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": MinKey()}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a MinKey sort specification", + ), + StageTestCase( + "spec_non_document_maxkey", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": MaxKey()}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a MaxKey sort specification", + ), + StageTestCase( + "spec_non_document_code", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": Code("function(){}")}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a Code sort specification", + ), + StageTestCase( + "spec_non_document_codewithscope", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": Code("function(){}", {"x": 1})}], + error_code=SORT_NON_OBJECT_SPEC_ERROR, + msg="$sort should reject a CodeWithScope sort specification", + ), + StageTestCase( + "spec_empty", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {}}], + error_code=SORT_EMPTY_SPEC_ERROR, + msg="$sort should reject an empty sort specification", + ), +] + +# Property 
[Field Path Validation Errors]: invalid field paths in the sort +# specification produce errors for empty strings, leading or trailing dots, +# consecutive dots, leading dollar signs in any path component, and paths +# exceeding 200 components. +SORT_FIELD_PATH_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "field_path_empty_string", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"": 1}}], + error_code=FIELD_PATH_EMPTY_ERROR, + msg="$sort should reject an empty string as a field path", + ), + StageTestCase( + "field_path_trailing_dot", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"a.": 1}}], + error_code=FIELD_PATH_TRAILING_DOT_ERROR, + msg="$sort should reject a field path with a trailing dot", + ), + StageTestCase( + "field_path_leading_dot", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {".a": 1}}], + error_code=FIELD_PATH_EMPTY_COMPONENT_ERROR, + msg="$sort should reject a field path with a leading dot", + ), + StageTestCase( + "field_path_consecutive_dots", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"a..b": 1}}], + error_code=FIELD_PATH_EMPTY_COMPONENT_ERROR, + msg="$sort should reject a field path with consecutive dots", + ), + StageTestCase( + "field_path_leading_dollar", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"$a": 1}}], + error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg="$sort should reject a field path with a leading $ in the first component", + ), + StageTestCase( + "field_path_dollar_natural", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"$natural": 1}}], + error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg="$sort should reject $natural as a field path", + ), + StageTestCase( + "field_path_bare_dollar", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"$": 1}}], + error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg="$sort should reject a bare $ as a field path", + ), + StageTestCase( + "field_path_bare_double_dollar", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"$$": 1}}], + 
error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg="$sort should reject a bare $$ as a field path", + ), + StageTestCase( + "field_path_nested_leading_dollar", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {"a.$b": 1}}], + error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg="$sort should reject a leading $ in a nested path component", + ), + StageTestCase( + "field_path_depth_exceeds_200", + docs=[{"_id": 1, "v": 1}], + pipeline=[{"$sort": {".".join(["a"] * 201): 1}}], + error_code=OVERFLOW_ERROR, + msg="$sort should reject a field path exceeding 200 components", + ), +] + +# Property [Compound Sort Key Limit Errors]: more than 32 unique numeric sort +# keys produce an error. +SORT_COMPOUND_KEY_LIMIT_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "compound_key_limit_33", + docs=[{"_id": 1}], + pipeline=[{"$sort": {f"f{i}": 1 for i in range(33)}}], + error_code=SORT_COMPOUND_KEY_LIMIT_ERROR, + msg="$sort should reject more than 32 unique numeric sort keys", + ), +] + +SORT_SPEC_ERROR_TESTS = ( + SORT_SPEC_VALIDATION_ERROR_TESTS + + SORT_FIELD_PATH_ERROR_TESTS + + SORT_COMPOUND_KEY_LIMIT_ERROR_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(SORT_SPEC_ERROR_TESTS)) +def test_sort_spec_errors(collection, test_case: StageTestCase): + """Test $sort specification and field path validation errors.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) + + +def _build_raw_sort_stage(fields: list[tuple[str, int]]) -> RawBSONDocument: + """Build a raw BSON $sort stage with the given fields, preserving duplicates.""" + inner = build_raw_bson_doc(fields) + stage_elements = b"\x03$sort\x00" + inner.raw + doc_len = 4 + len(stage_elements) + 1 + return RawBSONDocument(doc_len.to_bytes(4, 
"little") + stage_elements + b"\x00") + + +def test_sort_spec_errors_duplicate_fields(collection): + """Test $sort rejects duplicate field names in the sort specification.""" + collection.insert_many( + [ + {"_id": 1, "v": 30}, + {"_id": 2, "v": 10}, + {"_id": 3, "v": 20}, + ] + ) + sort_stage = _build_raw_sort_stage([("v", 1), ("v", -1)]) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [sort_stage], + "cursor": {}, + }, + ) + assertResult( + result, + error_code=SORT_DUPLICATE_KEY_ERROR, + msg="$sort should reject duplicate field names in the sort specification", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_type_comparison.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_type_comparison.py new file mode 100644 index 00000000..1811ef89 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_type_comparison.py @@ -0,0 +1,281 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Code, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [BSON Type Comparison Order]: documents with values of different +# BSON types sort according to the BSON comparison order. 
+SORT_BSON_TYPE_ORDER_TESTS: list[StageTestCase] = [ + StageTestCase( + "bson_order_all_types_asc", + docs=[ + {"_id": 1, "v": MaxKey()}, + {"_id": 2, "v": Code("f", {"x": 1})}, + {"_id": 3, "v": Code("f")}, + {"_id": 4, "v": Regex("a")}, + {"_id": 5, "v": Timestamp(1, 1)}, + {"_id": 6, "v": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + {"_id": 7, "v": True}, + {"_id": 8, "v": ObjectId("000000000000000000000001")}, + {"_id": 9, "v": Binary(b"\x01")}, + {"_id": 10, "v": {"a": 1}}, + {"_id": 11, "v": "hello"}, + {"_id": 12, "v": 42}, + {"_id": 13, "v": None}, + {"_id": 14}, + {"_id": 15, "v": MinKey()}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 15, "v": MinKey()}, + {"_id": 13, "v": None}, + {"_id": 14}, + {"_id": 12, "v": 42}, + {"_id": 11, "v": "hello"}, + {"_id": 10, "v": {"a": 1}}, + {"_id": 9, "v": b"\x01"}, + {"_id": 8, "v": ObjectId("000000000000000000000001")}, + {"_id": 7, "v": True}, + {"_id": 6, "v": datetime(2024, 1, 1)}, + {"_id": 5, "v": Timestamp(1, 1)}, + {"_id": 4, "v": Regex("a")}, + {"_id": 3, "v": Code("f")}, + {"_id": 2, "v": Code("f", {"x": 1})}, + {"_id": 1, "v": MaxKey()}, + ], + msg="$sort should order BSON types according to the BSON comparison order", + ), +] + +# Property [Within-Type Ordering]: values of the same BSON type sort by +# their type-specific comparison rules rather than by BSON subtype grouping. +SORT_WITHIN_TYPE_TESTS: list[StageTestCase] = [ + # Strings sort in binary/codepoint order with no Unicode normalization, + # no null-byte truncation, and no special handling of invisible characters. 
+ StageTestCase( + "within_string_codepoint_order", + docs=[ + {"_id": 5, "v": "\u00e9"}, + {"_id": 4, "v": "e\u0301"}, + {"_id": 3, "v": "ab\x00c"}, + {"_id": 2, "v": "abc"}, + {"_id": 1, "v": "\u200b"}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 3, "v": "ab\x00c"}, + {"_id": 2, "v": "abc"}, + {"_id": 4, "v": "e\u0301"}, + {"_id": 5, "v": "\u00e9"}, + {"_id": 1, "v": "\u200b"}, + ], + msg="$sort should sort strings by binary codepoint order without normalization", + ), + StageTestCase( + "within_string_digit_strings_as_strings", + docs=[ + {"_id": 1, "v": "0"}, + {"_id": 2, "v": 99_999}, + {"_id": 3, "v": "12345"}, + ], + pipeline=[{"$sort": {"v": 1, "_id": 1}}], + expected=[ + {"_id": 2, "v": 99_999}, + {"_id": 1, "v": "0"}, + {"_id": 3, "v": "12345"}, + ], + msg="$sort should sort digit-only strings as strings after numbers", + ), + StageTestCase( + "within_objectid_byte_comparison", + docs=[ + {"_id": 1, "v": ObjectId("ffffffffffffffffffff0001")}, + {"_id": 2, "v": ObjectId("000000000000000000000001")}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": ObjectId("000000000000000000000001")}, + {"_id": 1, "v": ObjectId("ffffffffffffffffffff0001")}, + ], + msg="$sort should sort ObjectId values by lexicographic byte comparison", + ), + StageTestCase( + "within_regex_pattern_then_flags", + docs=[ + {"_id": 1, "v": Regex("abc", "i")}, + {"_id": 2, "v": Regex("abc", "")}, + {"_id": 3, "v": Regex("abd", "")}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": Regex("abc", "")}, + {"_id": 1, "v": Regex("abc", "i")}, + {"_id": 3, "v": Regex("abd", "")}, + ], + msg="$sort should sort Regex by pattern first then by flags", + ), + StageTestCase( + "within_embedded_doc_order", + docs=[ + {"_id": 1, "v": {"a": 2}}, + {"_id": 2, "v": {}}, + {"_id": 3, "v": {"b": 1}}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": {}}, + {"_id": 1, "v": {"a": 2}}, + {"_id": 3, "v": {"b": 1}}, + ], + msg=( + "$sort 
should sort embedded documents by key-value pairs in order" + " with empty document first" + ), + ), + StageTestCase( + "within_code_lexicographic", + docs=[ + {"_id": 1, "v": Code("function b() {}")}, + {"_id": 2, "v": Code("function a() {}")}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": Code("function a() {}")}, + {"_id": 1, "v": Code("function b() {}")}, + ], + msg="$sort should sort Code values by code string lexicographically", + ), + StageTestCase( + "within_codewithscope_code_then_scope", + docs=[ + {"_id": 1, "v": Code("f", {"x": 2})}, + {"_id": 2, "v": Code("f", {"x": 1})}, + {"_id": 3, "v": Code("g", {"x": 1})}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": Code("f", {"x": 1})}, + {"_id": 1, "v": Code("f", {"x": 2})}, + {"_id": 3, "v": Code("g", {"x": 1})}, + ], + msg="$sort should sort CodeWithScope by code string first then by scope document", + ), + StageTestCase( + "within_binary_same_subtype_length_then_bytes", + docs=[ + {"_id": 1, "v": Binary(b"\xff", 0)}, + {"_id": 2, "v": Binary(b"\x00", 0)}, + {"_id": 3, "v": Binary(b"\x00\x01", 0)}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": b"\x00"}, + {"_id": 1, "v": b"\xff"}, + {"_id": 3, "v": b"\x00\x01"}, + ], + msg=( + "$sort should sort Binary within the same subtype by data length" + " first then by data bytes" + ), + ), + # Binary subtype ordering: 0 < 1 < 3 < 4 < 5 < 128 < 2. 
+ StageTestCase( + "within_binary_subtype_order", + docs=[ + {"_id": 1, "v": Binary(b"\x01" * 16, 0)}, + {"_id": 2, "v": Binary(b"\x01" * 16, 1)}, + {"_id": 3, "v": Binary(b"\x01" * 16, 2)}, + {"_id": 4, "v": Binary(b"\x01" * 16, 3)}, + {"_id": 5, "v": Binary(b"\x01" * 16, 4)}, + {"_id": 6, "v": Binary(b"\x01" * 16, 5)}, + {"_id": 7, "v": Binary(b"\x01" * 16, 128)}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 1, "v": b"\x01" * 16}, + {"_id": 2, "v": Binary(b"\x01" * 16, 1)}, + {"_id": 4, "v": Binary(b"\x01" * 16, 3)}, + {"_id": 5, "v": Binary(b"\x01" * 16, 4)}, + {"_id": 6, "v": Binary(b"\x01" * 16, 5)}, + {"_id": 7, "v": Binary(b"\x01" * 16, 128)}, + {"_id": 3, "v": Binary(b"\x01" * 16, 2)}, + ], + msg="$sort should order Binary subtypes as 0 < 1 < 3 < 4 < 5 < 128 < 2", + ), + StageTestCase( + "within_datetime_chronological_pre_epoch", + docs=[ + {"_id": 1, "v": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + {"_id": 2, "v": datetime(1969, 12, 31, tzinfo=timezone.utc)}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": datetime(1969, 12, 31)}, + {"_id": 1, "v": datetime(2024, 1, 1)}, + ], + msg=( + "$sort should sort datetime values chronologically" + " with pre-epoch dates sorting correctly" + ), + ), + StageTestCase( + "within_datetime_millisecond_precision", + docs=[ + {"_id": 1, "v": datetime(2024, 1, 1, 0, 0, 0, 2000, tzinfo=timezone.utc)}, + {"_id": 2, "v": datetime(2024, 1, 1, 0, 0, 0, 1000, tzinfo=timezone.utc)}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": datetime(2024, 1, 1, 0, 0, 0, 1000)}, + {"_id": 1, "v": datetime(2024, 1, 1, 0, 0, 0, 2000)}, + ], + msg="$sort should respect millisecond precision in datetime ordering", + ), + StageTestCase( + "within_timestamp_time_then_increment", + docs=[ + {"_id": 1, "v": Timestamp(100, 2)}, + {"_id": 2, "v": Timestamp(100, 1)}, + {"_id": 3, "v": Timestamp(200, 1)}, + ], + pipeline=[{"$sort": {"v": 1}}], + expected=[ + {"_id": 2, "v": Timestamp(100, 
1)}, + {"_id": 1, "v": Timestamp(100, 2)}, + {"_id": 3, "v": Timestamp(200, 1)}, + ], + msg="$sort should sort Timestamp by time component first then by increment", + ), +] + +SORT_TYPE_COMPARISON_TESTS = SORT_BSON_TYPE_ORDER_TESTS + SORT_WITHIN_TYPE_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(SORT_TYPE_COMPARISON_TESTS)) +def test_sort_type_comparison(collection, test_case: StageTestCase): + """Test $sort BSON type comparison ordering.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_sort.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_sort.py new file mode 100644 index 00000000..4f6492cc --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_sort.py @@ -0,0 +1,221 @@ +"""Tests for $sort composing with other stages at different pipeline positions.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Pipeline Position]: $sort orders documents correctly regardless of +# its position in the pipeline and composes with preceding stages that reshape +# documents. 
+SORT_PIPELINE_POSITION_TESTS: list[StageTestCase] = [ + StageTestCase( + "pipeline_first_stage", + docs=[ + {"_id": 1, "v": 30}, + {"_id": 2, "v": 10}, + {"_id": 3, "v": 20}, + ], + pipeline=[{"$sort": {"v": 1}}, {"$project": {"_id": 1}}], + expected=[{"_id": 2}, {"_id": 3}, {"_id": 1}], + msg="$sort should work as the first stage of a pipeline", + ), + StageTestCase( + "pipeline_middle_stage", + docs=[ + {"_id": 1, "v": 30, "x": "a"}, + {"_id": 2, "v": 10, "x": "b"}, + {"_id": 3, "v": 20, "x": "c"}, + ], + pipeline=[ + {"$match": {"v": {"$gte": 10}}}, + {"$sort": {"v": 1}}, + {"$project": {"_id": 1}}, + ], + expected=[{"_id": 2}, {"_id": 3}, {"_id": 1}], + msg="$sort should work as a middle stage of a pipeline", + ), + StageTestCase( + "pipeline_last_stage", + docs=[ + {"_id": 1, "v": 30}, + {"_id": 2, "v": 10}, + {"_id": 3, "v": 20}, + ], + pipeline=[ + {"$project": {"v": 1}}, + {"$sort": {"v": 1}}, + ], + expected=[ + {"_id": 2, "v": 10}, + {"_id": 3, "v": 20}, + {"_id": 1, "v": 30}, + ], + msg="$sort should work as the last stage of a pipeline", + ), + StageTestCase( + "pipeline_consecutive_sort", + docs=[ + {"_id": 1, "a": 2, "b": 30}, + {"_id": 2, "a": 1, "b": 20}, + {"_id": 3, "a": 3, "b": 10}, + ], + pipeline=[ + {"$sort": {"a": 1}}, + {"$sort": {"b": 1}}, + {"$project": {"_id": 1}}, + ], + expected=[{"_id": 3}, {"_id": 2}, {"_id": 1}], + msg="$sort consecutive stages should apply independently with last sort winning", + ), + StageTestCase( + "pipeline_after_project_drops_sort_field", + docs=[ + {"_id": 1, "v": 30, "x": "a"}, + {"_id": 2, "v": 10, "x": "b"}, + {"_id": 3, "v": 20, "x": "c"}, + ], + pipeline=[ + {"$project": {"x": 1}}, + {"$sort": {"v": 1, "_id": 1}}, + {"$project": {"_id": 1}}, + ], + expected=[{"_id": 1}, {"_id": 2}, {"_id": 3}], + msg="$sort on a field dropped by a preceding $project should treat all values as missing", + ), + StageTestCase( + "pipeline_after_project_renames_field", + docs=[ + {"_id": 1, "v": 30}, + {"_id": 2, "v": 10}, + 
{"_id": 3, "v": 20}, + ], + pipeline=[ + {"$project": {"w": "$v"}}, + {"$sort": {"w": 1}}, + {"$project": {"_id": 1}}, + ], + expected=[{"_id": 2}, {"_id": 3}, {"_id": 1}], + msg="$sort should sort on a field renamed by a preceding $project", + ), + StageTestCase( + "pipeline_after_project_computed_field", + docs=[ + {"_id": 1, "v": 10}, + {"_id": 2, "v": 30}, + {"_id": 3, "v": 20}, + ], + pipeline=[ + {"$project": {"neg": {"$multiply": ["$v", -1]}}}, + {"$sort": {"neg": 1}}, + {"$project": {"_id": 1}}, + ], + expected=[{"_id": 2}, {"_id": 3}, {"_id": 1}], + msg="$sort should sort on a field computed by a preceding $project", + ), + StageTestCase( + "pipeline_after_group", + docs=[ + {"_id": 1, "cat": "a", "val": 5}, + {"_id": 2, "cat": "b", "val": 3}, + {"_id": 3, "cat": "a", "val": 7}, + ], + pipeline=[ + {"$group": {"_id": "$cat", "total": {"$sum": "$val"}}}, + {"$sort": {"total": 1}}, + {"$project": {"_id": 1}}, + ], + expected=[{"_id": "b"}, {"_id": "a"}], + msg="$sort should sort on fields produced by a preceding $group", + ), + StageTestCase( + "pipeline_after_replaceroot", + docs=[ + {"_id": 1, "inner": {"x": 30}}, + {"_id": 2, "inner": {"x": 10}}, + {"_id": 3, "inner": {"x": 20}}, + ], + pipeline=[ + {"$replaceRoot": {"newRoot": "$inner"}}, + {"$sort": {"x": 1}}, + ], + expected=[{"x": 10}, {"x": 20}, {"x": 30}], + msg="$sort should sort on the document shape produced by $replaceRoot", + ), +] + +# Property [$meta Sort with Prerequisite Stages]: $sort by {$meta: 'textScore'} +# and {$meta: 'geoNearDistance'} work when preceded by the required stage. 
+SORT_META_PREREQUISITE_TESTS: list[StageTestCase] = [ + StageTestCase( + "text_score_sort_desc", + docs=[ + {"_id": 1, "content": "apple"}, + {"_id": 2, "content": "apple apple apple"}, + {"_id": 3, "content": "banana"}, + ], + setup=lambda collection: collection.create_index([("content", "text")]), + pipeline=[ + {"$match": {"$text": {"$search": "apple"}}}, + {"$sort": {"score": {"$meta": "textScore"}}}, + ], + expected=[ + {"_id": 2, "content": "apple apple apple"}, + {"_id": 1, "content": "apple"}, + ], + msg="$sort by textScore should order documents by text search relevance", + ), + StageTestCase( + "geo_near_distance_sort", + docs=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [10, 10]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [5, 5]}}, + ], + setup=lambda collection: collection.create_index([("loc", "2dsphere")]), + pipeline=[ + { + "$geoNear": { + "near": {"type": "Point", "coordinates": [0, 0]}, + "distanceField": "dist", + } + }, + {"$sort": {"d": {"$meta": "geoNearDistance"}}}, + {"$project": {"_id": 1}}, + ], + expected=[{"_id": 2}, {"_id": 3}, {"_id": 1}], + msg="$sort by geoNearDistance should order documents by distance", + ), +] + +SORT_STAGE_POSITION_TESTS_ALL = SORT_PIPELINE_POSITION_TESTS + SORT_META_PREREQUISITE_TESTS + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SORT_STAGE_POSITION_TESTS_ALL)) +def test_stage_position_sort_cases(collection, test_case: StageTestCase): + """Test $sort composing with other stages at different pipeline positions.""" + if test_case.setup: + test_case.setup(collection) + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git 
a/documentdb_tests/compatibility/tests/core/operator/stages/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py b/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py new file mode 100644 index 00000000..1476b046 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/utils/stage_test_case.py @@ -0,0 +1,20 @@ +""" +Shared test case for pipeline stage tests. +""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass +from typing import Any + +from documentdb_tests.framework.test_case import BaseTestCase + + +@dataclass(frozen=True) +class StageTestCase(BaseTestCase): + """Test case for pipeline stage tests.""" + + docs: list[dict[str, Any]] | None = None + pipeline: list[dict[str, Any]] | None = None + setup: Callable | None = None diff --git a/documentdb_tests/framework/bson_helpers.py b/documentdb_tests/framework/bson_helpers.py new file mode 100644 index 00000000..ef2bcdba --- /dev/null +++ b/documentdb_tests/framework/bson_helpers.py @@ -0,0 +1,35 @@ +"""Utilities for constructing raw BSON documents with features not +expressible through Python mappings (e.g. duplicate keys). +""" + +from __future__ import annotations + +from typing import Any + +import bson +from bson.raw_bson import RawBSONDocument + + +def build_raw_bson_doc(fields: list[tuple[str, Any]]) -> RawBSONDocument: + """Build a raw BSON document from a list of (key, value) pairs. + + Unlike Python dicts, this preserves duplicate keys, allowing tests to verify + server behavior when the same field name appears more than once. Values can + be any type that ``bson.encode`` accepts. + + Args: + fields: Ordered (key, value) pairs. + + Returns: + A RawBSONDocument containing the encoded fields. 
+ """ + elements = b"" + for name, value in fields: + # Encode a single-key document and strip the wrapper to get + # the raw element (type byte + key + value). + encoded = bson.encode({name: value}) + # BSON layout: 4-byte length + elements + trailing \x00 + # Strip the 4-byte length prefix and the trailing \x00. + elements += encoded[4:-1] + doc_len = 4 + len(elements) + 1 + return RawBSONDocument(doc_len.to_bytes(4, "little") + elements + b"\x00") diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index c285b5aa..09c7cf85 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -4,11 +4,27 @@ """ DIVIDE_BY_ZERO_ERROR = 2 +FAILED_TO_PARSE_ERROR = 9 TYPE_MISMATCH_ERROR = 14 +OVERFLOW_ERROR = 15 +UNRECOGNIZED_EXPRESSION_ERROR = 168 +SORT_COMPOUND_KEY_LIMIT_ERROR = 13103 +MATCH_FILTER_NOT_OBJECT_ERROR = 15959 +SORT_NON_OBJECT_SPEC_ERROR = 15973 +SORT_ORDER_TYPE_ERROR = 15974 +SORT_ORDER_RANGE_ERROR = 15975 +SORT_EMPTY_SPEC_ERROR = 15976 +FIELD_PATH_EMPTY_COMPONENT_ERROR = 15998 EXPRESSION_TYPE_MISMATCH_ERROR = 16020 +FIELD_PATH_DOLLAR_PREFIX_ERROR = 16410 MODULO_ZERO_REMAINDER_ERROR = 16610 MODULO_NON_NUMERIC_ERROR = 16611 MORE_THAN_ONE_DATE_ERROR = 16612 +INVALID_DOLLAR_FIELD_PATH = 16872 +SETUNION_TYPE_ERROR = 17043 +UNSUPPORTED_META_FIELD_ERROR = 17308 +SORT_NON_META_OBJECT_ERROR = 17312 +MATCH_TEXT_NOT_FIRST_STAGE_ERROR = 17313 ABS_OVERFLOW_ERROR = 28680 LOG_NON_NUMERIC_VALUE_ERROR = 28756 LOG_NON_NUMERIC_BASE_ERROR = 28757 @@ -20,4 +36,10 @@ POW_BASE_ZERO_EXP_NEGATIVE_ERROR = 28764 NON_NUMERIC_TYPE_MISMATCH_ERROR = 28765 LN_NON_POSITIVE_INPUT_ERROR = 28766 +SORT_ILLEGAL_META_ERROR = 31138 +QUERY_METADATA_NOT_AVAILABLE_ERROR = 40218 +FIELD_PATH_EMPTY_ERROR = 40352 +FIELD_PATH_TRAILING_DOT_ERROR = 40353 +NEAR_NOT_ALLOWED_ERROR = 5626500 MODULO_DECIMAL128_ZERO_REMAINDER_ERROR = 5733415 +SORT_DUPLICATE_KEY_ERROR = 7472500 diff --git 
a/documentdb_tests/framework/test_constants.py b/documentdb_tests/framework/test_constants.py index 9e3088ed..3613a7db 100644 --- a/documentdb_tests/framework/test_constants.py +++ b/documentdb_tests/framework/test_constants.py @@ -17,10 +17,13 @@ INT64_MAX_MINUS_1 = Int64(9223372036854775806) # Double boundary values +DOUBLE_MIN = -1.7976931348623157e308 DOUBLE_MIN_NEGATIVE_SUBNORMAL = -5e-324 DOUBLE_NEGATIVE_ZERO = -0.0 DOUBLE_ZERO = 0.0 DOUBLE_MIN_SUBNORMAL = 5e-324 +DOUBLE_MIN_NORMAL = 2.2250738585072014e-308 +DOUBLE_MAX = 1.7976931348623157e308 DOUBLE_NEAR_MAX = 1e308 DOUBLE_NEAR_MIN = 1e-308 DOUBLE_MAX_SAFE_INTEGER = 9007199254740992 @@ -43,10 +46,14 @@ # Decimal128 boundary values DECIMAL128_NEGATIVE_INFINITY = Decimal128("-Infinity") DECIMAL128_MIN = Decimal128("-9.999999999999999999999999999999999E+6144") +DECIMAL128_MAX_NEGATIVE = Decimal128("-1E-6176") DECIMAL128_ZERO = Decimal128("0") DECIMAL128_NEGATIVE_ZERO = Decimal128("-0") +DECIMAL128_MIN_POSITIVE = Decimal128("1E-6176") DECIMAL128_INFINITY = Decimal128("Infinity") DECIMAL128_MAX = Decimal128("9.999999999999999999999999999999999E+6144") +DECIMAL128_INT64_OVERFLOW = Decimal128("9223372036854775808") +DECIMAL128_INT64_UNDERFLOW = Decimal128("-9223372036854775809") DECIMAL128_LARGE_EXPONENT = Decimal128("1E+6144") DECIMAL128_SMALL_EXPONENT = Decimal128("1E-6143") DECIMAL128_TRAILING_ZERO = Decimal128("1.0") From 396819c8e038c8d234b1634e8d688a5518b06176 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 15 Apr 2026 16:17:01 -0700 Subject: [PATCH 2/6] Fix misleading msg on $meta acceptance tests that expect errors Signed-off-by: Daniel Frankcom --- .../operator/stages/sort/test_sort_direction_values.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_direction_values.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_direction_values.py index 450c4063..23ed0633 100644 --- 
a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_direction_values.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_direction_values.py @@ -110,14 +110,17 @@ docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], pipeline=[{"$sort": {"v": 1, "t": {"$meta": "textScore"}}}], error_code=QUERY_METADATA_NOT_AVAILABLE_ERROR, - msg="$sort should accept {$meta: 'textScore'} as a sort order value", + msg="$sort should accept {$meta: 'textScore'} but fail without a preceding $text stage", ), StageTestCase( "accept_meta_geoneardistance", docs=[{"_id": 1, "v": 20}, {"_id": 2, "v": 10}], pipeline=[{"$sort": {"v": 1, "d": {"$meta": "geoNearDistance"}}}], error_code=QUERY_METADATA_NOT_AVAILABLE_ERROR, - msg="$sort should accept {$meta: 'geoNearDistance'} as a sort order value", + msg=( + "$sort should accept {$meta: 'geoNearDistance'}" + " but fail without a preceding $geoNear stage" + ), ), # $meta keys do not count toward the 32-key compound sort limit. StageTestCase( From c81f5f5ae9d3b5d4435365261b5c158dd0d09746 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 15 Apr 2026 16:17:18 -0700 Subject: [PATCH 3/6] Add descending variants for dot notation and non-traversable field path tests Signed-off-by: Daniel Frankcom --- .../stages/sort/test_sort_field_paths.py | 43 +++++++++++++++++-- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_field_paths.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_field_paths.py index f85b7240..22c5cad8 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_field_paths.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_field_paths.py @@ -16,7 +16,7 @@ # documents and arrays of objects for sort key extraction. 
SORT_NESTED_FIELD_TESTS: list[StageTestCase] = [ StageTestCase( - "nested_dot_notation", + "nested_dot_notation_asc", docs=[ {"_id": 1, "a": {"b": 30}}, {"_id": 2, "a": {"b": 10}}, @@ -26,7 +26,20 @@ {"_id": 2, "a": {"b": 10}}, {"_id": 1, "a": {"b": 30}}, ], - msg="$sort should traverse embedded documents via dot notation", + msg="$sort should traverse embedded documents via dot notation ascending", + ), + StageTestCase( + "nested_dot_notation_desc", + docs=[ + {"_id": 1, "a": {"b": 30}}, + {"_id": 2, "a": {"b": 10}}, + ], + pipeline=[{"$sort": {"a.b": -1}}], + expected=[ + {"_id": 1, "a": {"b": 30}}, + {"_id": 2, "a": {"b": 10}}, + ], + msg="$sort should traverse embedded documents via dot notation descending", ), StageTestCase( "nested_array_of_objects_asc", @@ -65,7 +78,7 @@ ), ), StageTestCase( - "nested_non_traversable_intermediate_treated_as_missing", + "nested_non_traversable_intermediate_treated_as_missing_asc", docs=[ {"_id": 1, "a": 42}, {"_id": 2, "a": None}, @@ -81,7 +94,29 @@ {"_id": 5, "a": {"b": 5}}, {"_id": 3, "a": {"b": 10}}, ], - msg="$sort should treat scalar and null at an intermediate path level as missing", + msg="$sort ascending should treat scalar and null at an intermediate path level as missing", + ), + StageTestCase( + "nested_non_traversable_intermediate_treated_as_missing_desc", + docs=[ + {"_id": 1, "a": 42}, + {"_id": 2, "a": None}, + {"_id": 3, "a": {"b": 10}}, + {"_id": 4}, + {"_id": 5, "a": {"b": 5}}, + ], + pipeline=[{"$sort": {"a.b": -1, "_id": 1}}], + expected=[ + {"_id": 3, "a": {"b": 10}}, + {"_id": 5, "a": {"b": 5}}, + {"_id": 1, "a": 42}, + {"_id": 2, "a": None}, + {"_id": 4}, + ], + msg=( + "$sort descending should treat scalar and null" + " at an intermediate path level as missing" + ), ), # The server limits document nesting to 180 levels, so the sort path # can only be verified up to that depth. 
The 200-component path test From a0b9804ca947e3a3d3ea63ee446ab49319d4ff5c Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 15 Apr 2026 16:17:32 -0700 Subject: [PATCH 4/6] Add desc/asc and desc/desc compound sort direction test cases Signed-off-by: Daniel Frankcom --- .../stages/sort/test_sort_key_resolution.py | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_key_resolution.py b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_key_resolution.py index bf6c7d23..4ec57891 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_key_resolution.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/sort/test_sort_key_resolution.py @@ -49,7 +49,7 @@ msg="$sort should use key insertion order, not alphabetical field name order", ), StageTestCase( - "compound_mixed_directions", + "compound_asc_desc", docs=[ {"_id": 1, "a": 2, "b": 30}, {"_id": 2, "a": 1, "b": 20}, @@ -65,6 +65,40 @@ ], msg="$sort should apply ascending on first field and descending on second field", ), + StageTestCase( + "compound_desc_asc", + docs=[ + {"_id": 1, "a": 2, "b": 30}, + {"_id": 2, "a": 1, "b": 20}, + {"_id": 3, "a": 2, "b": 10}, + {"_id": 4, "a": 1, "b": 40}, + ], + pipeline=[{"$sort": {"a": -1, "b": 1}}], + expected=[ + {"_id": 3, "a": 2, "b": 10}, + {"_id": 1, "a": 2, "b": 30}, + {"_id": 2, "a": 1, "b": 20}, + {"_id": 4, "a": 1, "b": 40}, + ], + msg="$sort should apply descending on first field and ascending on second field", + ), + StageTestCase( + "compound_desc_desc", + docs=[ + {"_id": 1, "a": 2, "b": 30}, + {"_id": 2, "a": 1, "b": 20}, + {"_id": 3, "a": 2, "b": 10}, + {"_id": 4, "a": 1, "b": 40}, + ], + pipeline=[{"$sort": {"a": -1, "b": -1}}], + expected=[ + {"_id": 1, "a": 2, "b": 30}, + {"_id": 3, "a": 2, "b": 10}, + {"_id": 4, "a": 1, "b": 40}, + {"_id": 2, "a": 1, "b": 20}, + ], + msg="$sort should 
apply descending on both first and second fields", + ), ] # Property [Null and Missing Field Behavior]: documents where the sort field From 75f9c58ff5c3e74e4de350160ad93393740607d2 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Thu, 16 Apr 2026 09:52:38 -0700 Subject: [PATCH 5/6] Add $sort stage integration tests Signed-off-by: Daniel Frankcom --- .../stages/test_stages_combination_sort.py | 261 ++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_sort.py diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_sort.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_sort.py new file mode 100644 index 00000000..833575d5 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_sort.py @@ -0,0 +1,261 @@ +"""Tests for interesting $sort combinations with other pipeline stages.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Sort → Project → Group Field Visibility]: when $sort feeds into +# $project then $group, the projection controls which fields the downstream +# $group accumulator can see. Some server implementations optimize this +# pipeline shape by merging the sort into the group, which risks bypassing +# the intermediate projection and exposing fields that should have been +# removed. Using $first makes the $sort functionally necessary (it +# determines which document's value the accumulator picks) while still +# exercising the optimizer path. 
+SORT_PROJECT_GROUP_TESTS: list[StageTestCase] = [ + StageTestCase( + "inclusion_project_preserves_field_for_group", + docs=[ + {"_id": 1, "cat": "a", "val": 10}, + {"_id": 2, "cat": "a", "val": 20}, + {"_id": 3, "cat": "b", "val": 30}, + ], + pipeline=[ + {"$sort": {"val": -1}}, + {"$project": {"cat": 1, "val": 1}}, + {"$group": {"_id": "$cat", "top": {"$first": "$val"}}}, + ], + expected=[ + {"_id": "a", "top": 20}, + {"_id": "b", "top": 30}, + ], + msg="Inclusion projection that keeps a field should let $group $first see its value", + ), + StageTestCase( + "inclusion_project_excludes_field_from_group", + docs=[ + {"_id": 1, "cat": "a", "val": 10}, + {"_id": 2, "cat": "a", "val": 20}, + {"_id": 3, "cat": "b", "val": 30}, + ], + pipeline=[ + {"$sort": {"val": -1}}, + {"$project": {"cat": 1}}, + {"$group": {"_id": "$cat", "top": {"$first": "$val"}}}, + ], + expected=[ + {"_id": "a", "top": None}, + {"_id": "b", "top": None}, + ], + msg="Inclusion projection that omits a field should make $group $first receive missing", + ), + StageTestCase( + "exclusion_project_removes_unrelated_field", + docs=[ + {"_id": 1, "cat": "a", "val": 10, "extra": "x"}, + {"_id": 2, "cat": "a", "val": 20, "extra": "y"}, + {"_id": 3, "cat": "b", "val": 30, "extra": "z"}, + ], + pipeline=[ + {"$sort": {"val": -1}}, + {"$project": {"extra": 0}}, + {"$group": {"_id": "$cat", "top": {"$first": "$val"}}}, + ], + expected=[ + {"_id": "a", "top": 20}, + {"_id": "b", "top": 30}, + ], + msg="Exclusion projection removing an unrelated field should not affect $group $first", + ), + StageTestCase( + "exclusion_project_removes_needed_field", + docs=[ + {"_id": 1, "cat": "a", "val": 10}, + {"_id": 2, "cat": "a", "val": 20}, + {"_id": 3, "cat": "b", "val": 30}, + ], + pipeline=[ + {"$sort": {"val": -1}}, + {"$project": {"val": 0}}, + {"$group": {"_id": "$cat", "top": {"$first": "$val"}}}, + ], + expected=[ + {"_id": "a", "top": None}, + {"_id": "b", "top": None}, + ], + msg=( + "Exclusion projection 
removing the accumulated field should" + " make $group $first receive missing" + ), + ), +] + +# Property [Sort → Group Order-Dependent Accumulators]: $last in $group +# respects the document order established by a preceding $sort. +SORT_GROUP_ORDER_TESTS: list[StageTestCase] = [ + StageTestCase( + "sort_desc_group_last", + docs=[ + {"_id": 1, "cat": "a", "val": 10}, + {"_id": 2, "cat": "a", "val": 30}, + {"_id": 3, "cat": "a", "val": 20}, + {"_id": 4, "cat": "b", "val": 5}, + {"_id": 5, "cat": "b", "val": 15}, + ], + pipeline=[ + {"$sort": {"val": -1}}, + {"$group": {"_id": "$cat", "last_val": {"$last": "$val"}}}, + ], + expected=[ + {"_id": "a", "last_val": 10}, + {"_id": "b", "last_val": 5}, + ], + msg="Sort descending then $group with $last should pick the lowest value", + ), +] + +# Property [Sort → Limit/Skip Pagination]: $sort followed by $limit or +# $skip and $limit returns the correct documents in sorted order. +SORT_LIMIT_TESTS: list[StageTestCase] = [ + StageTestCase( + "sort_limit_top_n", + docs=[ + {"_id": 1, "val": 50}, + {"_id": 2, "val": 10}, + {"_id": 3, "val": 40}, + {"_id": 4, "val": 30}, + {"_id": 5, "val": 20}, + ], + pipeline=[{"$sort": {"val": -1}}, {"$limit": 3}], + expected=[ + {"_id": 1, "val": 50}, + {"_id": 3, "val": 40}, + {"_id": 4, "val": 30}, + ], + msg="$sort descending then $limit should return the top N documents in order", + ), + StageTestCase( + "sort_skip_limit_page", + docs=[ + {"_id": 1, "val": 50}, + {"_id": 2, "val": 10}, + {"_id": 3, "val": 40}, + {"_id": 4, "val": 30}, + {"_id": 5, "val": 20}, + ], + pipeline=[{"$sort": {"val": 1}}, {"$skip": 1}, {"$limit": 2}], + expected=[ + {"_id": 5, "val": 20}, + {"_id": 4, "val": 30}, + ], + msg="$sort then $skip then $limit should return the correct page window", + ), +] + +# Property [Sort → Unwind → Group Order Preservation]: sort order established +# before $unwind is preserved through array expansion for downstream +# order-dependent accumulators. 
+SORT_UNWIND_GROUP_TESTS: list[StageTestCase] = [ + StageTestCase( + "sort_unwind_group_first", + docs=[ + {"_id": 1, "cat": "a", "val": 30, "tags": ["x", "y"]}, + {"_id": 2, "cat": "a", "val": 10, "tags": ["z"]}, + ], + pipeline=[ + {"$sort": {"val": -1}}, + {"$unwind": "$tags"}, + {"$group": {"_id": "$cat", "first_tag": {"$first": "$tags"}}}, + ], + expected=[{"_id": "a", "first_tag": "x"}], + msg="$first after $sort and $unwind should pick from the document that sorted first", + ), +] + +# Property [Match → Sort → Group Filter-Then-Pick]: $match narrows input before +# $sort establishes order for an order-dependent $group accumulator. +MATCH_SORT_GROUP_TESTS: list[StageTestCase] = [ + StageTestCase( + "match_sort_group_first", + docs=[ + {"_id": 1, "cat": "a", "val": 5, "active": False}, + {"_id": 2, "cat": "a", "val": 30, "active": True}, + {"_id": 3, "cat": "a", "val": 20, "active": True}, + {"_id": 4, "cat": "b", "val": 10, "active": True}, + ], + pipeline=[ + {"$match": {"active": True}}, + {"$sort": {"val": -1}}, + {"$group": {"_id": "$cat", "top": {"$first": "$val"}}}, + ], + expected=[ + {"_id": "a", "top": 30}, + {"_id": "b", "top": 10}, + ], + msg="$match filtering before $sort and $group $first should respect filtered sort order", + ), +] + +# Property [Sort → AddFields → Group Computed Override]: when $addFields +# overwrites the sort field with a computed value, the downstream $group +# accumulator should see the computed value, not the original. Some server +# implementations may optimize this pipeline shape in a way that bypasses +# the $addFields override. 
+SORT_ADDFIELDS_GROUP_TESTS: list[StageTestCase] = [ + StageTestCase( + "sort_addfields_overwrites_sort_field", + docs=[ + {"_id": 1, "cat": "a", "val": 10}, + {"_id": 2, "cat": "a", "val": 20}, + {"_id": 3, "cat": "b", "val": 30}, + ], + pipeline=[ + {"$sort": {"val": -1}}, + {"$addFields": {"val": {"$multiply": ["$val", -1]}}}, + {"$group": {"_id": "$cat", "top": {"$first": "$val"}}}, + ], + expected=[ + {"_id": "a", "top": -20}, + {"_id": "b", "top": -30}, + ], + msg="$group $first should see the $addFields-computed value, not the original sort field", + ), +] + +STAGE_COMBINATIONS_SORT_TESTS = ( + SORT_PROJECT_GROUP_TESTS + + SORT_GROUP_ORDER_TESTS + + SORT_LIMIT_TESTS + + SORT_UNWIND_GROUP_TESTS + + MATCH_SORT_GROUP_TESTS + + SORT_ADDFIELDS_GROUP_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(STAGE_COMBINATIONS_SORT_TESTS)) +def test_stage_combinations_sort(collection, test_case: StageTestCase): + """Test interesting $sort combinations with other pipeline stages.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) From 612761a7f785ce15ce21886651c14529d2ba4dc0 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Thu, 16 Apr 2026 10:53:12 -0700 Subject: [PATCH 6/6] Use ignore_doc_order for group output ordering Signed-off-by: Daniel Frankcom --- .../tests/core/operator/stages/test_stages_combination_sort.py | 1 + 1 file changed, 1 insertion(+) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_sort.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_sort.py index 833575d5..15403ec0 100644 --- 
a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_sort.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_sort.py @@ -258,4 +258,5 @@ def test_stage_combinations_sort(collection, test_case: StageTestCase): expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg, + ignore_doc_order=True, )