From 9f49f5f67975635462a4e8ac7e01f814fd993d0c Mon Sep 17 00:00:00 2001 From: Mitchell Elholm Date: Thu, 16 Apr 2026 15:03:27 -0700 Subject: [PATCH 1/5] Add tests for $rand Signed-off-by: Mitchell Elholm --- .../misc/rand/test_rand_argument_handling.py | 293 ++++++++++++++++ .../misc/rand/test_rand_return_value.py | 141 ++++++++ .../test_expressions_combination_rand.py | 322 ++++++++++++++++++ 3 files changed, 756 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_argument_handling.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_argument_handling.py b/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_argument_handling.py new file mode 100644 index 00000000..7c80a3c3 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_argument_handling.py @@ -0,0 +1,293 @@ +""" +Tests for $rand argument handling and error cases. +""" + +from datetime import datetime + +import pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.expression_test_case import ( # noqa: E501 + ExpressionTestCase, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, + execute_expression_with_insert, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + EXPRESSION_NOT_OBJECT_ERROR, + RAND_UNEXPECTED_ARG_ERROR, +) +from documentdb_tests.framework.test_constants import MISSING + + +def test_rand_empty_array_type(collection): + """Test rand with empty array argument returns double type.""" + result = execute_expression(collection, {"$type": {"$rand": []}}) + assert_expression_result(result, expected="double") + + +def test_rand_empty_array_range(collection): + """Test rand with empty array argument returns value in expected range.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$rand": []}}, + "in": {"$and": [{"$gte": ["$$r", 0.0]}, {"$lt": ["$$r", 1.0]}]}, + } + }, + ) + assert_expression_result(result, expected=True) + + +INVALID_INPUTS: list[ExpressionTestCase] = [ + ExpressionTestCase( + "int_1", + expression={"$rand": 1}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for integer argument", + ), + ExpressionTestCase( + "int_0", + expression={"$rand": 0}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for zero argument", + ), + ExpressionTestCase( + "int_neg1", + expression={"$rand": -1}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for negative integer argument", + ), + ExpressionTestCase( + "int_2", + expression={"$rand": 2}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for integer 2 argument", + ), + ExpressionTestCase( + "string", + expression={"$rand": "string"}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for string argument", + ), + ExpressionTestCase( + "null", + expression={"$rand": None}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for null argument", + ), + ExpressionTestCase( + "bool_true", + expression={"$rand": True}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for boolean true argument", + ), + ExpressionTestCase( + "bool_false", + expression={"$rand": False}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for boolean false argument", + ), + ExpressionTestCase( + "double", + expression={"$rand": 1.5}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for double argument", + ), + ExpressionTestCase( + "decimal128", + expression={"$rand": Decimal128("1")}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for Decimal128 argument", + ), + ExpressionTestCase( + "decimal128_nan", + expression={"$rand": Decimal128("NaN")}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for Decimal128 NaN argument", + ), + ExpressionTestCase( + "decimal128_inf", + expression={"$rand": Decimal128("Infinity")}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for Decimal128 Infinity argument", + ), + ExpressionTestCase( + "decimal128_neg_inf", + expression={"$rand": Decimal128("-Infinity")}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for Decimal128 negative Infinity argument", + ), + ExpressionTestCase( + "long", + expression={"$rand": Int64(1)}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for long argument", + ), + ExpressionTestCase( + "date", + expression={"$rand": datetime(2024, 1, 1)}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for date argument", + ), + ExpressionTestCase( + "objectid", + expression={"$rand": ObjectId("000000000000000000000000")}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for ObjectId argument", + ), + ExpressionTestCase( + "regex", + expression={"$rand": Regex(".*")}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for regex argument", + ), + ExpressionTestCase( + "javascript", + expression={"$rand": Code("function(){}")}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for JavaScript argument", + ), + ExpressionTestCase( + "timestamp", + expression={"$rand": Timestamp(0, 1)}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for Timestamp argument", + ), + ExpressionTestCase( + "minkey", + expression={"$rand": MinKey()}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for MinKey argument", + ), + ExpressionTestCase( + "maxkey", + expression={"$rand": MaxKey()}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for MaxKey argument", + ), + ExpressionTestCase( + "binary", + expression={"$rand": Binary(b"")}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for binary argument", + ), + ExpressionTestCase( + "float_nan", + expression={"$rand": float("nan")}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for float NaN argument", + ), + ExpressionTestCase( + "float_inf", + expression={"$rand": float("inf")}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for float Infinity argument", + ), + ExpressionTestCase( + "float_neg_inf", + expression={"$rand": float("-inf")}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for float negative Infinity argument", + ), + ExpressionTestCase( + "array_with_int", + expression={"$rand": [1]}, + error_code=RAND_UNEXPECTED_ARG_ERROR, + msg="Should error for non-empty array with integer", + ), + ExpressionTestCase( + "array_with_object", + expression={"$rand": [{}]}, + error_code=RAND_UNEXPECTED_ARG_ERROR, + msg="Should error for non-empty array with object", + ), + ExpressionTestCase( + "array_with_array", + expression={"$rand": [[]]}, + error_code=RAND_UNEXPECTED_ARG_ERROR, + msg="Should error for non-empty array with nested array", + ), + ExpressionTestCase( + "array_with_two_objects", + expression={"$rand": [{}, {}]}, + error_code=RAND_UNEXPECTED_ARG_ERROR, + msg="Should error for array with two objects", + ), + ExpressionTestCase( + "object_with_field", + expression={"$rand": {"a": 1}}, + error_code=RAND_UNEXPECTED_ARG_ERROR, + msg="Should error for non-empty object with field", + ), + ExpressionTestCase( + "object_with_null", + expression={"$rand": {"a": None}}, + error_code=RAND_UNEXPECTED_ARG_ERROR, + msg="Should error for non-empty object with null field", + ), +] + + +@pytest.mark.parametrize("test", INVALID_INPUTS, ids=lambda t: t.id) +def test_rand_invalid_inputs(collection, test): + """Test rand rejects non-empty array and object arguments.""" + result = execute_expression(collection, test.expression) + assertResult(result, error_code=test.error_code, msg=test.msg) + + +FIELD_REF_TESTS: list[ExpressionTestCase] = [ + ExpressionTestCase( + "field_ref_scalar", + expression={"$rand": "$a"}, + doc={"a": 1}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for field reference to scalar", + ), + ExpressionTestCase( + "missing_field_ref", + expression={"$rand": "$not_exist"}, + doc={}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for missing field reference", + ), + ExpressionTestCase( + "missing_constant", + expression={"$rand": MISSING}, + doc={}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for missing constant field reference", + ), + ExpressionTestCase( + "field_ref_object", + expression={"$rand": "$a"}, + doc={"a": {}}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for field reference resolving to object", + ), + ExpressionTestCase( + "field_ref_array", + expression={"$rand": "$a"}, + doc={"a": []}, + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg="Should error for field reference resolving to array", + ), + ExpressionTestCase( + "array_missing_ref", + expression={"$rand": ["$not_exist"]}, + doc={}, + error_code=RAND_UNEXPECTED_ARG_ERROR, + msg="Should error for array with missing field reference", + ), +] + + +@pytest.mark.parametrize("test", FIELD_REF_TESTS, ids=lambda t: t.id) +def test_rand_field_ref_errors(collection, test): + """Test rand rejects field reference arguments.""" + result = execute_expression_with_insert(collection, test.expression, test.doc) + assertResult(result, error_code=test.error_code, msg=test.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py b/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py new file mode 100644 index 00000000..813da717 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py @@ -0,0 +1,141 @@ +""" +Tests for $rand return value properties. + +Validates return type (double), range [0, 1), per-document independence, +statistical distribution, and precision. +""" + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.test_constants import DOUBLE_ZERO + + +def test_rand_basic(collection): + """Test {$rand: {}} is >= 0.0 and < 1.0.""" + result = execute_expression(collection, {"$rand": {}}) + assert_expression_result( + result, expected=pytest.approx(0.5, abs=0.5), msg="Should return value in [0, 1)" + ) + + +def test_rand_return_type(collection): + """Test {$type: {$rand: {}}} returns 'double'.""" + result = execute_expression(collection, {"$type": {"$rand": {}}}) + assert_expression_result(result, expected="double", msg="Should return double type") + + +def test_rand_two_calls_differ(collection): + """Test two $rand calls in same $project produce different values (high probability).""" + result = execute_expression(collection, {"$ne": [{"$rand": {}}, {"$rand": {}}]}) + assert_expression_result( + result, expected=True, msg="Should produce different values per invocation" + ) + + +def test_rand_per_document_independence(collection): + """Test $rand produces unique values across 100 documents.""" + collection.insert_many([{"_id": i} for i in range(100)]) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$project": {"_id": 0, "r": {"$rand": {}}}}, + {"$group": {"_id": None, "vals": {"$addToSet": "$r"}}}, + {"$project": {"_id": 0, "uniqueCount": {"$size": "$vals"}}}, + ], + "cursor": {}, + }, + ) + # With ~17 significant digits (~10^17 possible values), collision probability + # among 100 values is ~100^2 / (2 * 10^17) = 5e-14 + assertSuccess(result, [{"uniqueCount": 100}], msg="Should produce unique value per document") + + +def test_rand_statistical_average(collection): + """Test $rand average over 10000 docs is near 0.5 (within 10 std devs).""" + collection.insert_many([{"_id": i} for i in range(10000)]) + # Mean of uniform [0,1) = 0.5, std = 1/sqrt(12) ~ 0.2887 + # std of mean = 0.2887/sqrt(10000) ~ 0.002887 + # ±0.03 = ~10.4 std devs, so average should be in [0.47, 0.53] + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$project": {"_id": 0, "r": {"$rand": {}}}}, + {"$group": {"_id": None, "avg": {"$avg": "$r"}}}, + { + "$project": { + "_id": 0, + "inRange": { + "$and": [ + {"$gte": ["$avg", 0.47]}, + {"$lte": ["$avg", 0.53]}, + ] + }, + } + }, + ], + "cursor": {}, + }, + ) + assertSuccess(result, [{"inRange": True}], msg="Should average near 0.5 over 10000 samples") + + +def test_rand_range_validation_1000(collection): + """Test all 1000 $rand values are in [0, 1).""" + collection.insert_many([{"_id": i} for i in range(1000)]) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$project": {"_id": 0, "r": {"$rand": {}}}}, + { + "$match": { + "$expr": { + "$or": [ + {"$lt": ["$r", DOUBLE_ZERO]}, + {"$gte": ["$r", 1.0]}, + ] + } + } + }, + {"$count": "outOfRange"}, + ], + "cursor": {}, + }, + ) + # Expect empty result (no out-of-range values) + assertSuccess(result, [], msg="Should have no out-of-range values") + + +def test_rand_uniform_distribution(collection): + """Test $rand follows uniform distribution by checking 10 equal buckets.""" + collection.insert_many([{"_id": i} for i in range(100000)]) + # Bucket each value into [0..9] via floor(rand * 10). + # For uniform [0,1), each bucket expects ~10000 of 100000 samples. + # Binomial std for each bucket: sqrt(100000 * 0.1 * 0.9) ~ 95. + # We check each bucket has at least 9000 (~10.5 std devs below expected). + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$project": {"_id": 0, "bucket": {"$floor": {"$multiply": [{"$rand": {}}, 10]}}}}, + {"$group": {"_id": "$bucket", "count": {"$sum": 1}}}, + {"$match": {"$expr": {"$lt": ["$count", 9000]}}}, + {"$count": "underfilled"}, + ], + "cursor": {}, + }, + ) + # Expect empty result (no underfilled buckets) + assertSuccess(result, [], msg="Should have no underfilled buckets") diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py b/documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py new file mode 100644 index 00000000..e1b7095f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py @@ -0,0 +1,322 @@ +""" +Tests for $rand expression contexts and operator interactions. +""" + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, + execute_project, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.test_constants import DOUBLE_ZERO + + +def test_rand_nested_in_multiply(collection): + """Test rand nested in multiply expression produces value in expected range.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$multiply": [{"$rand": {}}, 100]}}, + "in": {"$and": [{"$gte": ["$$r", DOUBLE_ZERO]}, {"$lt": ["$$r", 100.0]}]}, + } + }, + ) + assert_expression_result(result, expected=True, msg="Should produce value in [0, 100)") + + +def test_rand_in_object_expression(collection): + """Test rand in object expression context.""" + result = execute_project(collection, {"result": {"a": {"$rand": {}}}}) + batch = result["cursor"]["firstBatch"] + assertSuccess(result, batch, msg="Should produce object with rand field") + + +def test_rand_deep_nesting(collection): + """Test rand deeply nested in floor and multiply produces integer in expected range.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$floor": {"$multiply": [{"$rand": {}}, 100]}}}, + "in": {"$and": [{"$gte": ["$$r", 0]}, {"$lte": ["$$r", 99]}]}, + } + }, + ) + assert_expression_result(result, expected=True, msg="Should produce integer in [0, 99]") + + +# --------------------------------------------------------------------------- +# Null / Missing / Empty collection handling +# --------------------------------------------------------------------------- +def test_rand_on_null_field_document(collection): + """Test rand on a document with null fields still returns a double.""" + collection.insert_one({"_id": 1, "a": None}) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$project": {"_id": 0, "result": {"$type": {"$rand": {}}}}}], + "cursor": {}, + }, + ) + assertSuccess(result, [{"result": "double"}], msg="Should return double on null field doc") + + +def test_rand_on_empty_collection(collection): + """Test rand projection on empty collection returns empty result.""" + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$project": {"_id": 0, "r": {"$rand": {}}}}], + "cursor": {}, + }, + ) + assertSuccess(result, [], msg="Should return empty result on empty collection") + + +# --------------------------------------------------------------------------- +# Arithmetic operators +# --------------------------------------------------------------------------- +def test_rand_multiply_zero(collection): + """Test rand multiplied by zero should always return zero.""" + result = execute_expression(collection, {"$multiply": [{"$rand": {}}, 0]}) + assert_expression_result(result, expected=DOUBLE_ZERO, msg="Should return zero") + + +def test_rand_add_10(collection): + """Test rand plus 10 produces value in expected range.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$add": [{"$rand": {}}, 10]}}, + "in": {"$and": [{"$gte": ["$$r", 10.0]}, {"$lt": ["$$r", 11.0]}]}, + } + }, + ) + assert_expression_result(result, expected=True, msg="Should produce value in [10, 11)") + + +def test_rand_subtract_from_one(collection): + """Test one minus rand produces value in expected range.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$subtract": [1, {"$rand": {}}]}}, + "in": {"$and": [{"$gt": ["$$r", DOUBLE_ZERO]}, {"$lte": ["$$r", 1.0]}]}, + } + }, + ) + assert_expression_result(result, expected=True, msg="Should produce value in (0, 1]") + + +def test_rand_multiply_neg1(collection): + """Test rand multiplied by negative one produces value in expected range.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$multiply": [{"$rand": {}}, -1]}}, + "in": {"$and": [{"$gt": ["$$r", -1.0]}, {"$lte": ["$$r", DOUBLE_ZERO]}]}, + } + }, + ) + assert_expression_result(result, expected=True, msg="Should produce value in (-1, 0]") + + +def test_rand_pow_zero(collection): + """Test rand raised to power zero should return one.""" + result = execute_expression(collection, {"$pow": [{"$rand": {}}, 0]}) + assert_expression_result(result, expected=1.0, msg="Should return 1") + + +def test_rand_pow_one(collection): + """Test rand raised to power one returns value in expected range.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$pow": [{"$rand": {}}, 1]}}, + "in": {"$and": [{"$gte": ["$$r", DOUBLE_ZERO]}, {"$lt": ["$$r", 1.0]}]}, + } + }, + ) + assert_expression_result(result, expected=True, msg="Should produce value in [0, 1)") + + +def test_rand_mod_one(collection): + """Test rand mod one returns value in expected range.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$mod": [{"$rand": {}}, 1]}}, + "in": {"$and": [{"$gte": ["$$r", DOUBLE_ZERO]}, {"$lt": ["$$r", 1.0]}]}, + } + }, + ) + assert_expression_result(result, expected=True, msg="Should produce value in [0, 1)") + + +def test_rand_divide_one_by_rand(collection): + """Test one divided by rand produces value greater than or equal to one.""" + result = execute_expression( + collection, + {"$gte": [{"$divide": [1, {"$rand": {}}]}, 1.0]}, + ) + assert_expression_result(result, expected=True, msg="Should produce value >= 1") + + +# --------------------------------------------------------------------------- +# Multiple invocations with arithmetic +# --------------------------------------------------------------------------- +def test_rand_add_two_calls(collection): + """Test adding two rand calls produces value in expected range.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$add": [{"$rand": {}}, {"$rand": {}}]}}, + "in": {"$and": [{"$gte": ["$$r", DOUBLE_ZERO]}, {"$lt": ["$$r", 2.0]}]}, + } + }, + ) + assert_expression_result(result, expected=True, msg="Should produce value in [0, 2)") + + +def test_rand_subtract_two_calls(collection): + """Test subtracting two rand calls produces value in expected range.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$subtract": [{"$rand": {}}, {"$rand": {}}]}}, + "in": {"$and": [{"$gt": ["$$r", -1.0]}, {"$lt": ["$$r", 1.0]}]}, + } + }, + ) + assert_expression_result(result, expected=True, msg="Should produce value in (-1, 1)") + + +# --------------------------------------------------------------------------- +# Conditional operators +# --------------------------------------------------------------------------- +def test_rand_cond_true_branch(collection): + """Test rand in cond true branch returns a double.""" + result = execute_expression( + collection, + {"$type": {"$cond": [True, {"$rand": {}}, 0]}}, + ) + assert_expression_result(result, expected="double", msg="Should return double from true branch") + + +def test_rand_cond_false_branch(collection): + """Test cond false branch skips rand and returns fallback value.""" + result = execute_expression( + collection, + {"$cond": [False, {"$rand": {}}, 0]}, + ) + assert_expression_result(result, expected=0, msg="Should return fallback value") + + +# --------------------------------------------------------------------------- +# Type operators +# --------------------------------------------------------------------------- +def test_rand_tostring(collection): + """Test rand converted to string produces string type.""" + result = execute_expression( + collection, + {"$eq": [{"$type": {"$toString": {"$rand": {}}}}, "string"]}, + ) + assert_expression_result(result, expected=True, msg="Should produce string type") + + +def test_rand_concat(collection): + """Test rand converted to string and concatenated produces string type.""" + result = execute_expression( + collection, + {"$eq": [{"$type": {"$concat": ["value: ", {"$toString": {"$rand": {}}}]}}, "string"]}, + ) + assert_expression_result(result, expected=True, msg="Should produce string type") + + +# --------------------------------------------------------------------------- +# $let variable binding +# --------------------------------------------------------------------------- +def test_rand_let_variable_reuse(collection): + """Test let variable bound to rand is consistent when reused in same expression.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$rand": {}}}, + "in": {"$eq": [{"$add": ["$$r", "$$r"]}, {"$multiply": ["$$r", 2]}]}, + } + }, + ) + assert_expression_result(result, expected=True, msg="Should be consistent when reused") + + +def test_rand_let_multiple_vars_independent(collection): + """Test multiple let variables bound to rand produce independent values.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"a": {"$rand": {}}, "b": {"$rand": {}}}, + "in": {"$ne": ["$$a", "$$b"]}, + } + }, + ) + assert_expression_result(result, expected=True, msg="Should produce independent values") + + +# --------------------------------------------------------------------------- +# $switch +# --------------------------------------------------------------------------- +def test_rand_switch_produces_valid_result(collection): + """Test rand in switch branches produces one of the expected string values.""" + result = execute_expression( + collection, + { + "$in": [ + { + "$switch": { + "branches": [ + {"case": {"$lt": [{"$rand": {}}, 0.33]}, "then": "low"}, + {"case": {"$lt": [{"$rand": {}}, 0.66]}, "then": "mid"}, + ], + "default": "high", + } + }, + ["low", "mid", "high"], + ] + }, + ) + assert_expression_result(result, expected=True, msg="Should produce one of low/mid/high") + + +def test_rand_filter_produces_subset(collection): + """Test rand in filter condition produces subset of expected size.""" + result = execute_expression( + collection, + { + "$lte": [ + { + "$size": { + "$filter": { + "input": [1, 2, 3, 4, 5], + "as": "x", + "cond": {"$lt": [{"$rand": {}}, 0.5]}, + } + } + }, + 5, + ] + }, + ) + assert_expression_result(result, expected=True, msg="Should produce subset of size <= 5") From 0a2c9fa1a75d1f8562ace8249601637e86b52014 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Thu, 16 Apr 2026 15:07:14 -0700 Subject: [PATCH 2/5] Use pytest_params for $rand test parametrization Signed-off-by: Daniel Frankcom --- .../expressions/misc/rand/test_rand_argument_handling.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_argument_handling.py b/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_argument_handling.py index 7c80a3c3..3d2059dc 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_argument_handling.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_argument_handling.py @@ -20,6 +20,7 @@ EXPRESSION_NOT_OBJECT_ERROR, RAND_UNEXPECTED_ARG_ERROR, ) +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import MISSING @@ -233,7 +234,7 @@ def test_rand_empty_array_range(collection): ] -@pytest.mark.parametrize("test", INVALID_INPUTS, ids=lambda t: t.id) +@pytest.mark.parametrize("test", pytest_params(INVALID_INPUTS)) def test_rand_invalid_inputs(collection, test): """Test rand rejects non-empty array and object arguments.""" result = execute_expression(collection, test.expression) @@ -286,7 +287,7 @@ def test_rand_invalid_inputs(collection, test): ] -@pytest.mark.parametrize("test", FIELD_REF_TESTS, ids=lambda t: t.id) +@pytest.mark.parametrize("test", pytest_params(FIELD_REF_TESTS)) def test_rand_field_ref_errors(collection, test): """Test rand rejects field reference arguments.""" result = execute_expression_with_insert(collection, test.expression, test.doc) From 07a26e0482ad0df180554fa41f288c33aa73eb74 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Thu, 16 Apr 2026 16:11:10 -0700 Subject: [PATCH 3/5] Redistribute $rand tests to appropriate levels in tree Signed-off-by: Daniel Frankcom --- .../misc/rand/test_rand_return_value.py | 109 +------------- .../test_expressions_combination_rand.py | 34 ----- .../test_operator_combination_rand_stages.py | 141 ++++++++++++++++++ 3 files changed, 142 insertions(+), 142 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/operator/test_operator_combination_rand_stages.py diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py b/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py index 813da717..8cf6416c 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py @@ -1,8 +1,7 @@ """ Tests for $rand return value properties. -Validates return type (double), range [0, 1), per-document independence, -statistical distribution, and precision. +Validates return type (double), range [0, 1), and per-invocation independence. """ import pytest @@ -11,9 +10,6 @@ assert_expression_result, execute_expression, ) -from documentdb_tests.framework.assertions import assertSuccess -from documentdb_tests.framework.executor import execute_command -from documentdb_tests.framework.test_constants import DOUBLE_ZERO def test_rand_basic(collection): @@ -36,106 +32,3 @@ def test_rand_two_calls_differ(collection): assert_expression_result( result, expected=True, msg="Should produce different values per invocation" ) - - -def test_rand_per_document_independence(collection): - """Test $rand produces unique values across 100 documents.""" - collection.insert_many([{"_id": i} for i in range(100)]) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$project": {"_id": 0, "r": {"$rand": {}}}}, - {"$group": {"_id": None, "vals": {"$addToSet": "$r"}}}, - {"$project": {"_id": 0, "uniqueCount": {"$size": "$vals"}}}, - ], - "cursor": {}, - }, - ) - # With ~17 significant digits (~10^17 possible values), collision probability - # among 100 values is ~100^2 / (2 * 10^17) = 5e-14 - assertSuccess(result, [{"uniqueCount": 100}], msg="Should produce unique value per document") - - -def test_rand_statistical_average(collection): - """Test $rand average over 10000 docs is near 0.5 (within 10 std devs).""" - collection.insert_many([{"_id": i} for i in range(10000)]) - # Mean of uniform [0,1) = 0.5, std = 1/sqrt(12) ~ 0.2887 - # std of mean = 0.2887/sqrt(10000) ~ 0.002887 - # ±0.03 = ~10.4 std devs, so average should be in [0.47, 0.53] - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$project": {"_id": 0, "r": {"$rand": {}}}}, - {"$group": {"_id": None, "avg": {"$avg": "$r"}}}, - { - "$project": { - "_id": 0, - "inRange": { - "$and": [ - {"$gte": ["$avg", 0.47]}, - {"$lte": ["$avg", 0.53]}, - ] - }, - } - }, - ], - "cursor": {}, - }, - ) - assertSuccess(result, [{"inRange": True}], msg="Should average near 0.5 over 10000 samples") - - -def test_rand_range_validation_1000(collection): - """Test all 1000 $rand values are in [0, 1).""" - collection.insert_many([{"_id": i} for i in range(1000)]) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$project": {"_id": 0, "r": {"$rand": {}}}}, - { - "$match": { - "$expr": { - "$or": [ - {"$lt": ["$r", DOUBLE_ZERO]}, - {"$gte": ["$r", 1.0]}, - ] - } - } - }, - {"$count": "outOfRange"}, - ], - "cursor": {}, - }, - ) - # Expect empty result (no out-of-range values) - assertSuccess(result, [], msg="Should have no out-of-range values") - - -def test_rand_uniform_distribution(collection): - """Test $rand follows uniform distribution by checking 10 equal buckets.""" - collection.insert_many([{"_id": i} for i in range(100000)]) - # Bucket each value into [0..9] via floor(rand * 10). - # For uniform [0,1), each bucket expects ~10000 of 100000 samples. - # Binomial std for each bucket: sqrt(100000 * 0.1 * 0.9) ~ 95. - # We check each bucket has at least 9000 (~10.5 std devs below expected). - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$project": {"_id": 0, "bucket": {"$floor": {"$multiply": [{"$rand": {}}, 10]}}}}, - {"$group": {"_id": "$bucket", "count": {"$sum": 1}}}, - {"$match": {"$expr": {"$lt": ["$count", 9000]}}}, - {"$count": "underfilled"}, - ], - "cursor": {}, - }, - ) - # Expect empty result (no underfilled buckets) - assertSuccess(result, [], msg="Should have no underfilled buckets") diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py b/documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py index e1b7095f..6b4357c0 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py @@ -8,7 +8,6 @@ execute_project, ) from documentdb_tests.framework.assertions import assertSuccess -from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.test_constants import DOUBLE_ZERO @@ -47,39 +46,6 @@ def test_rand_deep_nesting(collection): assert_expression_result(result, expected=True, msg="Should produce integer in [0, 99]") -# --------------------------------------------------------------------------- -# Null / Missing / Empty collection handling -# --------------------------------------------------------------------------- -def test_rand_on_null_field_document(collection): - """Test rand on a document with null fields still returns a double.""" - collection.insert_one({"_id": 1, "a": None}) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [{"$project": {"_id": 0, "result": {"$type": {"$rand": {}}}}}], - "cursor": {}, - }, - ) - assertSuccess(result, [{"result": "double"}], msg="Should return double on null field doc") - - -def test_rand_on_empty_collection(collection): - """Test rand projection on empty collection returns empty result.""" - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [{"$project": {"_id": 0, "r": {"$rand": {}}}}], - "cursor": {}, - }, - ) - assertSuccess(result, [], msg="Should return empty result on empty collection") - - -# --------------------------------------------------------------------------- -# Arithmetic operators -# --------------------------------------------------------------------------- def test_rand_multiply_zero(collection): """Test rand multiplied by zero should always return zero.""" result = execute_expression(collection, {"$multiply": [{"$rand": {}}, 0]}) diff --git a/documentdb_tests/compatibility/tests/core/operator/test_operator_combination_rand_stages.py b/documentdb_tests/compatibility/tests/core/operator/test_operator_combination_rand_stages.py new file mode 100644 index 00000000..67a579c3 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/test_operator_combination_rand_stages.py @@ -0,0 +1,141 @@ +""" +Integration tests for $rand with pipeline stages. + +Covers $rand used within $project, $group, and $match stages: +per-document independence, statistical distribution, range validation, +null field handling, and empty collection behavior. +""" + +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.test_constants import DOUBLE_ZERO + + +def test_rand_project_on_null_field_document(collection): + """Test rand in $project on a document with null fields still returns a double.""" + collection.insert_one({"_id": 1, "a": None}) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$project": {"_id": 0, "result": {"$type": {"$rand": {}}}}}], + "cursor": {}, + }, + ) + assertSuccess(result, [{"result": "double"}], msg="Should return double on null field doc") + + +def test_rand_project_on_empty_collection(collection): + """Test rand in $project on empty collection returns empty result.""" + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$project": {"_id": 0, "r": {"$rand": {}}}}], + "cursor": {}, + }, + ) + assertSuccess(result, [], msg="Should return empty result on empty collection") + + +def test_rand_project_group_per_document_independence(collection): + """Test $rand produces unique values across 100 documents via $project and $group.""" + collection.insert_many([{"_id": i} for i in range(100)]) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$project": {"_id": 0, "r": {"$rand": {}}}}, + {"$group": {"_id": None, "vals": {"$addToSet": "$r"}}}, + {"$project": {"_id": 0, "uniqueCount": {"$size": "$vals"}}}, + ], + "cursor": {}, + }, + ) + # With ~17 significant digits (~10^17 possible values), collision probability + # among 100 values is ~100^2 / (2 * 10^17) = 5e-14 + assertSuccess(result, [{"uniqueCount": 100}], msg="Should produce unique value per document") + + +def test_rand_project_group_statistical_average(collection): + """Test $rand average over 10000 docs is near 0.5 via $project and $group.""" + collection.insert_many([{"_id": i} for i in range(10000)]) + # Mean of uniform [0,1) = 0.5, std = 1/sqrt(12) ~ 0.2887 + # std of mean = 0.2887/sqrt(10000) ~ 0.002887 + # ±0.03 = ~10.4 std devs, so average should be in [0.47, 0.53] + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$project": {"_id": 0, "r": {"$rand": {}}}}, + {"$group": {"_id": None, "avg": {"$avg": "$r"}}}, + { + "$project": { + "_id": 0, + "inRange": { + "$and": [ + {"$gte": ["$avg", 0.47]}, + {"$lte": ["$avg", 0.53]}, + ] + }, + } + }, + ], + "cursor": {}, + }, + ) + assertSuccess(result, [{"inRange": True}], msg="Should average near 0.5 over 10000 samples") + + +def test_rand_project_match_range_validation(collection): + """Test all 1000 $rand values are in [0, 1) via $project and $match.""" + collection.insert_many([{"_id": i} for i in range(1000)]) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$project": {"_id": 0, "r": {"$rand": {}}}}, + { + "$match": { + "$expr": { + "$or": [ + {"$lt": ["$r", DOUBLE_ZERO]}, + {"$gte": ["$r", 1.0]}, + ] + } + } + }, + {"$count": "outOfRange"}, + ], + "cursor": {}, + }, + ) + # Expect empty result (no out-of-range values) + assertSuccess(result, [], msg="Should have no out-of-range values") + + +def test_rand_project_group_uniform_distribution(collection): + """Test $rand uniform distribution by checking 10 equal buckets via $project and $group.""" + collection.insert_many([{"_id": i} for i in range(100000)]) + # Bucket each value into [0..9] via floor(rand * 10). + # For uniform [0,1), each bucket expects ~10000 of 100000 samples. + # Binomial std for each bucket: sqrt(100000 * 0.1 * 0.9) ~ 95. + # We check each bucket has at least 9000 (~10.5 std devs below expected). + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$project": {"_id": 0, "bucket": {"$floor": {"$multiply": [{"$rand": {}}, 10]}}}}, + {"$group": {"_id": "$bucket", "count": {"$sum": 1}}}, + {"$match": {"$expr": {"$lt": ["$count", 9000]}}}, + {"$count": "underfilled"}, + ], + "cursor": {}, + }, + ) + # Expect empty result (no underfilled buckets) + assertSuccess(result, [], msg="Should have no underfilled buckets") From 244efb6df8ff67b3681d68816288a4ae8843cad4 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Fri, 17 Apr 2026 11:17:38 -0700 Subject: [PATCH 4/5] Replace test_rand_basic with strict [0, 1) range check Signed-off-by: Daniel Frankcom --- .../misc/rand/test_rand_return_value.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py b/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py index 8cf6416c..6e1d2f52 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/misc/rand/test_rand_return_value.py @@ -4,20 +4,25 @@ Validates return type (double), range [0, 1), and per-invocation independence. """ -import pytest - from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( assert_expression_result, execute_expression, ) - - -def test_rand_basic(collection): - """Test {$rand: {}} is >= 0.0 and < 1.0.""" - result = execute_expression(collection, {"$rand": {}}) - assert_expression_result( - result, expected=pytest.approx(0.5, abs=0.5), msg="Should return value in [0, 1)" +from documentdb_tests.framework.test_constants import DOUBLE_ZERO + + +def test_rand_empty_object_range(collection): + """Test rand with empty object argument returns value in expected range.""" + result = execute_expression( + collection, + { + "$let": { + "vars": {"r": {"$rand": {}}}, + "in": {"$and": [{"$gte": ["$$r", DOUBLE_ZERO]}, {"$lt": ["$$r", 1.0]}]}, + } + }, ) + assert_expression_result(result, expected=True) def test_rand_return_type(collection): From d9062a7199a06293d71c63ecedfbd1d29709210a Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Fri, 17 Apr 2026 11:17:38 -0700 Subject: [PATCH 5/5] Replace test_rand_divide_one_by_rand with test_rand_divide_by_two Signed-off-by: Daniel Frankcom --- .../test_expressions_combination_rand.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py b/documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py index 6b4357c0..2c81fa1a 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/test_expressions_combination_rand.py @@ -128,13 +128,18 @@ def test_rand_mod_one(collection): assert_expression_result(result, expected=True, msg="Should produce value in [0, 1)") -def test_rand_divide_one_by_rand(collection): - """Test one divided by rand produces value greater than or equal to one.""" +def test_rand_divide_by_two(collection): + """Test rand divided by two produces value in expected range.""" result = execute_expression( collection, - {"$gte": [{"$divide": [1, {"$rand": {}}]}, 1.0]}, + { + "$let": { + "vars": {"r": {"$divide": [{"$rand": {}}, 2]}}, + "in": {"$and": [{"$gte": ["$$r", DOUBLE_ZERO]}, {"$lt": ["$$r", 0.5]}]}, + } + }, ) - assert_expression_result(result, expected=True, msg="Should produce value >= 1") + assert_expression_result(result, expected=True, msg="Should produce value in [0, 0.5)") # ---------------------------------------------------------------------------