diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index ccbf21ac..37026f58 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -11,7 +11,7 @@ jobs: services: mongodb: - image: mongo:8.2 + image: mongo:8.2.4 ports: - 27017:27017 options: >- diff --git a/documentdb_tests/compatibility/tests/core/__init__.py b/documentdb_tests/compatibility/tests/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/__init__.py b/documentdb_tests/compatibility/tests/core/operator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_arguments.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_arguments.py new file mode 100644 index 00000000..1f425f36 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_arguments.py @@ -0,0 +1,141 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, + execute_project_with_insert, +) +from 
documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.error_codes import ( + BSON_TO_STRING_CONVERSION_ERROR, + EXPRESSION_TYPE_MISMATCH_ERROR, + FAILED_TO_PARSE_ERROR, + INVALID_DOLLAR_FIELD_PATH, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.toLower_common import ( + ToLowerTest, + _expr, +) + +# Property [Expression Arguments]: the operator accepts expressions, $literal, and coercible +# expression results, resolving and coercing them before lowercasing. +TOLOWER_EXPR_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "expr_toupper_roundtrip", + value={"$toUpper": "hello"}, + expected="hello", + msg="$toLower should reverse $toUpper on ASCII letters", + ), + ToLowerTest( + "expr_literal_dollar_sign", + value={"$literal": "$HELLO"}, + expected="$hello", + msg="$toLower should lowercase $literal string preserving dollar sign", + ), + ToLowerTest( + "expr_coercible_numeric_result", + value={"$add": [1, 2]}, + expected="3", + msg="$toLower should coerce numeric expression result to string", + ), +] + +# Property [Arity Success]: a single-element array is unwrapped and processed as the argument, +# including null and coercible types. +TOLOWER_ARITY_SUCCESS_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "arity_single_element", + value=["HELLO"], + expected="hello", + msg="$toLower should unwrap single-element string array", + ), + ToLowerTest( + "arity_single_null", + value=[None], + expected="", + msg="$toLower should unwrap single-element null array to empty string", + ), + ToLowerTest( + "arity_single_coercible", + value=[42], + expected="42", + msg="$toLower should unwrap single-element coercible array", + ), +] + +# Property [Arity Errors]: an empty array or an array with more than one element produces an +# error. 
+TOLOWER_ARITY_ERROR_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "arity_empty_array", + value=[], + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$toLower should reject empty array", + ), + ToLowerTest( + "arity_two_elements", + value=["a", "b"], + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$toLower should reject two-element array", + ), + # Single-element array containing a non-coercible type is unwrapped and errors. + ToLowerTest( + "arity_single_bool", + value=[True], + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject single-element array with boolean", + ), +] + +# Property [Field Path Errors]: '$' is rejected as a field path, '$$' as an empty variable name. +TOLOWER_FIELD_PATH_ERROR_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "fieldpath_bare_dollar", + value="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$toLower should reject bare '$' as invalid field path", + ), + ToLowerTest( + "fieldpath_double_dollar", + value="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$toLower should reject '$$' as empty variable name", + ), +] + +TOLOWER_ARGUMENT_TESTS = ( + TOLOWER_EXPR_TESTS + + TOLOWER_ARITY_SUCCESS_TESTS + + TOLOWER_ARITY_ERROR_TESTS + + TOLOWER_FIELD_PATH_ERROR_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(TOLOWER_ARGUMENT_TESTS)) +def test_tolower_arguments(collection, test_case: ToLowerTest): + """Run each $toLower argument case and assert its expected result or error code.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) + + +def test_tolower_document_fields(collection): + """Test $toLower lowercases a string read from a document through the "$val" field path.""" + result = execute_project_with_insert( + collection, + {"val": "HELLO WORLD"}, + {"result": {"$toLower": "$val"}}, + ) + assertSuccess( + result, + [{"result": "hello world"}], + msg="$toLower should read values from document fields", + ) diff --git
a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_characters.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_characters.py new file mode 100644 index 00000000..8fe863c7 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_characters.py @@ -0,0 +1,280 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.toLower_common import ( + ToLowerTest, + _expr, +) + +# Property [Core Conversion Behavior]: only ASCII uppercase letters A-Z are converted to lowercase; +# all other characters are unchanged. +TOLOWER_CORE_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "core_ascii_upper", + value="HELLO", + expected="hello", + msg="$toLower should convert ASCII uppercase to lowercase", + ), + ToLowerTest( + "core_mixed_case", + value="Hello World", + expected="hello world", + msg="$toLower should convert mixed case to lowercase", + ), + ToLowerTest( + "core_digits", value="12345", expected="12345", msg="$toLower should leave digits unchanged" + ), + ToLowerTest( + "core_punctuation", + value="!@#$%^&*()", + expected="!@#$%^&*()", + msg="$toLower should leave punctuation unchanged", + ), + ToLowerTest( + "core_whitespace", + value="hello world\t\n", + expected="hello world\t\n", + msg="$toLower should preserve whitespace characters", + ), + # U+00A0 non-breaking space, U+2002 en space, U+2003 em space. 
+ ToLowerTest( + "core_unicode_whitespace_nbsp", + value="A\u00a0B", + expected="a\u00a0b", + msg="$toLower should preserve non-breaking space", + ), + ToLowerTest( + "core_unicode_whitespace_en_em", + value="A\u2002B\u2003C", + expected="a\u2002b\u2003c", + msg="$toLower should preserve en space and em space", + ), + # Control characters U+0001, U+0002, U+001F. + ToLowerTest( + "core_control_chars", + value="\x01\x02\x1f", + expected="\x01\x02\x1f", + msg="$toLower should leave control characters unchanged", + ), + # Non-ASCII Unicode letters with case folding are not converted. + ToLowerTest( + "core_mixed_ascii_nonascii", + value="RÉSUMÉ", + expected="rÉsumÉ", + msg="$toLower should only convert ASCII letters in mixed string", + ), + # Greek uppercase Sigma, Omega, Pi. + ToLowerTest( + "core_greek_unchanged", + value="ΣΩΠ", + expected="ΣΩΠ", + msg="$toLower should not convert Greek uppercase letters", + ), + # Cyrillic uppercase De, Zhe. + ToLowerTest( + "core_cyrillic_unchanged", + value="ДЖ", + expected="ДЖ", + msg="$toLower should not convert Cyrillic uppercase letters", + ), + # Deseret capital letter long I (U+10400). + ToLowerTest( + "core_deseret_unchanged", + value="\U00010400", + expected="\U00010400", + msg="$toLower should not convert Deseret capital letter", + ), + # Latin extended: ñ. + ToLowerTest( + "core_latin_extended_lower_unchanged", + value="ñ", + expected="ñ", + msg="$toLower should leave Latin extended lowercase unchanged", + ), + # CJK and emoji pass through. + ToLowerTest( + "core_cjk_unchanged", + value="日本語", + expected="日本語", + msg="$toLower should leave CJK characters unchanged", + ), + ToLowerTest( + "core_emoji_unchanged", + value="🎉🚀", + expected="🎉🚀", + msg="$toLower should leave emoji unchanged", + ), + # Special Unicode case mappings are not applied. + ToLowerTest( + "core_eszett_no_expansion", + value="ß", + expected="ß", + msg="$toLower should not apply special case mapping for eszett", + ), + # U+FB01 Latin small ligature fi. 
+ ToLowerTest( + "core_fi_ligature_no_expansion", + value="\ufb01", + expected="\ufb01", + msg="$toLower should not expand fi ligature", + ), + # U+0130 Latin capital letter I with dot above. + ToLowerTest( + "core_turkish_i_no_mapping", + value="\u0130", + expected="\u0130", + msg="$toLower should not apply Turkish I mapping", + ), + # Zero-width characters: BOM (U+FEFF), ZWSP (U+200B), ZWJ (U+200D), LRM (U+200E), + # RLM (U+200F). + ToLowerTest( + "core_bom", value="\ufeff", expected="\ufeff", msg="$toLower should preserve BOM character" + ), + ToLowerTest( + "core_zwsp", + value="\u200b", + expected="\u200b", + msg="$toLower should preserve zero-width space", + ), + ToLowerTest( + "core_zwj", + value="\u200d", + expected="\u200d", + msg="$toLower should preserve zero-width joiner", + ), + ToLowerTest( + "core_lrm", + value="\u200e", + expected="\u200e", + msg="$toLower should preserve left-to-right mark", + ), + ToLowerTest( + "core_rlm", + value="\u200f", + expected="\u200f", + msg="$toLower should preserve right-to-left mark", + ), + # ZWJ emoji sequence: man + ZWJ + woman + ZWJ + girl. + ToLowerTest( + "core_zwj_emoji_sequence", + value="\U0001f468\u200d\U0001f469\u200d\U0001f467", + expected="\U0001f468\u200d\U0001f469\u200d\U0001f467", + msg="$toLower should preserve ZWJ emoji sequence", + ), + # ASCII boundary characters: backtick (U+0060) before 'a', '{' (U+007B) after 'z', + # '@' (U+0040) before 'A', '[' (U+005B) after 'Z'. + ToLowerTest( + "core_backtick_before_a", + value="`", + expected="`", + msg="$toLower should leave backtick unchanged", + ), + ToLowerTest( + "core_lbrace_after_z", + value="{", + expected="{", + msg="$toLower should leave open brace unchanged", + ), + ToLowerTest( + "core_at_before_A", value="@", expected="@", msg="$toLower should leave at sign unchanged" + ), + ToLowerTest( + "core_lbracket_after_Z", + value="[", + expected="[", + msg="$toLower should leave open bracket unchanged", + ), + # Multi-byte UTF-8: 2-byte (U+00E9). 
+ ToLowerTest( + "core_2byte_utf8", + value="\u00e9", + expected="\u00e9", + msg="$toLower should leave 2-byte UTF-8 character unchanged", + ), + # Embedded null byte (U+0000) preserved, surrounding characters still converted. + ToLowerTest( + "core_embedded_null_byte", + value="A\x00B", + expected="a\x00b", + msg="$toLower should preserve embedded null byte", + ), +] + +# Property [Normalization Independence]: the operator processes individual codepoints, not +# normalized or composed grapheme clusters. +TOLOWER_COMBINING_TESTS: list[ToLowerTest] = [ + # A (U+0041) + combining acute accent (U+0301). + ToLowerTest( + "combining_ascii_base_with_acute", + value="A\u0301", + expected="a\u0301", + msg="$toLower should lowercase ASCII base with combining acute", + ), + # Precomposed É (U+00C9) is a single non-ASCII codepoint, not converted. + ToLowerTest( + "combining_precomposed_unchanged", + value="\u00c9", + expected="\u00c9", + msg="$toLower should not convert precomposed E-acute", + ), + # Combining acute accent alone (U+0301), no base character. + ToLowerTest( + "combining_mark_alone", + value="\u0301", + expected="\u0301", + msg="$toLower should preserve combining mark alone", + ), + # Non-ASCII base Σ (U+03A3) + combining acute (U+0301). + ToLowerTest( + "combining_nonascii_base_with_mark", + value="\u03a3\u0301", + expected="\u03a3\u0301", + msg="$toLower should not convert non-ASCII base with combining mark", + ), +] + +# Property [Identity]: empty strings and already-lowercase strings return unchanged. 
+TOLOWER_IDENTITY_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "identity_empty", value="", expected="", msg="$toLower should return empty string unchanged" + ), + ToLowerTest( + "identity_lowercase", + value="hello", + expected="hello", + msg="$toLower should return lowercase string unchanged", + ), + ToLowerTest( + "identity_lowercase_with_space", + value="hello world", + expected="hello world", + msg="$toLower should return lowercase string with spaces unchanged", + ), + ToLowerTest( + "identity_lowercase_with_digits", + value="abc123", + expected="abc123", + msg="$toLower should return lowercase string with digits unchanged", + ), +] + +TOLOWER_CHARACTER_TESTS = TOLOWER_CORE_TESTS + TOLOWER_COMBINING_TESTS + TOLOWER_IDENTITY_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(TOLOWER_CHARACTER_TESTS)) +def test_tolower_characters(collection, test_case: ToLowerTest): + """Run each core, combining-mark, and identity case through $toLower and check the result.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_coercion.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_coercion.py new file mode 100644 index 00000000..795228ea --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_coercion.py @@ -0,0 +1,365 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Code, Decimal128, Int64, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY,
DECIMAL128_LARGE_EXPONENT, + DECIMAL128_MAX_NEGATIVE, + DECIMAL128_MIN_POSITIVE, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_ZERO, + DECIMAL128_ONE_AND_HALF, + DECIMAL128_ZERO, + DOUBLE_MAX, + DOUBLE_MIN_SUBNORMAL, + DOUBLE_NEGATIVE_ZERO, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + INT32_MAX, + INT32_MIN, + INT64_MAX, + INT64_MIN, +) + +from .utils.toLower_common import ( + ToLowerTest, + _expr, +) + +# Property [Type Coercion]: non-string coercible types are converted to their string +# representation before lowercasing. +TOLOWER_COERCION_TESTS: list[ToLowerTest] = [ + # int32 + ToLowerTest( + "coerce_int32", value=42, expected="42", msg="$toLower should coerce int32 to string" + ), + ToLowerTest( + "coerce_int32_zero", + value=0, + expected="0", + msg="$toLower should coerce int32 zero to string", + ), + ToLowerTest( + "coerce_int32_negative", + value=-123, + expected="-123", + msg="$toLower should coerce negative int32 to string", + ), + # int64 + ToLowerTest( + "coerce_int64", + value=Int64(9_876_543_210), + expected="9876543210", + msg="$toLower should coerce int64 to string", + ), + ToLowerTest( + "coerce_int64_max", + value=INT64_MAX, + expected="9223372036854775807", + msg="$toLower should coerce INT64_MAX to string", + ), + ToLowerTest( + "coerce_int64_min", + value=INT64_MIN, + expected="-9223372036854775808", + msg="$toLower should coerce INT64_MIN to string", + ), + # double: whole numbers drop trailing .0 + ToLowerTest( + "coerce_double_whole", + value=3.0, + expected="3", + msg="$toLower should coerce whole double without trailing .0", + ), + ToLowerTest( + "coerce_double_fractional", + value=3.14, + expected="3.14", + msg="$toLower should coerce fractional double to string", + ), + # Negative zero preserved. 
+ ToLowerTest( + "coerce_double_neg_zero", + value=DOUBLE_NEGATIVE_ZERO, + expected="-0", + msg="$toLower should preserve negative zero in coerced double", + ), + # Double precision is limited to 6 significant digits. + ToLowerTest( + "coerce_double_precision_limit", + value=123456.789, + expected="123457", + msg="$toLower should round double to 6 significant digits", + ), + # Scientific notation for large values (>= 1,000,000). + ToLowerTest( + "coerce_double_large_sci", + value=1e20, + expected="1e+20", + msg="$toLower should use lowercase e in scientific notation for large double", + ), + ToLowerTest( + "coerce_double_million", + value=1_000_000.0, + expected="1e+06", + msg="$toLower should use scientific notation for one million", + ), + ToLowerTest( + "coerce_double_below_million", + value=999_999.0, + expected="999999", + msg="$toLower should use decimal notation below one million", + ), + # Rounding can push a value across the scientific notation threshold. + ToLowerTest( + "coerce_double_round_to_sci", + value=999_999.5, + expected="1e+06", + msg="$toLower should use scientific notation when rounding crosses threshold", + ), + # Scientific notation for small values (< 0.0001). + ToLowerTest( + "coerce_double_small_sci", + value=0.000001, + expected="1e-06", + msg="$toLower should use scientific notation for small double", + ), + ToLowerTest( + "coerce_double_at_threshold", + value=0.0001, + expected="0.0001", + msg="$toLower should use decimal notation at small threshold", + ), + ToLowerTest( + "coerce_double_below_threshold", + value=0.00009, + expected="9e-05", + msg="$toLower should use scientific notation below small threshold", + ), + # Special float values. 
+ ToLowerTest( + "coerce_double_nan", + value=FLOAT_NAN, + expected="nan", + msg="$toLower should coerce NaN to lowercase nan", + ), + ToLowerTest( + "coerce_double_inf", + value=FLOAT_INFINITY, + expected="inf", + msg="$toLower should coerce Infinity to lowercase inf", + ), + ToLowerTest( + "coerce_double_neg_inf", + value=FLOAT_NEGATIVE_INFINITY, + expected="-inf", + msg="$toLower should coerce -Infinity to lowercase -inf", + ), + # Decimal128 preserves full precision and trailing zeros. + ToLowerTest( + "coerce_decimal128", + value=DECIMAL128_ONE_AND_HALF, + expected="1.5", + msg="$toLower should coerce Decimal128 to string", + ), + ToLowerTest( + "coerce_decimal128_trailing", + value=Decimal128("1.500"), + expected="1.500", + msg="$toLower should preserve Decimal128 trailing zeros", + ), + ToLowerTest( + "coerce_decimal128_zero", + value=DECIMAL128_ZERO, + expected="0", + msg="$toLower should coerce Decimal128 zero to string", + ), + ToLowerTest( + "coerce_decimal128_neg_zero", + value=DECIMAL128_NEGATIVE_ZERO, + expected="-0", + msg="$toLower should preserve Decimal128 negative zero", + ), + ToLowerTest( + "coerce_decimal128_negative", + value=Decimal128("-42.5"), + expected="-42.5", + msg="$toLower should coerce negative Decimal128 to string", + ), + ToLowerTest( + "coerce_decimal128_full_precision", + value=Decimal128("12345678901234567890.1234567890"), + expected="12345678901234567890.1234567890", + msg="$toLower should preserve Decimal128 full precision", + ), + # Decimal128 special values. 
+ ToLowerTest( + "coerce_decimal128_nan", + value=DECIMAL128_NAN, + expected="nan", + msg="$toLower should coerce Decimal128 NaN to lowercase nan", + ), + ToLowerTest( + "coerce_decimal128_inf", + value=DECIMAL128_INFINITY, + expected="infinity", + msg="$toLower should coerce Decimal128 Infinity to lowercase infinity", + ), + ToLowerTest( + "coerce_decimal128_neg_inf", + value=DECIMAL128_NEGATIVE_INFINITY, + expected="-infinity", + msg="$toLower should coerce Decimal128 -Infinity to lowercase -infinity", + ), + # Datetime coerced to ISO 8601 with lowercase t and z. + ToLowerTest( + "coerce_datetime", + value=datetime(2024, 6, 15, 12, 30, 45, tzinfo=timezone.utc), + expected="2024-06-15t12:30:45.000z", + msg="$toLower should coerce datetime to ISO 8601 with lowercase t and z", + ), + ToLowerTest( + "coerce_datetime_epoch", + value=datetime(1970, 1, 1, tzinfo=timezone.utc), + expected="1970-01-01t00:00:00.000z", + msg="$toLower should coerce epoch datetime to ISO 8601", + ), + ToLowerTest( + "coerce_datetime_millis", + value=datetime(2024, 6, 15, 12, 30, 45, 123_000, tzinfo=timezone.utc), + expected="2024-06-15t12:30:45.123z", + msg="$toLower should coerce datetime with milliseconds", + ), + # Timestamp coerced to custom format with month abbreviation lowercased. + ToLowerTest( + "coerce_timestamp_single_digit_day", + value=Timestamp(1704067200, 1), + expected="jan 1 00:00:00:1", + msg="$toLower should coerce Timestamp with single-digit day", + ), + ToLowerTest( + "coerce_timestamp_double_digit_day", + value=Timestamp(1721500800, 1), + expected="jul 20 18:40:00:1", + msg="$toLower should coerce Timestamp with double-digit day", + ), + ToLowerTest( + "coerce_timestamp_increment", + value=Timestamp(1704067200, 42), + expected="jan 1 00:00:00:42", + msg="$toLower should coerce Timestamp preserving increment", + ), + # Code (without scope) coerced to its code string before lowercasing. 
+ ToLowerTest( + "coerce_code", + value=Code("function() { return HELLO; }"), + expected="function() { return hello; }", + msg="$toLower should coerce Code and lowercase its content", + ), + # Integer boundary values. + ToLowerTest( + "coerce_int32_max", + value=INT32_MAX, + expected="2147483647", + msg="$toLower should coerce INT32_MAX to string", + ), + ToLowerTest( + "coerce_int32_min", + value=INT32_MIN, + expected="-2147483648", + msg="$toLower should coerce INT32_MIN to string", + ), + # Float boundary values. + ToLowerTest( + "coerce_subnormal_double", + value=DOUBLE_MIN_SUBNORMAL, + expected="4.94066e-324", + msg="$toLower should coerce subnormal double to string", + ), + ToLowerTest( + "coerce_max_double", + value=DOUBLE_MAX, + expected="1.79769e+308", + msg="$toLower should coerce near-max double to string", + ), + # Decimal128 boundary values. + ToLowerTest( + "coerce_decimal128_max_precision", + value=Decimal128("1234567890123456789012345678901234"), + expected="1234567890123456789012345678901234", + msg="$toLower should preserve Decimal128 34-digit precision", + ), + ToLowerTest( + "coerce_decimal128_extreme_exponent", + value=DECIMAL128_LARGE_EXPONENT, + expected="1.000000000000000000000000000000000e+6144", + msg="$toLower should lowercase Decimal128 large exponent notation", + ), + ToLowerTest( + "coerce_decimal128_tiny_exponent", + value=DECIMAL128_MIN_POSITIVE, + expected="1e-6176", + msg="$toLower should lowercase Decimal128 tiny exponent notation", + ), + ToLowerTest( + "coerce_decimal128_max_negative", + value=DECIMAL128_MAX_NEGATIVE, + expected="-1e-6176", + msg="$toLower should lowercase Decimal128 maximum negative value", + ), + # Datetime boundary values. 
+ ToLowerTest( + "coerce_datetime_pre_epoch", + value=datetime(1969, 12, 31, 23, 59, 59, tzinfo=timezone.utc), + expected="1969-12-31t23:59:59.000z", + msg="$toLower should coerce pre-epoch datetime to ISO 8601", + ), + ToLowerTest( + "coerce_datetime_far_future", + value=datetime(9999, 12, 31, 23, 59, 59, tzinfo=timezone.utc), + expected="9999-12-31t23:59:59.000z", + msg="$toLower should coerce far-future datetime to ISO 8601", + ), + # Timestamp boundary values. + ToLowerTest( + "coerce_timestamp_zero", + value=Timestamp(0, 0), + expected="jan 1 00:00:00:0", + msg="$toLower should coerce zero Timestamp to epoch", + ), + ToLowerTest( + "coerce_timestamp_max_increment", + value=Timestamp(0, 2**32 - 1), + expected="jan 1 00:00:00:4294967295", + msg="$toLower should coerce Timestamp with max increment", + ), + ToLowerTest( + "coerce_timestamp_max_time", + value=Timestamp(2**32 - 1, 1), + expected="feb 7 06:28:15:1", + msg="$toLower should coerce Timestamp with max time value", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TOLOWER_COERCION_TESTS)) +def test_tolower_coercion(collection, test_case: ToLowerTest): + """Check the lowercased string form $toLower produces for each coercible non-string value.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_invariants.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_invariants.py new file mode 100644 index 00000000..4f606aac --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_invariants.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project, +) +from
documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF + +from .utils.toLower_common import ( + ToLowerTest, +) + +# Property [Idempotency]: applying $toLower to an already-lowercased result produces the same +# result. +TOLOWER_IDEMPOTENCY_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "idempotency_upper", + value="HELLO WORLD", + expected="hello world", + msg="$toLower applied twice should equal single application on uppercase", + ), + ToLowerTest( + "idempotency_lowercase", + value="hello world", + expected="hello world", + msg="$toLower applied twice should equal single application on lowercase", + ), + ToLowerTest( + "idempotency_mixed", + value="HeLLo WoRLd", + expected="hello world", + msg="$toLower applied twice should equal single application on mixed case", + ), + ToLowerTest( + "idempotency_nonascii", + value="RÉSUMÉ", + expected="rÉsumÉ", + msg="$toLower applied twice should equal single application on non-ASCII", + ), + ToLowerTest( + "idempotency_empty", + value="", + expected="", + msg="$toLower applied twice should equal single application on empty string", + ), + ToLowerTest( + "idempotency_digits", + value="12345", + expected="12345", + msg="$toLower applied twice should equal single application on digits", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TOLOWER_IDEMPOTENCY_TESTS)) +def test_tolower_idempotency(collection, test_case: ToLowerTest): + """Project $toLower applied once and twice and expect both to equal the expected text.""" + once = {"$toLower": test_case.value} + twice = {"$toLower": once} + result = execute_project(collection, {"once": once, "twice": twice}) + assertSuccess( + result, [{"once": test_case.expected, "twice": test_case.expected}], msg=test_case.msg + ) + + +# Property [Return Type]: the result is always a string when the expression succeeds.
+TOLOWER_RETURN_TYPE_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "return_type_string", + value="HELLO", + msg="$toLower should return type string for string input", + ), + ToLowerTest( + "return_type_int32", value=42, msg="$toLower should return type string for coerced int32" + ), + ToLowerTest( + "return_type_double", + value=3.14, + msg="$toLower should return type string for coerced double", + ), + ToLowerTest( + "return_type_decimal128", + value=DECIMAL128_ONE_AND_HALF, + msg="$toLower should return type string for coerced Decimal128", + ), + ToLowerTest( + "return_type_expression", + value={"$concat": ["A", "B"]}, + msg="$toLower should return type string for expression input", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TOLOWER_RETURN_TYPE_TESTS)) +def test_tolower_return_type(collection, test_case: ToLowerTest): + """Wrap $toLower in $type and expect the type name "string" for every input.""" + result = execute_expression(collection, {"$type": {"$toLower": test_case.value}}) + assertSuccess(result, [{"result": "string"}], msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_null.py new file mode 100644 index 00000000..a8de6752 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_null.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import MISSING + +from .utils.toLower_common import ( + ToLowerTest, + _expr, +) + +# Property [Null and Missing Behavior]: null, missing, undefined, and null-valued expressions +# all produce an empty string (only null and missing are exercised in this module).
+TOLOWER_NULL_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "null_value", value=None, expected="", msg="$toLower should return empty string for null" + ), + ToLowerTest( + "missing_value", + value=MISSING, + expected="", + msg="$toLower should return empty string for missing field", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TOLOWER_NULL_TESTS)) +def test_tolower_null(collection, test_case: ToLowerTest): + """Check that null and missing operands both make $toLower return an empty string.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_size_limit.py new file mode 100644 index 00000000..37702891 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_size_limit.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.error_codes import STRING_SIZE_LIMIT_ERROR +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import STRING_SIZE_LIMIT_BYTES + +from .utils.toLower_common import ( + ToLowerTest, + _expr, +) + +# Property [String Size Limit Success]: input strings just under the size limit are accepted.
+TOLOWER_SIZE_LIMIT_SUCCESS_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "size_one_under", + value="A" * (STRING_SIZE_LIMIT_BYTES - 1), + expected="a" * (STRING_SIZE_LIMIT_BYTES - 1), + msg="$toLower should accept input string one byte under the 16 MB limit", + ), +] + +# Property [String Size Limit]: input strings at or above the size limit produce an error. +TOLOWER_SIZE_LIMIT_ERROR_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "size_at_limit", + value="a" * STRING_SIZE_LIMIT_BYTES, + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$toLower should reject input string at the 16 MB byte limit", + ), +] + +TOLOWER_SIZE_LIMIT_TESTS = TOLOWER_SIZE_LIMIT_SUCCESS_TESTS + TOLOWER_SIZE_LIMIT_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(TOLOWER_SIZE_LIMIT_TESTS)) +def test_tolower_size_limit(collection, test_case: ToLowerTest): + """Check $toLower accepts input one byte under the size limit and errors at the limit.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_type_errors.py new file mode 100644 index 00000000..3e7afff7 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/test_toLower_type_errors.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import pytest +from bson import Binary, Code, MaxKey, MinKey, ObjectId, Regex + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.error_codes import BSON_TO_STRING_CONVERSION_ERROR +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.toLower_common import ( + ToLowerTest, + _expr, +) + +# Property
[Type Strictness]: non-coercible BSON types produce an error. +TOLOWER_TYPE_ERROR_TESTS: list[ToLowerTest] = [ + ToLowerTest( + "type_bool_true", + value=True, + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject boolean true", + ), + ToLowerTest( + "type_bool_false", + value=False, + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject boolean false", + ), + ToLowerTest( + "type_object", + value={"a": 1}, + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject object", + ), + ToLowerTest( + "type_objectid", + value=ObjectId("000000000000000000000000"), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject ObjectId", + ), + ToLowerTest( + "type_binary", + value=Binary(b"\x00"), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject Binary", + ), + ToLowerTest( + "type_binary_uuid", + value=Binary(b"\x00" * 16, 4), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject Binary UUID", + ), + ToLowerTest( + "type_regex", + value=Regex("abc"), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject Regex", + ), + ToLowerTest( + "type_code_with_scope", + value=Code("x", {}), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject Code with scope", + ), + ToLowerTest( + "type_minkey", + value=MinKey(), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject MinKey", + ), + ToLowerTest( + "type_maxkey", + value=MaxKey(), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject MaxKey", + ), + ToLowerTest( + "type_nested_array", + value=[[1]], + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject nested array", + ), + # Expression that evaluates to an array at runtime. 
+ ToLowerTest( + "type_runtime_array", + value={"$literal": [1, 2]}, + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toLower should reject array from expression", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TOLOWER_TYPE_ERROR_TESTS)) +def test_tolower_type_errors(collection, test_case: ToLowerTest): + """Test $toLower type strictness.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/utils/toLower_common.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/utils/toLower_common.py new file mode 100644 index 00000000..a253157a --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toLower/utils/toLower_common.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from documentdb_tests.framework.test_case import BaseTestCase + + +@dataclass(frozen=True) +class ToLowerTest(BaseTestCase): + """Test case for $toLower operator.""" + + value: Any = None + + +def _expr(test_case: ToLowerTest) -> dict[str, Any]: + return {"$toLower": test_case.value} diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_arguments.py 
b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_arguments.py new file mode 100644 index 00000000..6d6de531 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_arguments.py @@ -0,0 +1,141 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, + execute_project_with_insert, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.error_codes import ( + BSON_TO_STRING_CONVERSION_ERROR, + EXPRESSION_TYPE_MISMATCH_ERROR, + FAILED_TO_PARSE_ERROR, + INVALID_DOLLAR_FIELD_PATH, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.toUpper_common import ( + ToUpperTest, + _expr, +) + +# Property [Expression Arguments]: the operator accepts expressions, $literal, and operator +# composition as arguments. +TOUPPER_EXPR_TESTS: list[ToUpperTest] = [ + ToUpperTest( + "expr_operator_composition", + value={"$concat": ["hello", " world"]}, + expected="HELLO WORLD", + msg="$toUpper should uppercase result of $concat expression", + ), + ToUpperTest( + "expr_literal_dollar", + value={"$literal": "$hello"}, + expected="$HELLO", + msg="$toUpper should uppercase $literal string preserving dollar sign", + ), + ToUpperTest( + "expr_coercible_result", + value={"$add": [1, 2]}, + expected="3", + msg="$toUpper should coerce numeric expression result to string", + ), +] + +# Property [Arity Success]: a single-element array is unwrapped and processed as the argument. 
+TOUPPER_ARITY_SUCCESS_TESTS: list[ToUpperTest] = [ + ToUpperTest( + "arity_single_string", + value=["hello"], + expected="HELLO", + msg="$toUpper should unwrap single-element string array", + ), + ToUpperTest( + "arity_single_null", + value=[None], + expected="", + msg="$toUpper should unwrap single-element null array to empty string", + ), + ToUpperTest( + "arity_single_coercible", + value=[42], + expected="42", + msg="$toUpper should unwrap single-element coercible array", + ), +] + +# Property [Arity Errors]: an empty array or an array with more than one element produces an +# error; a single-element array containing a non-coercible type produces a type error. +TOUPPER_ARITY_ERROR_TESTS: list[ToUpperTest] = [ + ToUpperTest( + "arity_empty_array", + value=[], + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$toUpper should reject empty array", + ), + ToUpperTest( + "arity_two_elements", + value=["a", "b"], + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$toUpper should reject two-element array", + ), + ToUpperTest( + "arity_single_non_coercible", + value=[True], + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject single-element array with non-coercible type", + ), +] + +# Property [Field Path Errors]: bare '$' and '$$' are rejected as invalid field paths. 
+TOUPPER_FIELD_PATH_ERROR_TESTS: list[ToUpperTest] = [ + ToUpperTest( + "fieldpath_bare_dollar", + value="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$toUpper should reject bare '$' as invalid field path", + ), + ToUpperTest( + "fieldpath_double_dollar", + value="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$toUpper should reject '$$' as empty variable name", + ), +] + +TOUPPER_ARGUMENT_TESTS = ( + TOUPPER_EXPR_TESTS + + TOUPPER_ARITY_SUCCESS_TESTS + + TOUPPER_ARITY_ERROR_TESTS + + TOUPPER_FIELD_PATH_ERROR_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(TOUPPER_ARGUMENT_TESTS)) +def test_toupper_arguments(collection, test_case: ToUpperTest): + """Test $toUpper argument handling.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) + + +# Property [Document Field References]: field references and nested field references are resolved +# before conversion. 
+def test_toupper_document_fields(collection): + """Test $toUpper reads values from document fields.""" + result = execute_project_with_insert( + collection, + {"val": "hello"}, + {"result": {"$toUpper": "$val"}}, + ) + assertSuccess( + result, + [{"result": "HELLO"}], + msg="$toUpper should read values from document fields", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_characters.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_characters.py new file mode 100644 index 00000000..40304c18 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_characters.py @@ -0,0 +1,426 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.toUpper_common import ( + ToUpperTest, + _expr, +) + +# Property [Core Conversion Behavior]: only ASCII lowercase letters are converted to uppercase; +# all other characters pass through unchanged. +TOUPPER_CORE_TESTS: list[ToUpperTest] = [ + # ASCII lowercase to uppercase. + ToUpperTest( + "core_ascii_lowercase", + value="hello", + expected="HELLO", + msg="$toUpper should convert ASCII lowercase to uppercase", + ), + ToUpperTest( + "core_all_lowercase", + value="abcdefghijklmnopqrstuvwxyz", + expected="ABCDEFGHIJKLMNOPQRSTUVWXYZ", + msg="$toUpper should convert all ASCII lowercase letters", + ), + # ASCII uppercase unchanged. 
+ ToUpperTest( + "core_ascii_uppercase", + value="HELLO", + expected="HELLO", + msg="$toUpper should leave ASCII uppercase unchanged", + ), + ToUpperTest( + "core_all_uppercase", + value="ABCDEFGHIJKLMNOPQRSTUVWXYZ", + expected="ABCDEFGHIJKLMNOPQRSTUVWXYZ", + msg="$toUpper should leave all ASCII uppercase letters unchanged", + ), + # Mixed case. + ToUpperTest( + "core_mixed_case", + value="HeLLo WoRLd", + expected="HELLO WORLD", + msg="$toUpper should convert mixed case to uppercase", + ), + # Digits unchanged. + ToUpperTest( + "core_digits", + value="abc123xyz", + expected="ABC123XYZ", + msg="$toUpper should leave digits unchanged", + ), + # Punctuation unchanged. + ToUpperTest( + "core_punctuation", + value="hello, world!", + expected="HELLO, WORLD!", + msg="$toUpper should leave punctuation unchanged", + ), + ToUpperTest( + "core_punctuation_only", + value="!@#$%", + expected="!@#$%", + msg="$toUpper should return punctuation-only string unchanged", + ), + # Whitespace unchanged. + ToUpperTest( + "core_whitespace_space", + value="hello world", + expected="HELLO WORLD", + msg="$toUpper should preserve spaces", + ), + ToUpperTest( + "core_whitespace_tab", + value="hello\tworld", + expected="HELLO\tWORLD", + msg="$toUpper should preserve tabs", + ), + ToUpperTest( + "core_whitespace_newline", + value="hello\nworld", + expected="HELLO\nWORLD", + msg="$toUpper should preserve newlines", + ), + # Unicode whitespace unchanged (non-breaking space U+00A0, en space U+2002, em space U+2003). + ToUpperTest( + "core_unicode_nbsp", + value="a\u00a0b", + expected="A\u00a0B", + msg="$toUpper should preserve non-breaking space", + ), + ToUpperTest( + "core_unicode_en_space", + value="a\u2002b", + expected="A\u2002B", + msg="$toUpper should preserve en space", + ), + ToUpperTest( + "core_unicode_em_space", + value="a\u2003b", + expected="A\u2003B", + msg="$toUpper should preserve em space", + ), + # Control characters unchanged (U+0000 null, U+001F unit separator). 
+ ToUpperTest( + "core_control_null", + value="a\x00b", + expected="A\x00B", + msg="$toUpper should preserve null byte", + ), + ToUpperTest( + "core_control_unit_sep", + value="a\x1fb", + expected="A\x1fB", + msg="$toUpper should preserve unit separator", + ), + # Non-ASCII Unicode letters not converted. + ToUpperTest( + "core_latin_extended", + value="café", + expected="CAFé", + msg="$toUpper should not convert non-ASCII Latin characters", + ), + ToUpperTest( + "core_latin_diaeresis", + value="naïve", + expected="NAïVE", + msg="$toUpper should not convert non-ASCII diaeresis characters", + ), + ToUpperTest( + "core_greek_lowercase", + value="αβγ", + expected="αβγ", + msg="$toUpper should not convert Greek lowercase letters", + ), + ToUpperTest( + "core_cyrillic_lowercase", + value="привет", + expected="привет", + msg="$toUpper should not convert Cyrillic lowercase letters", + ), + # Greek final sigma (ς, U+03C2) not converted. + ToUpperTest( + "core_greek_final_sigma", + value="ς", + expected="ς", + msg="$toUpper should not convert Greek final sigma", + ), + # Turkish dotless i (ı, U+0131) not converted. + ToUpperTest( + "core_turkish_dotless_i", + value="ı", + expected="ı", + msg="$toUpper should not convert Turkish dotless i", + ), + # Titlecase digraphs not converted (U+01F2 titlecase, U+01F3 lowercase). Written as escapes + # because the single-code-point glyphs are easily confused with the ASCII letter pairs. + ToUpperTest( + "core_titlecase_dz", + value="\u01f2", + expected="\u01f2", + msg="$toUpper should not convert titlecase dz", + ), + ToUpperTest( + "core_titlecase_dz_lower", + value="\u01f3", + expected="\u01f3", + msg="$toUpper should not convert lowercase dz digraph", + ), + # Cherokee lowercase not converted (Ꭰ U+13A0 is uppercase, ꭰ U+AB70 is lowercase). + ToUpperTest( + "core_cherokee_lowercase", + value="ꭰ", + expected="ꭰ", + msg="$toUpper should not convert Cherokee lowercase", + ), + # Enclosed alphanumerics not converted (ⓐ U+24D0).
+ ToUpperTest( + "core_enclosed_alpha", + value="ⓐ", + expected="ⓐ", + msg="$toUpper should not convert enclosed alphanumerics", + ), + # Fullwidth ASCII not converted (U+FF41; escaped because it is easily confused with ASCII a). + ToUpperTest( + "core_fullwidth_a", + value="\uff41", + expected="\uff41", + msg="$toUpper should not convert fullwidth ASCII", + ), + # Roman numeral letter forms not converted (ⅰ U+2170). + ToUpperTest( + "core_roman_numeral", + value="ⅰ", + expected="ⅰ", + msg="$toUpper should not convert Roman numeral letter forms", + ), + # CJK unchanged. + ToUpperTest( + "core_cjk", + value="日本語", + expected="日本語", + msg="$toUpper should leave CJK characters unchanged", + ), + # Emoji unchanged. + ToUpperTest( + "core_emoji", value="🎉🚀", expected="🎉🚀", msg="$toUpper should leave emoji unchanged" + ), + # Special Unicode case mappings not applied (ß does not become SS). + ToUpperTest( + "core_eszett", + value="ß", + expected="ß", + msg="$toUpper should not apply special case mapping for eszett", + ), + # Latin small ligature fi (U+FB01) not converted; escaped to keep it distinct from ASCII "fi". + ToUpperTest( + "core_fi_ligature", + value="\ufb01", + expected="\ufb01", + msg="$toUpper should not convert fi ligature", + ), + # Mixed ASCII and non-ASCII: only ASCII converted. + ToUpperTest( + "core_mixed_ascii_nonascii", + value="résumé", + expected="RéSUMé", + msg="$toUpper should only convert ASCII letters in mixed string", + ), + ToUpperTest( + "core_mixed_ascii_cjk", + value="hello世界", + expected="HELLO世界", + msg="$toUpper should only convert ASCII letters mixed with CJK", + ), + # Zero-width characters preserved. + # BOM (U+FEFF). + ToUpperTest( + "core_bom", + value="a\ufeffb", + expected="A\ufeffB", + msg="$toUpper should preserve BOM character", + ), + # Zero-width space (U+200B). + ToUpperTest( + "core_zwsp", + value="a\u200bb", + expected="A\u200bB", + msg="$toUpper should preserve zero-width space", + ), + # Zero-width joiner (U+200D).
+ ToUpperTest( + "core_zwj", + value="a\u200db", + expected="A\u200dB", + msg="$toUpper should preserve zero-width joiner", + ), + # Left-to-right mark (U+200E). + ToUpperTest( + "core_lrm", + value="a\u200eb", + expected="A\u200eB", + msg="$toUpper should preserve left-to-right mark", + ), + # Right-to-left mark (U+200F). + ToUpperTest( + "core_rlm", + value="a\u200fb", + expected="A\u200fB", + msg="$toUpper should preserve right-to-left mark", + ), + # ZWJ emoji sequence preserved intact (man technologist: U+1F468 U+200D U+1F4BB). + ToUpperTest( + "core_zwj_emoji", + value="\U0001f468\u200d\U0001f4bb", + expected="\U0001f468\u200d\U0001f4bb", + msg="$toUpper should preserve ZWJ emoji sequence", + ), + # ASCII boundary characters unchanged. + # Backtick (U+0060) is immediately before 'a' (U+0061). + ToUpperTest( + "core_backtick", value="`", expected="`", msg="$toUpper should leave backtick unchanged" + ), + # Open brace (U+007B) is immediately after 'z' (U+007A). + ToUpperTest( + "core_open_brace", value="{", expected="{", msg="$toUpper should leave open brace unchanged" + ), + # At sign (U+0040) is immediately before 'A' (U+0041). + ToUpperTest( + "core_at_sign", value="@", expected="@", msg="$toUpper should leave at sign unchanged" + ), + # Open bracket (U+005B) is immediately after 'Z' (U+005A). + ToUpperTest( + "core_open_bracket", + value="[", + expected="[", + msg="$toUpper should leave open bracket unchanged", + ), + # All boundary characters mixed with letters. + ToUpperTest( + "core_boundary_mix", + value="`a{z@A[Z", + expected="`A{Z@A[Z", + msg="$toUpper should only convert letters among ASCII boundary characters", + ), + # Multi-byte UTF-8 characters: 2-byte (é U+00E9), 3-byte (日 U+65E5), 4-byte (U+1F389). + ToUpperTest( + "core_multibyte", + value="\u00e9\u65e5\U0001f389", + expected="\u00e9\u65e5\U0001f389", + msg="$toUpper should leave multi-byte UTF-8 characters unchanged", + ), + # Only digits unchanged. 
+ ToUpperTest( + "core_only_digits", + value="1234567890", + expected="1234567890", + msg="$toUpper should leave digit-only string unchanged", + ), + # Only whitespace unchanged. + ToUpperTest( + "core_only_whitespace", + value=" \t\n ", + expected=" \t\n ", + msg="$toUpper should leave whitespace-only string unchanged", + ), + # Mixed alphanumeric: only ASCII letters uppercased. + ToUpperTest( + "core_mixed_alphanumeric", + value="abc123def", + expected="ABC123DEF", + msg="$toUpper should uppercase only letters in alphanumeric string", + ), + # Leading and trailing whitespace preserved. + ToUpperTest( + "core_leading_trailing_whitespace", + value=" hello ", + expected=" HELLO ", + msg="$toUpper should preserve leading and trailing whitespace", + ), +] + +# Property [Combining Characters]: ASCII base characters are uppercased while combining marks are +# preserved; precomposed characters and non-ASCII bases are not converted. +TOUPPER_COMBINING_TESTS: list[ToUpperTest] = [ + # ASCII base + combining acute accent (U+0301): base uppercased, mark preserved. + ToUpperTest( + "combining_ascii_base_acute", + value="e\u0301", + expected="E\u0301", + msg="$toUpper should uppercase ASCII base with combining acute", + ), + # ASCII base + combining diaeresis (U+0308): base uppercased, mark preserved. + ToUpperTest( + "combining_ascii_base_diaeresis", + value="a\u0308", + expected="A\u0308", + msg="$toUpper should uppercase ASCII base with combining diaeresis", + ), + # Precomposed é (U+00E9) is not converted. + ToUpperTest( + "combining_precomposed_e_acute", + value="\u00e9", + expected="\u00e9", + msg="$toUpper should not convert precomposed e-acute", + ), + # Precomposed ä (U+00E4) is not converted. + ToUpperTest( + "combining_precomposed_a_umlaut", + value="\u00e4", + expected="\u00e4", + msg="$toUpper should not convert precomposed a-umlaut", + ), + # Combining mark alone (U+0301) is preserved. 
+ ToUpperTest( + "combining_mark_alone", + value="\u0301", + expected="\u0301", + msg="$toUpper should preserve combining mark alone", + ), + # Non-ASCII base (α U+03B1) + combining mark: not converted. + ToUpperTest( + "combining_nonascii_base", + value="\u03b1\u0301", + expected="\u03b1\u0301", + msg="$toUpper should not convert non-ASCII base with combining mark", + ), +] + +# Property [Identity]: empty strings and already-uppercase strings return unchanged. +TOUPPER_IDENTITY_TESTS: list[ToUpperTest] = [ + ToUpperTest( + "identity_empty", value="", expected="", msg="$toUpper should return empty string unchanged" + ), + ToUpperTest( + "identity_uppercase", + value="HELLO", + expected="HELLO", + msg="$toUpper should return uppercase string unchanged", + ), + ToUpperTest( + "identity_uppercase_with_space", + value="HELLO WORLD", + expected="HELLO WORLD", + msg="$toUpper should return uppercase string with spaces unchanged", + ), + ToUpperTest( + "identity_uppercase_with_digits", + value="ABC123", + expected="ABC123", + msg="$toUpper should return uppercase string with digits unchanged", + ), +] + +TOUPPER_CHARACTER_TESTS = TOUPPER_CORE_TESTS + TOUPPER_COMBINING_TESTS + TOUPPER_IDENTITY_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(TOUPPER_CHARACTER_TESTS)) +def test_toupper_characters(collection, test_case: ToUpperTest): + """Test $toUpper character conversion behavior.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_coercion.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_coercion.py new file mode 100644 index 00000000..3e9ef844 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_coercion.py @@ -0,0 
+1,310 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Code, Decimal128, Int64, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_LARGE_EXPONENT, + DECIMAL128_MAX_NEGATIVE, + DECIMAL128_MIN_POSITIVE, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_ZERO, + DOUBLE_MAX, + DOUBLE_MIN_SUBNORMAL, + DOUBLE_NEGATIVE_ZERO, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + INT32_MAX, + INT32_MIN, + INT64_MAX, + INT64_MIN, +) + +from .utils.toUpper_common import ( + ToUpperTest, + _expr, +) + +# Property [Type Coercion]: numeric, datetime, timestamp, Code, and Symbol values are coerced to +# their string representation before uppercasing. +TOUPPER_COERCION_TESTS: list[ToUpperTest] = [ + # int32. + ToUpperTest( + "coerce_int32", value=42, expected="42", msg="$toUpper should coerce int32 to string" + ), + ToUpperTest( + "coerce_int32_neg", + value=-42, + expected="-42", + msg="$toUpper should coerce negative int32 to string", + ), + # int64. + ToUpperTest( + "coerce_int64", value=Int64(42), expected="42", msg="$toUpper should coerce int64 to string" + ), + ToUpperTest( + "coerce_int64_neg", + value=Int64(-42), + expected="-42", + msg="$toUpper should coerce negative int64 to string", + ), + # double: whole numbers drop trailing .0. + ToUpperTest( + "coerce_double_whole", + value=3.0, + expected="3", + msg="$toUpper should coerce whole double without trailing .0", + ), + ToUpperTest( + "coerce_double_fractional", + value=3.14, + expected="3.14", + msg="$toUpper should coerce fractional double to string", + ), + # Negative zero preserved. 
+ ToUpperTest( + "coerce_double_neg_zero", + value=DOUBLE_NEGATIVE_ZERO, + expected="-0", + msg="$toUpper should preserve negative zero in coerced double", + ), + # Scientific notation uses uppercase E. + ToUpperTest( + "coerce_double_sci_large", + value=1e20, + expected="1E+20", + msg="$toUpper should use uppercase E in scientific notation for large double", + ), + ToUpperTest( + "coerce_double_sci_small", + value=0.000001, + expected="1E-06", + msg="$toUpper should use uppercase E in scientific notation for small double", + ), + # Double precision limited to approximately 6 significant digits. + ToUpperTest( + "coerce_double_six_digits", + value=999999.0, + expected="999999", + msg="$toUpper should preserve 6 significant digits for double", + ), + ToUpperTest( + "coerce_double_seven_digits", + value=1_000_000.0, + expected="1E+06", + msg="$toUpper should use scientific notation for double with 7 digits", + ), + # Special float values. + ToUpperTest( + "coerce_double_nan", + value=FLOAT_NAN, + expected="NAN", + msg="$toUpper should coerce NaN to uppercase NAN", + ), + ToUpperTest( + "coerce_double_inf", + value=FLOAT_INFINITY, + expected="INF", + msg="$toUpper should coerce Infinity to uppercase INF", + ), + ToUpperTest( + "coerce_double_neg_inf", + value=FLOAT_NEGATIVE_INFINITY, + expected="-INF", + msg="$toUpper should coerce -Infinity to uppercase -INF", + ), + # Decimal128 preserves full precision and trailing zeros. + ToUpperTest( + "coerce_decimal", + value=Decimal128("3.14"), + expected="3.14", + msg="$toUpper should coerce Decimal128 to string", + ), + ToUpperTest( + "coerce_decimal_trailing", + value=Decimal128("3.1400"), + expected="3.1400", + msg="$toUpper should preserve Decimal128 trailing zeros", + ), + ToUpperTest( + "coerce_decimal_neg_zero", + value=DECIMAL128_NEGATIVE_ZERO, + expected="-0", + msg="$toUpper should preserve Decimal128 negative zero", + ), + # Decimal128 special values. 
+ ToUpperTest( + "coerce_decimal_nan", + value=DECIMAL128_NAN, + expected="NAN", + msg="$toUpper should coerce Decimal128 NaN to uppercase NAN", + ), + ToUpperTest( + "coerce_decimal_inf", + value=DECIMAL128_INFINITY, + expected="INFINITY", + msg="$toUpper should coerce Decimal128 Infinity to uppercase INFINITY", + ), + ToUpperTest( + "coerce_decimal_neg_inf", + value=DECIMAL128_NEGATIVE_INFINITY, + expected="-INFINITY", + msg="$toUpper should coerce Decimal128 -Infinity to uppercase -INFINITY", + ), + # Datetime coerced to ISO 8601 format. + ToUpperTest( + "coerce_datetime", + value=datetime(2024, 6, 15, 12, 30, 45, tzinfo=timezone.utc), + expected="2024-06-15T12:30:45.000Z", + msg="$toUpper should coerce datetime to ISO 8601 string", + ), + # Timestamp coerced to custom format with uppercased month abbreviation. + ToUpperTest( + "coerce_timestamp", + value=Timestamp(1, 1), + expected="JAN 1 00:00:01:1", + msg="$toUpper should coerce Timestamp to uppercased month format", + ), + ToUpperTest( + "coerce_timestamp_feb", + value=Timestamp(1707000000, 1), + expected="FEB 3 22:40:00:1", + msg="$toUpper should coerce Timestamp with February date", + ), + # JavaScript Code coerced to its code string before uppercasing. + ToUpperTest( + "coerce_code", + value=Code("hello"), + expected="HELLO", + msg="$toUpper should coerce Code to uppercased string", + ), + ToUpperTest( + "coerce_code_function", + value=Code("function() { return x; }"), + expected="FUNCTION() { RETURN X; }", + msg="$toUpper should uppercase Code function body", + ), + # Integer boundary values. 
+ ToUpperTest( + "coerce_max_int32", + value=INT32_MAX, + expected="2147483647", + msg="$toUpper should coerce INT32_MAX to string", + ), + ToUpperTest( + "coerce_min_int32", + value=INT32_MIN, + expected="-2147483648", + msg="$toUpper should coerce INT32_MIN to string", + ), + ToUpperTest( + "coerce_max_int64", + value=INT64_MAX, + expected="9223372036854775807", + msg="$toUpper should coerce INT64_MAX to string", + ), + ToUpperTest( + "coerce_min_int64", + value=INT64_MIN, + expected="-9223372036854775808", + msg="$toUpper should coerce INT64_MIN to string", + ), + # Float boundary values. + ToUpperTest( + "coerce_subnormal", + value=DOUBLE_MIN_SUBNORMAL, + expected="4.94066E-324", + msg="$toUpper should coerce subnormal double to string", + ), + ToUpperTest( + "coerce_max_double", + value=DOUBLE_MAX, + expected="1.79769E+308", + msg="$toUpper should coerce near-max double to string", + ), + # Decimal128 boundary values. + ToUpperTest( + "coerce_decimal_max_precision", + value=Decimal128("1234567890123456789012345678901234"), + expected="1234567890123456789012345678901234", + msg="$toUpper should preserve Decimal128 34-digit precision", + ), + ToUpperTest( + "coerce_decimal_large_exp", + value=DECIMAL128_LARGE_EXPONENT, + expected="1.000000000000000000000000000000000E+6144", + msg="$toUpper should preserve Decimal128 large exponent", + ), + ToUpperTest( + "coerce_decimal_small_exp", + value=DECIMAL128_MIN_POSITIVE, + expected="1E-6176", + msg="$toUpper should preserve Decimal128 small exponent", + ), + ToUpperTest( + "coerce_decimal_max_negative", + value=DECIMAL128_MAX_NEGATIVE, + expected="-1E-6176", + msg="$toUpper should preserve Decimal128 maximum negative value", + ), + # Datetime boundary values. 
+ ToUpperTest( + "coerce_datetime_epoch", + value=datetime(1970, 1, 1, tzinfo=timezone.utc), + expected="1970-01-01T00:00:00.000Z", + msg="$toUpper should coerce epoch datetime to ISO 8601", + ), + ToUpperTest( + "coerce_datetime_pre_epoch", + value=datetime(1969, 12, 31, 23, 59, 59, tzinfo=timezone.utc), + expected="1969-12-31T23:59:59.000Z", + msg="$toUpper should coerce pre-epoch datetime to ISO 8601", + ), + ToUpperTest( + "coerce_datetime_far_future", + value=datetime(9999, 12, 31, 23, 59, 59, tzinfo=timezone.utc), + expected="9999-12-31T23:59:59.000Z", + msg="$toUpper should coerce far-future datetime to ISO 8601", + ), + # Timestamp boundary values. + ToUpperTest( + "coerce_timestamp_max_time", + value=Timestamp(4_294_967_295, 1), + expected="FEB 7 06:28:15:1", + msg="$toUpper should coerce Timestamp with max time value", + ), + ToUpperTest( + "coerce_timestamp_max_inc", + value=Timestamp(1, 4_294_967_295), + expected="JAN 1 00:00:01:4294967295", + msg="$toUpper should coerce Timestamp with max increment value", + ), + ToUpperTest( + "coerce_timestamp_zero", + value=Timestamp(0, 0), + expected="JAN 1 00:00:00:0", + msg="$toUpper should coerce zero Timestamp to epoch", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TOUPPER_COERCION_TESTS)) +def test_toupper_coercion(collection, test_case: ToUpperTest): + """Test $toUpper type coercion behavior.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_invariants.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_invariants.py new file mode 100644 index 00000000..31e2ac1f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_invariants.py @@ -0,0 +1,112 @@ 
+from __future__ import annotations + +import pytest +from bson import Decimal128 + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.toUpper_common import ( + ToUpperTest, +) + +# Property [Idempotency]: applying $toUpper to an already-uppercased result produces the same +# result. +TOUPPER_IDEMPOTENCY_TESTS: list[ToUpperTest] = [ + ToUpperTest( + "idempotency_lowercase", + value="hello", + expected="HELLO", + msg="$toUpper applied twice should equal single application on lowercase", + ), + ToUpperTest( + "idempotency_uppercase", + value="HELLO", + expected="HELLO", + msg="$toUpper applied twice should equal single application on uppercase", + ), + ToUpperTest( + "idempotency_mixed", + value="HeLLo", + expected="HELLO", + msg="$toUpper applied twice should equal single application on mixed case", + ), + ToUpperTest( + "idempotency_nonascii", + value="café", + expected="CAFé", + msg="$toUpper applied twice should equal single application on non-ASCII", + ), + ToUpperTest( + "idempotency_empty", + value="", + expected="", + msg="$toUpper applied twice should equal single application on empty string", + ), + ToUpperTest( + "idempotency_digits", + value="123", + expected="123", + msg="$toUpper applied twice should equal single application on digits", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TOUPPER_IDEMPOTENCY_TESTS)) +def test_toupper_idempotency(collection, test_case: ToUpperTest): + """Test $toUpper idempotency.""" + once = {"$toUpper": test_case.value} + twice = {"$toUpper": once} + result = execute_project(collection, {"once": once, "twice": twice}) + assertSuccess( + result, [{"once": test_case.expected, "twice": test_case.expected}], msg=test_case.msg + ) + + +# Property [Return Type]: the result is always a 
string when the expression succeeds. +TOUPPER_RETURN_TYPE_TESTS: list[ToUpperTest] = [ + ToUpperTest( + "return_type_lowercase", + value="hello", + msg="$toUpper should return type string for lowercase input", + ), + ToUpperTest( + "return_type_uppercase", + value="HELLO", + msg="$toUpper should return type string for uppercase input", + ), + ToUpperTest( + "return_type_empty", value="", msg="$toUpper should return type string for empty input" + ), + ToUpperTest( + "return_type_coerced_int", + value=42, + msg="$toUpper should return type string for coerced int", + ), + ToUpperTest( + "return_type_coerced_decimal", + value=Decimal128("3.14"), + msg="$toUpper should return type string for coerced Decimal128", + ), + ToUpperTest( + "return_type_unicode", + value="café", + msg="$toUpper should return type string for Unicode input", + ), + ToUpperTest( + "return_type_expression", + value={"$concat": ["a", "b"]}, + msg="$toUpper should return type string for expression input", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TOUPPER_RETURN_TYPE_TESTS)) +def test_toupper_return_type(collection, test_case: ToUpperTest): + """Test $toUpper result is always type string.""" + result = execute_expression(collection, {"$type": {"$toUpper": test_case.value}}) + assertSuccess(result, [{"result": "string"}], msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_null.py new file mode 100644 index 00000000..c2308e7d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_null.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import 
pytest_params +from documentdb_tests.framework.test_constants import MISSING + +from .utils.toUpper_common import ( + ToUpperTest, + _expr, +) + +# Property [Null and Missing Behavior]: null, missing, or undefined arguments return empty string. +TOUPPER_NULL_TESTS: list[ToUpperTest] = [ + ToUpperTest( + "null_literal", value=None, expected="", msg="$toUpper should return empty string for null" + ), + ToUpperTest( + "missing_field", + value=MISSING, + expected="", + msg="$toUpper should return empty string for missing field", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TOUPPER_NULL_TESTS)) +def test_toupper_null(collection, test_case: ToUpperTest): + """Test $toUpper null and missing behavior.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_size_limit.py new file mode 100644 index 00000000..f63bfb91 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_size_limit.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.error_codes import STRING_SIZE_LIMIT_ERROR +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import STRING_SIZE_LIMIT_BYTES + +from .utils.toUpper_common import ( + ToUpperTest, + _expr, +) + +# Property [String Size Limit Success]: input strings just under the size limit are accepted. 
+TOUPPER_SIZE_LIMIT_SUCCESS_TESTS: list[ToUpperTest] = [ + ToUpperTest( + "size_one_under", + value="a" * (STRING_SIZE_LIMIT_BYTES - 1), + expected="A" * (STRING_SIZE_LIMIT_BYTES - 1), + msg="$toUpper should accept input string one byte under the 16 MB limit", + ), +] + +# Property [String Size Limit]: input strings at or above the size limit produce an error. +TOUPPER_SIZE_LIMIT_ERROR_TESTS: list[ToUpperTest] = [ + ToUpperTest( + "size_at_limit", + value="a" * STRING_SIZE_LIMIT_BYTES, + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$toUpper should reject input string at the 16 MB byte limit", + ), +] + +TOUPPER_SIZE_LIMIT_TESTS = TOUPPER_SIZE_LIMIT_SUCCESS_TESTS + TOUPPER_SIZE_LIMIT_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(TOUPPER_SIZE_LIMIT_TESTS)) +def test_toupper_size_limit(collection, test_case: ToUpperTest): + """Test $toUpper string size limit behavior.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_type_errors.py new file mode 100644 index 00000000..65ef4a12 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/test_toUpper_type_errors.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +import pytest +from bson import Binary, Code, MaxKey, MinKey, ObjectId, Regex + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.error_codes import BSON_TO_STRING_CONVERSION_ERROR +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.toUpper_common import ( + ToUpperTest, + _expr, +) + +# Property 
[Type Strictness]: non-coercible types produce an error. +TOUPPER_TYPE_ERROR_TESTS: list[ToUpperTest] = [ + ToUpperTest( + "type_bool_true", + value=True, + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject boolean true", + ), + ToUpperTest( + "type_bool_false", + value=False, + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject boolean false", + ), + ToUpperTest( + "type_object", + value={"key": "val"}, + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject object", + ), + ToUpperTest( + "type_objectid", + value=ObjectId("000000000000000000000000"), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject ObjectId", + ), + ToUpperTest( + "type_binary", + value=Binary(b"\x00"), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject Binary", + ), + ToUpperTest( + "type_binary_uuid", + value=Binary(b"\x00" * 16, 4), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject Binary UUID", + ), + ToUpperTest( + "type_regex", + value=Regex("abc"), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject Regex", + ), + ToUpperTest( + "type_code_with_scope", + value=Code("x", {}), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject Code with scope", + ), + ToUpperTest( + "type_minkey", + value=MinKey(), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject MinKey", + ), + ToUpperTest( + "type_maxkey", + value=MaxKey(), + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject MaxKey", + ), + ToUpperTest( + "type_nested_array", + value=[[1, 2]], + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject nested array", + ), + ToUpperTest( + "type_array_from_expression", + value={"$literal": [1, 2]}, + error_code=BSON_TO_STRING_CONVERSION_ERROR, + msg="$toUpper should reject array from expression", + ), +] + + +@pytest.mark.parametrize("test_case", 
pytest_params(TOUPPER_TYPE_ERROR_TESTS)) +def test_toupper_type_errors(collection, test_case: ToUpperTest): + """Test $toUpper type strictness.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/utils/toUpper_common.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/utils/toUpper_common.py new file mode 100644 index 00000000..2738ee46 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/toUpper/utils/toUpper_common.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from documentdb_tests.framework.test_case import BaseTestCase + + +@dataclass(frozen=True) +class ToUpperTest(BaseTestCase): + """Test case for $toUpper operator.""" + + value: Any = None + + +def _expr(test_case: ToUpperTest) -> dict[str, Any]: + return {"$toUpper": test_case.value} diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index c103289b..f9a506b6 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -4,12 +4,16 @@ """ BAD_VALUE_ERROR = 2 +FAILED_TO_PARSE_ERROR = 9 TYPE_MISMATCH_ERROR = 14 INVALID_DATE_STRING_ERROR = 241 +BSON_TO_STRING_CONVERSION_ERROR = 16007 EXPRESSION_TYPE_MISMATCH_ERROR = 16020 +STRING_SIZE_LIMIT_ERROR = 16493 MODULO_ZERO_REMAINDER_ERROR = 16610 MODULO_NON_NUMERIC_ERROR = 16611 MORE_THAN_ONE_DATE_ERROR = 16612 +INVALID_DOLLAR_FIELD_PATH = 16872 ABS_OVERFLOW_ERROR 
= 28680 LOG_NON_NUMERIC_VALUE_ERROR = 28756 LOG_NON_NUMERIC_BASE_ERROR = 28757 diff --git a/documentdb_tests/framework/test_constants.py b/documentdb_tests/framework/test_constants.py index 5b1852fd..83940527 100644 --- a/documentdb_tests/framework/test_constants.py +++ b/documentdb_tests/framework/test_constants.py @@ -26,6 +26,7 @@ DOUBLE_MIN_SUBNORMAL = 5e-324 DOUBLE_NEAR_MAX = 1e308 DOUBLE_NEAR_MIN = 1e-308 +DOUBLE_MAX = 1.7976931348623157e308 DOUBLE_MAX_SAFE_INTEGER = 9007199254740992 DOUBLE_PRECISION_LOSS = 9007199254740993 @@ -52,6 +53,8 @@ DECIMAL128_MAX = Decimal128("9.999999999999999999999999999999999E+6144") DECIMAL128_LARGE_EXPONENT = Decimal128("1E+6144") DECIMAL128_SMALL_EXPONENT = Decimal128("1E-6143") +DECIMAL128_MIN_POSITIVE = Decimal128("1E-6176") +DECIMAL128_MAX_NEGATIVE = Decimal128("-1E-6176") DECIMAL128_TRAILING_ZERO = Decimal128("1.0") DECIMAL128_MANY_TRAILING_ZEROS = Decimal128("1.00000000000000000000000000000000") DECIMAL128_NAN = Decimal128("nan") @@ -67,6 +70,7 @@ # Other constant values MISSING = "$missing" +STRING_SIZE_LIMIT_BYTES = 16 * 1024 * 1024 # Int32 lists NUMERIC_INT32_NEGATIVE = [INT32_UNDERFLOW, INT32_MIN]