From bf800af9b3392fb733da7524f77d38a5b36f892e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Apr 2026 08:50:22 +0000 Subject: [PATCH 1/2] Initial plan From 55cd8ec0506c684761e5e031e65f7bfa4afb2208 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Apr 2026 09:41:58 +0000 Subject: [PATCH 2/2] Fix strcspn/strspn boundary scans in decode_string_or_url and decode_escape_at; add targeted tests Agent-Logs-Url: https://github.com/WordPress/php-toolkit/sessions/c4d1289b-697f-44ee-9750-fac7d627e1b5 Co-authored-by: sirreal <841763+sirreal@users.noreply.github.com> --- .../DataLiberation/CSS/class-cssprocessor.php | 16 +++---- .../DataLiberation/Tests/CSSProcessorTest.php | 42 +++++++++++++++++++ 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/components/DataLiberation/CSS/class-cssprocessor.php b/components/DataLiberation/CSS/class-cssprocessor.php index e7f50adb..adcdd087 100644 --- a/components/DataLiberation/CSS/class-cssprocessor.php +++ b/components/DataLiberation/CSS/class-cssprocessor.php @@ -1564,13 +1564,11 @@ private function decode_string_or_url( int $start, int $length ): string { $end = $start + $length; while ( $at < $end ) { - // Find next special character. - $normal_len = strcspn( $this->css, $special_chars, $at ); + // Find next special character within the token boundary. + $normal_len = strcspn( $this->css, $special_chars, $at, $end - $at ); if ( $normal_len > 0 ) { - // Clamp to not exceed the end boundary. - $normal_len = min( $normal_len, $end - $at ); - $decoded .= substr( $this->css, $at, $normal_len ); - $at += $normal_len; + $decoded .= substr( $this->css, $at, $normal_len ); + $at += $normal_len; } if ( $at >= $end ) { @@ -1648,11 +1646,9 @@ private function decode_escape_at( int $offset, &$bytes_consumed ): string { return "\u{FFFD}"; } - // Hex digits. - $hex_len = strspn( $this->css, '0123456789ABCDEFabcdef', $at ); + // Hex digits (CSS spec allows at most 6). + $hex_len = strspn( $this->css, '0123456789ABCDEFabcdef', $at, 6 ); if ( $hex_len > 0 ) { - // Consume up to 6 hex digits. - $hex_len = min( $hex_len, 6 ); $hex = substr( $this->css, $at, $hex_len ); $at += $hex_len; diff --git a/components/DataLiberation/Tests/CSSProcessorTest.php b/components/DataLiberation/Tests/CSSProcessorTest.php index a6f7f7f3..113af1b0 100644 --- a/components/DataLiberation/Tests/CSSProcessorTest.php +++ b/components/DataLiberation/Tests/CSSProcessorTest.php @@ -1556,4 +1556,46 @@ public function test_bad_string_token_value_is_null(): void { $this->assertSame( CSSProcessor::TOKEN_BAD_STRING, $processor->get_token_type() ); $this->assertNull( $processor->get_token_value() ); } + + /** + * Tests that decode_string_or_url() respects the token's length boundary + * and does not include content from beyond the token end. + * + * The escape sequence \41 (= 'A') triggers the slow path in + * decode_string_or_url(). The CSS string continues with "; color: red;" + * after the closing quote, which must not appear in the token value. + */ + public function test_decode_string_or_url_respects_length_boundary(): void { + // \41 = 'A' — triggers the slow path; "; color: red;" follows the token. + $css = '"hello\\41 world"; color: red;'; + + $processor = CSSProcessor::create( $css ); + $processor->next_token(); + + $this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() ); + $this->assertSame( 'helloAworld', $processor->get_token_value() ); + $this->assertSame( '"helloAworld"', $processor->get_normalized_token() ); + } + + /** + * Tests that decode_escape_at() consumes at most 6 hex digits, as required + * by the CSS Syntax Level 3 specification. + * + * A hex escape with 7 consecutive hex digits must only consume the first 6, + * leaving the 7th as a literal character in the string value. + * + * @see https://www.w3.org/TR/css-syntax-3/#consume-escaped-code-point + */ + public function test_decode_escape_at_hex_limit_is_six_digits(): void { + // \000041 is 6 hex digits → U+0041 = 'A'; the trailing '1' is literal. + // Without the length limit, strspn() would scan 7 hex digits (0000411), + // giving U+0411 = 'Б' (Cyrillic), which is incorrect. + $css = '"\\0000411rest"'; + + $processor = CSSProcessor::create( $css ); + $processor->next_token(); + + $this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() ); + $this->assertSame( 'A1rest', $processor->get_token_value() ); + } }