diff --git a/src/sed/command.rs b/src/sed/command.rs index 250bcbc5..e927ebe5 100644 --- a/src/sed/command.rs +++ b/src/sed/command.rs @@ -105,6 +105,16 @@ pub enum ReplacementPart { Group(u32), // \1 to \9 } +// The maximum value allowed in regex quantifier +pub const RE_DUP_MAX: usize = 32767; + +/// Regex modes (BRE or ERE) +#[derive(Copy, Clone, Debug)] +pub enum RegexMode { + Basic, + Extended, +} + #[derive(Debug)] /// All specified replacements for an RE pub struct ReplacementTemplate { diff --git a/src/sed/compiler.rs b/src/sed/compiler.rs index 193300bd..691239e3 100644 --- a/src/sed/compiler.rs +++ b/src/sed/compiler.rs @@ -9,8 +9,8 @@ // file that was distributed with this source code. use crate::sed::command::{ - Address, Command, CommandData, ProcessingContext, ReplacementPart, ReplacementTemplate, - Substitution, Transliteration, + Address, Command, CommandData, ProcessingContext, RegexMode, ReplacementPart, + ReplacementTemplate, Substitution, Transliteration, }; use crate::sed::delimited_parser::{parse_char_escape, parse_regex, parse_transliteration}; use crate::sed::error_handling::{ScriptLocation, compilation_error, semantic_error}; @@ -286,7 +286,6 @@ fn compile_sequence( let n_addr = compile_address_range(lines, line, &mut cmd, context)?; line.eat_spaces(); let mut cmd_spec = get_verified_cmd_spec(lines, line, n_addr, context.posix)?; - // Compile the command according to its specification. let mut cmd_mut = cmd.borrow_mut(); cmd_mut.code = line.current(); @@ -331,10 +330,8 @@ fn compile_address_range( let mut is_line0 = false; line.eat_spaces(); - if !line.eol() - && is_address_char(line.current()) - && let Ok(addr1) = compile_address(lines, line, context) - { + if !line.eol() && is_address_char(line.current()) { + let addr1 = compile_address(lines, line, context)?; is_line0 = matches!(addr1, Address::Line(0)); cmd.addr1 = Some(addr1); if is_line0 && context.posix { @@ -364,9 +361,8 @@ fn compile_address_range( } // Look for second address. - if !line.eol() - && let Ok(addr2) = compile_address(lines, line, context) - { + if !line.eol() { + let addr2 = compile_address(lines, line, context)?; // Set step_n to the number specified in the (required numeric) address. let step_n = if is_step_match || is_step_end { match addr2 { @@ -449,7 +445,12 @@ fn compile_address( // The next character is an arbitrary delimiter line.advance(); } - let re = parse_regex(lines, line)?; + let regex_mode = if context.regex_extended { + RegexMode::Extended + } else { + RegexMode::Basic + }; + let re = parse_regex(lines, line, regex_mode)?; // Skip over delimiter line.advance(); @@ -532,7 +533,7 @@ fn parse_command_ending( } /// Convert a primitive BRE pattern to a safe ERE-compatible pattern string. -/// - Replaces `\(`, `\)`, `\?`, `\+` and `\|` with `(`, `)`, `?`, `+` and `|`. +/// - Replaces `\(`, `\)`, `\?`, `\+`, `\|`, `\{` and `\}` with `(`, `)`, `?`, `+`, `|`, `{` and `}`. /// - Puts single-digit back-references in non-capturing groups.. /// - Escapes ERE-only metacharacters: `+ ? { } | ( )`. /// - Leaves all other characters as-is. @@ -565,6 +566,14 @@ fn bre_to_ere(pattern: &str) -> String { chars.next(); result.push('|'); // Alternation operator } + Some('{') => { + chars.next(); + result.push('{'); // Brace quantifier start + } + Some('}') => { + chars.next(); + result.push('}'); // Brace quantifier end + } Some(v) if v.is_ascii_digit() => { // Back-reference. In sed BREs these are single-digit // (\1-\9) whereas fancy_regex supports multi-digit @@ -639,7 +648,6 @@ fn compile_regex( } else { &bre_to_ere(pattern) }; - // Add case-insensitive modifier if needed. let pattern = if icase { format!("(?i){pattern}") @@ -786,8 +794,12 @@ fn compile_subst_command( ); } - let pattern = parse_regex(lines, line)?; - + let regex_mode = if context.regex_extended { + RegexMode::Extended + } else { + RegexMode::Basic + }; + let pattern = parse_regex(lines, line, regex_mode)?; let mut subst = Box::new(Substitution::default()); subst.replacement = compile_replacement(lines, line)?; @@ -817,7 +829,6 @@ fn compile_subst_command( ), ); } - cmd.data = CommandData::Substitution(subst); parse_command_ending(lines, line, cmd)?; @@ -1577,6 +1588,21 @@ mod tests { assert!(!regex.is_match(&mut IOChunk::new_from_str("ABC")).unwrap()); } + #[test] + fn test_compile_re_extended() { + let (lines, chars) = make_providers("acaa\nbbb\nccc"); + let mut ctx = ctx(); + ctx.regex_extended = true; + let regex = compile_regex(&lines, &chars, "cc{0,}", &ctx, false) + .unwrap() + .expect("regex should be present"); + assert!( + regex + .is_match(&mut IOChunk::new_from_str("acaa\nccc")) + .unwrap() + ); + } + #[test] fn test_compile_re_case_insensitive() { let (lines, chars) = dummy_providers(); @@ -1807,6 +1833,17 @@ mod tests { } } + #[test] + fn test_compile_address_range_error_propagation() { + let (lines, mut chars) = make_providers("1,/abc"); + let mut cmd = Rc::new(RefCell::new(Command::default())); + let result = compile_address_range(&lines, &mut chars, &mut cmd, &ctx()); + + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!(msg.contains("unterminated regular expression")); + } + // compile_sequence fn empty_line() -> ScriptCharProvider { ScriptCharProvider::new("") @@ -2214,6 +2251,11 @@ mod tests { assert_eq!(bre_to_ere(r"a\(b\)c"), "a(b)c"); } + #[test] + fn test_bre_brace_quantifier_translation() { + assert_eq!(bre_to_ere(r"\{1,4\}"), "{1,4}"); + } + #[test] fn test_ere_metacharacters_escaped() { assert_eq!(bre_to_ere(r"a+b?c{1}|(d)"), r"a\+b\?c\{1\}\|\(d\)"); diff --git a/src/sed/delimited_parser.rs b/src/sed/delimited_parser.rs index 295e8f40..7fe5f7f8 100644 --- a/src/sed/delimited_parser.rs +++ b/src/sed/delimited_parser.rs @@ -8,6 +8,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use crate::sed::command::{RE_DUP_MAX, RegexMode}; use crate::sed::error_handling::compilation_error; use crate::sed::script_char_provider::ScriptCharProvider; use crate::sed::script_line_provider::ScriptLineProvider; @@ -309,11 +310,16 @@ fn scan_delimiter(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> /// Parse the regular expression delimited by the current line /// character and return it as a string. -/// On return the line is on the closing delimiter. -pub fn parse_regex(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> UResult { +/// On return, the line is on the closing delimiter. +/// In Basic mode, quantifiers like {m,n} must be escaped (\{m,n\}). +/// In Extended mode, quantifiers like {m,n} don't require escaping. +pub fn parse_regex( + lines: &ScriptLineProvider, + line: &mut ScriptCharProvider, + regex_mode: RegexMode, +) -> UResult { let delimiter = scan_delimiter(lines, line)?; let mut result = String::new(); - while !line.eol() { match line.current() { '[' if delimiter != '[' => { @@ -332,6 +338,20 @@ pub fn parse_regex(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> line.advance(); continue; } + if line.current() == '{' && matches!(regex_mode, RegexMode::Basic) { + validate_quantifier_structure(lines, line, delimiter, RegexMode::Basic)?; + let quantifier = validate_quantifier_numbers(lines, line)?; + result.push('\\'); + result.push('{'); + result.push_str(&quantifier); + continue; + } + if line.current() == '}' { + result.push('\\'); + result.push('}'); + line.advance(); + continue; + } if let Some(decoded) = parse_char_escape(line) { result.push(decoded); } else { @@ -342,6 +362,19 @@ pub fn parse_regex(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> } continue; } + '{' if matches!(regex_mode, RegexMode::Extended) => { + validate_quantifier_structure(lines, line, delimiter, RegexMode::Extended)?; + let quantifier = validate_quantifier_numbers(lines, line)?; + result.push('{'); + result.push_str(&quantifier); + continue; + } + '}' => { + result.push('}'); + line.advance(); + continue; + } + c if c == delimiter => return Ok(result), c => result.push(c), } @@ -350,6 +383,171 @@ pub fn parse_regex(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> compilation_error(lines, line, "unterminated regular expression") } +// Check for closing brace and the structure/content. +fn validate_quantifier_structure( + lines: &ScriptLineProvider, + line: &mut ScriptCharProvider, + delimiter: char, + regex_mode: RegexMode, +) -> UResult { + let invalid_content_error_msg = "Invalid content of \\{\\}"; + let mut found_closing_brace = false; + let mut seen_comma = false; + let mut invalid_content_detected = false; + let mut is_quantifier_empty = true; + let initial_pos = line.get_pos(); + line.advance(); + + while !line.eol() && line.current() != delimiter { + match regex_mode { + RegexMode::Extended => { + // In ERE mode, look for } + if line.current() == '}' { + // Empty quantifier {} is not valid + if is_quantifier_empty { + invalid_content_detected = true; + } + found_closing_brace = true; + break; + } + // Entering means there is no } immediately after the { + is_quantifier_empty = false; + // Only digits and one comma allowed + if line.current() == ',' { + if seen_comma { + invalid_content_detected = true; + } + seen_comma = true; + } else if !line.current().is_ascii_digit() { + invalid_content_detected = true; + } + line.advance(); + } + RegexMode::Basic => { + // In BRE mode, look for \} + if line.current() == '\\' { + line.advance(); + if !line.eol() && line.current() == '}' { + if is_quantifier_empty { + invalid_content_detected = true; + } + found_closing_brace = true; + } else { + invalid_content_detected = true; + } + break; + } + is_quantifier_empty = false; + if line.current() == ',' { + if seen_comma { + invalid_content_detected = true; + } + seen_comma = true; + } else if !line.current().is_ascii_digit() { + invalid_content_detected = true; + } + line.advance(); + } + } + } + + if !found_closing_brace { + return compilation_error(lines, line, "Unmatched \\{"); + } + + if invalid_content_detected { + return compilation_error(lines, line, invalid_content_error_msg); + } + + line.set_position(initial_pos); + Ok(initial_pos) +} + +// Performs validations on m and/or n values of the quantifier +// and returns the valid content as a string (without braces). +fn validate_quantifier_numbers( + lines: &ScriptLineProvider, + line: &mut ScriptCharProvider, +) -> UResult { + line.advance(); + + // Handle {,} (zero or more) special case + if line.current() == ',' { + line.advance(); + if line.current() == '}' { + return Ok("0,".to_string()); + } + + // Continue to parse n value + let mut result = String::new(); + result.push('0'); + result.push(','); + while line.current() != '}' && line.current() != '\\' { + result.push(line.current()); + line.advance(); + } + return Ok(result); + } + // Parse m value + let mut m = String::new(); + while line.current() != ',' && line.current() != '}' && line.current() != '\\' { + m.push(line.current()); + line.advance(); + } + let m_val: usize = match m.parse() { + Ok(val) => { + if val > RE_DUP_MAX { + return compilation_error(lines, line, "Regular expression too big"); + } + val + } + //never happens due to previous validation, but needed to satisfy the type checker + Err(_) => return compilation_error(lines, line, "Invalid content of \\{\\}"), + }; + + // Parse n if comma is present + let mut n = String::new(); + let has_comma = line.current() == ','; + if has_comma { + line.advance(); + while line.current() != '}' && line.current() != '\\' { + n.push(line.current()); + line.advance(); + } + } + let n_val: Option = if n.is_empty() { + None + } else { + match n.parse::() { + Ok(val) => { + if val > RE_DUP_MAX { + return compilation_error(lines, line, "Regular expression too big"); + } + Some(val) + } + Err(_) => return compilation_error(lines, line, "Invalid content of \\{\\}"), + } + }; + + // Validate m <= n if both present + if let Some(n_val) = n_val + && m_val > n_val + { + return compilation_error(lines, line, "Invalid content of \\{\\}"); + } + + // Valid quantifier content (without braces) + let mut result = m.clone(); + if has_comma { + result.push(','); + if !n.is_empty() { + result.push_str(&n); + } + } + + Ok(result) +} + /// Parse the transliteration string delimited by the current line /// character and return it as a string. /// On return the line is on the closing delimiter. @@ -753,7 +951,7 @@ mod tests { #[test] fn test_simple_regex() { let (lines, mut line) = make_providers("/abc/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "abc"); assert_eq!(line.current(), '/'); } @@ -761,7 +959,7 @@ mod tests { #[test] fn test_regex_with_escaped_delimiter() { let (lines, mut line) = make_providers("/ab\\/c/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "ab/c"); assert_eq!(line.current(), '/'); } @@ -769,7 +967,7 @@ mod tests { #[test] fn test_regex_with_capture() { let (lines, mut line) = make_providers(r"/\(.\)/c/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, r"\(.\)"); assert_eq!(line.current(), '/'); } @@ -777,29 +975,101 @@ mod tests { #[test] fn test_regex_with_escape_sequence() { let (lines, mut line) = make_providers("/ab\\n/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "ab\n"); assert_eq!(line.current(), '/'); } + #[test] + fn test_basic_regex_quantifier() { + let (lines, mut line) = make_providers("/a\\{2,3\\}/p"); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); + assert_eq!(parsed, "a\\{2,3\\}"); + assert_eq!(line.current(), '/'); + } + + #[test] + fn test_basic_regex_with_unmatched_brace_quantifier() { + let (lines, mut line) = make_providers("/a\\{2,3/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Unmatched \\{")); + } + + #[test] + fn test_basic_regex_with_invalid_content() { + let (lines, mut line) = make_providers("/a\\{2d,3\\}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_extended_regex_quantifier() { + let (lines, mut line) = make_providers("/a{2,3}/p"); + let parsed = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap(); + assert_eq!(parsed, "a{2,3}"); + assert_eq!(line.current(), '/'); + } + + #[test] + fn test_extended_regex_with_unmatched_brace_quantifier() { + let (lines, mut line) = make_providers("/a{2,3/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Unmatched \\{")); + } + + #[test] + fn test_extended_regex_with_empty_quantifier() { + let (lines, mut line) = make_providers("/a{}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_extended_regex_with_whitespace_quantifier() { + let (lines, mut line) = make_providers("/a{}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_extended_regex_with_invalid_m() { + let (lines, mut line) = make_providers("/a{2d,3}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_extended_regex_with_invalid_n() { + let (lines, mut line) = make_providers("/a{2,-3}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_extended_regex_with_m_gt_n() { + let (lines, mut line) = make_providers("/a{3,2}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + #[test] fn errors_on_unterminated_regex() { let (lines, mut line) = make_providers("/unterminated"); - let err = parse_regex(&lines, &mut line).unwrap_err(); + let err = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap_err(); assert!(err.to_string().contains("unterminated regular expression")); } #[test] fn errors_on_esc_at_re_eol() { let (lines, mut line) = make_providers("/foo\\"); - let err = parse_regex(&lines, &mut line).unwrap_err(); + let err = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap_err(); assert!(err.to_string().contains("unterminated regular expression")); } #[test] fn errors_on_backslash_delimiter() { let (lines, mut line) = make_providers("\\bad"); - let err = parse_regex(&lines, &mut line).unwrap_err(); + let err = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap_err(); assert!( err.to_string() .contains("\\ cannot be used as a string delimiter") @@ -809,7 +1079,7 @@ mod tests { #[test] fn test_regex_with_character_class() { let (lines, mut line) = make_providers("/[a-z]/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "[a-z]"); assert_eq!(line.current(), '/'); } @@ -817,7 +1087,7 @@ mod tests { #[test] fn test_regex_with_bracket_delimiter() { let (lines, mut line) = make_providers("[abc["); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "abc"); assert_eq!(line.current(), '['); } @@ -825,7 +1095,7 @@ mod tests { #[test] fn test_bracket_regex_with_bracket_delimiter() { let (lines, mut line) = make_providers("[a\\[0-9]bc["); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "a[0-9]bc"); assert_eq!(line.current(), '['); } @@ -833,7 +1103,7 @@ mod tests { #[test] fn test_regex_with_escaped_bracket_in_character_class() { let (lines, mut line) = make_providers("/[a\\]z]/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "[a\\]z]"); assert_eq!(line.current(), '/'); } @@ -841,7 +1111,7 @@ mod tests { #[test] fn test_regex_with_delimiter_inside_character_class() { let (lines, mut line) = make_providers("/[a/c]/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "[a/c]"); assert_eq!(line.current(), '/'); } @@ -849,11 +1119,150 @@ mod tests { #[test] fn test_regex_with_escaped_paren_and_backslash() { let (lines, mut line) = make_providers("/\\(\\\\/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "\\(\\\\"); assert_eq!(line.current(), '/'); } + // validate_quantifier_structure + //BRE tests + #[test] + fn test_validate_quantifier_structure_bre_valid() { + let (lines, mut line) = make_providers("{2,3\\}"); + let result = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Basic).unwrap(); + assert_eq!(result, 0); + assert_eq!(line.current(), '{'); // Line should be back on the opening brace + } + + #[test] + fn test_validate_quantifier_structure_bre_with_unmatched_brace() { + let (lines, mut line) = make_providers("{2,3"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Unmatched \\{")); + } + + #[test] + fn test_validate_quantifier_structure_bre_with_empty_content() { + let (lines, mut line) = make_providers("{\\}"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_validate_quantifier_structure_bre_with_invalid_char() { + let (lines, mut line) = make_providers("{2d,3\\}"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_validate_quantifier_structure_bre_with_double_comma() { + let (lines, mut line) = make_providers("{2,3,\\}"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + // ERE tests + #[test] + fn test_validate_quantifier_structure_ere_valid() { + let (lines, mut line) = make_providers("{2,3}"); + let result = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Extended).unwrap(); + assert_eq!(result, 0); + assert_eq!(line.current(), '{'); // Line should be back on the opening brace + } + + #[test] + fn test_validate_quantifier_structure_ere_with_unmatched_brace() { + let (lines, mut line) = make_providers("{2,3"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Unmatched \\{")); + } + + #[test] + fn test_validate_quantifier_structure_ere_with_empty_content() { + let (lines, mut line) = make_providers("{}"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_validate_quantifier_structure_ere_with_invalid_char() { + let (lines, mut line) = make_providers("{2d,3}"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_validate_quantifier_structure_ere_with_double_comma() { + let (lines, mut line) = make_providers("{2,3,}"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + // validate_quantifier_numbers + #[test] + fn test_validate_quantifier_numbers_with_m() { + let (lines, mut line) = make_providers("{2}"); + let result = validate_quantifier_numbers(&lines, &mut line).unwrap(); + assert_eq!(result, "2"); + assert_eq!(line.current(), '}'); + } + + #[test] + fn test_validate_quantifier_numbers_with_single_comma() { + let (lines, mut line) = make_providers("{,}"); + let result = validate_quantifier_numbers(&lines, &mut line).unwrap(); + assert_eq!(result, "0,"); + assert_eq!(line.current(), '}'); + } + + #[test] + fn test_validate_quantifier_numbers_with_comma_n() { + let (lines, mut line) = make_providers("{,3}"); + let result = validate_quantifier_numbers(&lines, &mut line).unwrap(); + assert_eq!(result, "0,3"); + assert_eq!(line.current(), '}'); + } + + #[test] + fn test_validate_quantifier_numbers_valid() { + let (lines, mut line) = make_providers("{2,3}"); + let result = validate_quantifier_numbers(&lines, &mut line).unwrap(); + assert_eq!(result, "2,3"); + assert_eq!(line.current(), '}'); + } + + #[test] + fn test_validate_quantifier_numbers_with_m_too_big() { + let (lines, mut line) = make_providers("{32768}"); + let err = validate_quantifier_numbers(&lines, &mut line).unwrap_err(); + assert!(err.to_string().contains("Regular expression too big")); + } + + #[test] + fn test_validate_quantifier_numbers_with_n_too_big() { + let (lines, mut line) = make_providers("{2,32768}"); + let err = validate_quantifier_numbers(&lines, &mut line).unwrap_err(); + assert!(err.to_string().contains("Regular expression too big")); + } + + #[test] + fn test_validate_quantifier_numbers_with_m_gt_n() { + let (lines, mut line) = make_providers("{3,2}"); + let err = validate_quantifier_numbers(&lines, &mut line).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + // parse_transliteration #[test] fn test_simple_transliteration() { diff --git a/src/sed/script_char_provider.rs b/src/sed/script_char_provider.rs index a3e3a85f..4e4ecd16 100644 --- a/src/sed/script_char_provider.rs +++ b/src/sed/script_char_provider.rs @@ -34,6 +34,11 @@ impl ScriptCharProvider { self.pos = self.pos.saturating_sub(n); } + /// Sets new current position. + pub fn set_position(&mut self, pos: usize) { + self.pos = pos; + } + /// Returns the current character. Panics if out of bounds. pub fn current(&self) -> char { self.line[self.pos] diff --git a/tests/by-util/test_sed.rs b/tests/by-util/test_sed.rs index 401e887d..adb6f8db 100644 --- a/tests/by-util/test_sed.rs +++ b/tests/by-util/test_sed.rs @@ -284,6 +284,157 @@ check_output!(addr_range_step_zero, ["-n", "10~0p", LINES1]); check_output!(addr_range_end_multiple, ["-n", "/l1_2/,~10p", LINES1]); //////////////////////////////////////////////////////////// + +// Quantifiers: {m,n} +// m and n are considered to be the first and second numbers in the interval, respectively. + +const REGEX_QUANTIFIERS_INPUT: &str = + "Hello World\nHelo World\nHelllllo World\nHeo Word\nHeo Worl}d\n"; + +#[test] +fn ere_quantifier_exactly_m() { + new_ucmd!() + .args(&["-n", "-E", "-e", "/l{2}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is("Hello World\nHelllllo World\n"); +} + +#[test] +fn ere_quantifier_minimum_m() { + new_ucmd!() + .args(&["-n", "-E", "-e", "/l{1,}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is("Hello World\nHelo World\nHelllllo World\nHeo Worl}d\n"); +} + +#[test] +fn ere_quantifier_m_to_n() { + new_ucmd!() + .args(&["-n", "-E", "-e", "/l{3,4}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is("Helllllo World\n"); +} + +#[test] +fn ere_quantifier_comma_n() { + new_ucmd!() + .args(&["-n", "-E", "-e", "/l{,4}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is(REGEX_QUANTIFIERS_INPUT); +} + +#[test] +fn bre_quantifier_minimum_m() { + new_ucmd!() + .args(&["-n", "-e", "/l\\{3,\\}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is("Helllllo World\n"); +} + +#[test] +fn bre_quantifier_comma() { + new_ucmd!() + .args(&["-n", "-e", "/l\\{,\\}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is(REGEX_QUANTIFIERS_INPUT); +} + +#[test] +fn bre_quantifier_only_closing_brace() { + new_ucmd!() + .args(&["-n", "-e", "/l\\}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is("Heo Worl}d\n"); +} + +#[test] +fn test_ere_quantifier_n_gt_m() { + new_ucmd!() + .args(&["-E", "-e", "/l{3,2}/p"]) + .fails() + .code_is(1) + .stderr_contains("Invalid content of \\{\\}"); +} + +#[test] +fn test_ere_quantifier_negative_m() { + new_ucmd!() + .args(&["-E", "-e", "/l{-2,4}/p"]) + .fails() + .code_is(1) + .stderr_contains("Invalid content of \\{\\}"); +} + +#[test] +fn test_ere_quantifier_invalid_m() { + new_ucmd!() + .args(&["-E", "-e", "/l{d,}/p"]) + .fails() + .code_is(1) + .stderr_contains("Invalid content of \\{\\}"); +} + +#[test] +fn test_ere_quantifier_m_too_big() { + new_ucmd!() + .args(&["-E", "-e", "/l{32768,}/p"]) + .fails() + .code_is(1) + .stderr_contains("Regular expression too big"); +} + +#[test] +fn test_ere_quantifier_empty() { + new_ucmd!() + .args(&["-E", "-e", "/l{}/p"]) + .fails() + .code_is(1) + .stderr_contains("Invalid content of \\{\\}"); +} + +#[test] +fn test_ere_quantifier_whitespace() { + new_ucmd!() + .args(&["-E", "-e", "/l{ }/p"]) + .fails() + .code_is(1) + .stderr_contains("Invalid content of \\{\\}"); +} + +#[test] +fn test_ere_quantifier_unmatched_brace() { + new_ucmd!() + .args(&["-E", "-e", "/l{,/p"]) + .fails() + .code_is(1) + .stderr_contains("Unmatched \\{"); +} + +#[test] +fn test_ere_quantifier_unmatched_brace_2() { + new_ucmd!() + .args(&["-E", "-e", "/l{m,n/p"]) + .fails() + .code_is(1) + .stderr_contains("Unmatched \\{"); +} + +#[test] +fn test_bre_quantifier_unmatched_brace() { + new_ucmd!() + .args(&["-e", "/l\\{1,2}/p"]) + .fails() + .code_is(1) + .stderr_contains("Unmatched \\{"); +} + // Substitution: s check_output!(subst_any, ["-e", r"s/./X/g", LINES1]); check_output!(subst_any_global, ["-e", r"s,.,X,g", LINES1]);