diff --git a/src/cortex-cli/src/agent_cmd/tests.rs b/src/cortex-cli/src/agent_cmd/tests.rs index e2ff07f9f..18f7ba753 100644 --- a/src/cortex-cli/src/agent_cmd/tests.rs +++ b/src/cortex-cli/src/agent_cmd/tests.rs @@ -3,10 +3,9 @@ #[cfg(test)] mod tests { use crate::agent_cmd::cli::{CopyArgs, ExportArgs}; - use crate::agent_cmd::loader::{ - load_builtin_agents, parse_frontmatter, read_file_with_encoding, - }; + use crate::agent_cmd::loader::{load_builtin_agents, parse_frontmatter}; use crate::agent_cmd::types::AgentMode; + use crate::utils::file::read_file_with_encoding; #[test] fn test_read_file_with_utf8() { diff --git a/src/cortex-cli/src/scrape_cmd/html.rs b/src/cortex-cli/src/scrape_cmd/html.rs index 4e28f6768..f6342a017 100644 --- a/src/cortex-cli/src/scrape_cmd/html.rs +++ b/src/cortex-cli/src/scrape_cmd/html.rs @@ -311,7 +311,8 @@ fn process_node_to_markdown( } "li" => { let indent = " ".repeat(list_depth.saturating_sub(1)); - output.push_str(&format!("\n{indent}- ")); + let marker = list_item_marker(element_ref); + output.push_str(&format!("\n{indent}{marker}")); process_node_to_markdown( element_ref, output, @@ -448,6 +449,28 @@ fn process_node_to_markdown( } } +fn list_item_marker(element_ref: scraper::ElementRef) -> String { + let Some(parent) = element_ref.parent().and_then(scraper::ElementRef::wrap) else { + return "- ".to_string(); + }; + + if parent.value().name() != "ol" { + return "- ".to_string(); + } + + let start = parent + .attr("start") + .and_then(|value| value.parse::<usize>().ok()) + .unwrap_or(1); + let previous_items = element_ref + .prev_siblings() + .filter_map(scraper::ElementRef::wrap) + .filter(|sibling| sibling.value().name() == "li") + .count(); + + format!("{}. ", start + previous_items) +} + /// Convert HTML to plain text. 
pub fn html_to_text(html: &str) -> String { let cleaned = remove_unwanted_elements(html); diff --git a/src/cortex-cli/src/scrape_cmd/tests.rs b/src/cortex-cli/src/scrape_cmd/tests.rs index bd8b39509..7d2a663aa 100644 --- a/src/cortex-cli/src/scrape_cmd/tests.rs +++ b/src/cortex-cli/src/scrape_cmd/tests.rs @@ -41,6 +41,28 @@ mod tests { assert!(!md_no_images.contains("![")); } + #[test] + fn test_html_to_markdown_ordered_lists() { + let html = r#" +
            <ol>
+                <li>Install Rust</li>
+                <li>Run cortex</li>
+                <li>Check output<ul><li>Keep nested unordered item</li></ul></li>
+            </ol>
+            <ol start="4">
+                <li>Continue numbering</li>
+            </ol>
+ "#; + let md = html_to_markdown(html, false, false); + + assert!(md.contains("1. Install Rust"), "got: {md}"); + assert!(md.contains("2. Run cortex"), "got: {md}"); + assert!(md.contains("3. Check output"), "got: {md}"); + assert!(md.contains(" - Keep nested unordered item"), "got: {md}"); + assert!(md.contains("4. Continue numbering"), "got: {md}"); + assert!(!md.contains("- Install Rust"), "got: {md}"); + } + #[test] fn test_html_to_text() { let html = "

Title

Hello world!

";