From 00f978bdd7590cddfcb1308365bdbe6592d1df2a Mon Sep 17 00:00:00 2001 From: Oz Date: Fri, 1 May 2026 22:04:01 +0000 Subject: [PATCH] fix: preserve markdown tables on notebook paste Co-Authored-By: adiaz327 --- app/src/notebooks/editor/view.rs | 7 +- crates/markdown_parser/src/html_parser.rs | 106 +++++++++++++++++- .../markdown_parser/src/html_parser_test.rs | 27 +++-- 3 files changed, 124 insertions(+), 16 deletions(-) diff --git a/app/src/notebooks/editor/view.rs b/app/src/notebooks/editor/view.rs index ae62959cf..9d76dceff 100644 --- a/app/src/notebooks/editor/view.rs +++ b/app/src/notebooks/editor/view.rs @@ -5,7 +5,7 @@ use std::{ sync::atomic::{AtomicBool, Ordering}, }; -use markdown_parser::{parse_html, parse_markdown, FormattedText}; +use markdown_parser::{parse_html, parse_markdown, parse_markdown_with_gfm_tables, FormattedText}; use pathfinder_geometry::vector::vec2f; use string_offset::CharOffset; use warp_editor::{ @@ -1965,6 +1965,11 @@ impl RichTextEditorView { fn paste_content(&mut self, content: ClipboardContent, ctx: &mut ViewContext) { let parsed_html = content.html.and_then(|text| parse_html(text.as_str()).ok()); + let parse_markdown = if FeatureFlag::MarkdownTables.is_enabled() { + parse_markdown_with_gfm_tables + } else { + parse_markdown + }; // If we failed to get the html string, try parsing plain text into markdown first. // If that failed as well, fall back to pasting plain text string. diff --git a/crates/markdown_parser/src/html_parser.rs b/crates/markdown_parser/src/html_parser.rs index 297fc9902..91b0b2f0d 100644 --- a/crates/markdown_parser/src/html_parser.rs +++ b/crates/markdown_parser/src/html_parser.rs @@ -12,9 +12,9 @@ use html5ever::{ use markup5ever_rcdom::{Node, NodeData, RcDom}; use crate::{ - CodeBlockText, FormattedIndentTextInline, FormattedTaskList, FormattedText, + CodeBlockText, FormattedIndentTextInline, FormattedTable, FormattedTaskList, FormattedText, FormattedTextFragment, FormattedTextHeader, FormattedTextInline, FormattedTextLine, - FormattedTextStyles, Hyperlink, OrderedFormattedIndentTextInline, + FormattedTextStyles, Hyperlink, OrderedFormattedIndentTextInline, TableAlignment, markdown_parser::RUNNABLE_BLOCK_MARKDOWN_LANG, weight::CustomWeight, }; @@ -22,7 +22,7 @@ use crate::{ // Note that we have "" here because GDocs always include a top level element to add additional // GDocs specific meta-data for its rich text content. const TOP_LEVEL_ELEMENT_TAGS_TO_SKIP: &[&str] = &[ - "head", "body", "html", "meta", "table", "b", "div", "ul", "ol", "li", "input", + "head", "body", "html", "meta", "b", "div", "ul", "ol", "li", "input", ]; const PHRASING_ELEMENT_TAGS: &[&str] = &[ "span", "i", "code", "strong", "em", "br", "a", "s", "u", "ins", @@ -335,6 +335,7 @@ pub fn parse_html(html: &str) -> Result { }), "br" => FormattedTextLine::LineBreak, "hr" => FormattedTextLine::HorizontalRule, + "table" => FormattedTextLine::Table(parse_table(node.as_ref())), _ => { // Take into consideration the indent level when parsing the nodes. let parsed_node = parse_pending_inline_nodes( @@ -365,6 +366,105 @@ pub fn parse_html(html: &str) -> Result { Ok(FormattedText { lines: result }) } +fn parse_table(table: &Node) -> FormattedTable { + let mut row_nodes = Vec::new(); + collect_table_rows(table, &mut row_nodes); + + let rows = row_nodes + .iter() + .filter_map(|row| { + let cells = collect_table_cells(row) + .into_iter() + .map(|cell| parse_phrasing_content(&cell.children.borrow(), Styling::default())) + .collect::>(); + + (!cells.is_empty()).then_some(cells) + }) + .collect::>(); + + let (headers, rows) = match rows.split_first() { + Some((headers, rows)) => (headers.clone(), rows.to_vec()), + None => (Vec::new(), Vec::new()), + }; + + let mut alignments = row_nodes + .first() + .map(|row| { + collect_table_cells(row) + .into_iter() + .map(|cell| parse_table_cell_alignment(&cell)) + .collect::>() + }) + .unwrap_or_default(); + + let mut table = FormattedTable { + headers, + alignments: std::mem::take(&mut alignments), + rows, + }; + table.normalize_shape(); + table +} + +fn collect_table_rows(node: &Node, rows: &mut Vec>) { + for child in node.children.borrow().iter() { + if element_name(child) == Some("tr") { + rows.push(Rc::clone(child)); + continue; + } + + collect_table_rows(child.as_ref(), rows); + } +} + +fn collect_table_cells(row: &Node) -> Vec> { + row.children + .borrow() + .iter() + .filter(|child| matches!(element_name(child), Some("td" | "th"))) + .cloned() + .collect() +} + +fn element_name(node: &Rc) -> Option<&str> { + match &node.data { + NodeData::Element { name, .. } => Some(name.local.as_ref()), + NodeData::Document + | NodeData::Doctype { .. } + | NodeData::Text { .. } + | NodeData::Comment { .. } + | NodeData::ProcessingInstruction { .. } => None, + } +} + +fn parse_table_cell_alignment(cell: &Node) -> TableAlignment { + let NodeData::Element { attrs, .. } = &cell.data else { + return TableAlignment::Left; + }; + let attrs = attrs.borrow(); + + if let Some(alignment) = get_attribute(&attrs, "align").and_then(alignment_from_html_value) { + return alignment; + } + + get_attribute(&attrs, "style") + .map(parse_style_into_dict) + .and_then(|style| { + style + .get("text-align") + .and_then(|alignment| alignment_from_html_value(alignment)) + }) + .unwrap_or(TableAlignment::Left) +} + +fn alignment_from_html_value(value: &str) -> Option { + match value.trim().to_ascii_lowercase().as_str() { + "left" | "start" => Some(TableAlignment::Left), + "center" => Some(TableAlignment::Center), + "right" | "end" => Some(TableAlignment::Right), + _ => None, + } +} // Push all pending inline nodes into the result. Take into consideration the active indent level. fn parse_pending_inline_nodes( diff --git a/crates/markdown_parser/src/html_parser_test.rs b/crates/markdown_parser/src/html_parser_test.rs index b1f937793..a511c5c9e 100644 --- a/crates/markdown_parser/src/html_parser_test.rs +++ b/crates/markdown_parser/src/html_parser_test.rs @@ -188,7 +188,6 @@ fn test_transform_non_breaking_spaces() { assert_eq!(test_parse_html(safari_html), expected_text); } -// TODO: remove/update this test when we eventually support these HTML element types! #[test] fn test_unsupported_html_types() { assert_eq!( @@ -203,21 +202,25 @@ fn test_unsupported_html_types() { ]) ] ); +} +#[test] +fn test_parse_table() { assert_eq!( test_parse_html( - "
Text 1Text 2
TestTest
" + r#"
Text 1Text 2
TestTest
"# ), - vec![ - FormattedTextLine::Line(vec![ - FormattedTextFragment::plain_text("Text 1"), - FormattedTextFragment::plain_text("Text 2") - ]), - FormattedTextLine::Line(vec![ - FormattedTextFragment::plain_text("Test"), - FormattedTextFragment::plain_text("Test") - ]) - ] + vec![FormattedTextLine::Table(FormattedTable { + headers: vec![ + vec![FormattedTextFragment::plain_text("Text 1")], + vec![FormattedTextFragment::bold("Text 2")] + ], + alignments: vec![TableAlignment::Left, TableAlignment::Right], + rows: vec![vec![ + vec![FormattedTextFragment::plain_text("Test")], + vec![FormattedTextFragment::italic("Test")] + ]] + })] ); }