Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions lib/html-to-storage.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class HtmlDepthExceededError extends Error {
// whatever shape the source had (markdown-it emits them without a slash).
const VOID_TAGS = new Set(['hr']);
const CALLOUT_MARKERS = ['info', 'warning', 'note'];
// Tags that `dispatchTag` hands to `convertHtmlBlock`, which wraps the whole
// element in a Confluence `html` macro with a CDATA body.
const HTML_MACRO_TAGS = new Set(['svg', 'div']);

// Phrasing-content tags that trigger the `<li>` / `<th>` / `<td>` `<p>`-wrap
// quirk: if an item contains only inline children and no text-node newline,
Expand Down Expand Up @@ -218,6 +220,36 @@ function convertBlockquote(node, ctx) {
</ac:structured-macro>`;
}

// `<details>` with a `<summary>` child becomes an `expand` macro: the summary
// supplies the title (with any markup stripped) and the remaining
// non-whitespace children form the rich-text body.
//
// Only the FIRST `<summary>` child is used as the title. Additional
// `<summary>` children are kept as ordinary body content so nothing from the
// source is silently dropped. If no summary child is found, falls through to
// plain HTML.
//
// node: parsed `<details>` element (reads `children` and `attribs`).
// ctx:  conversion context, passed through unchanged to the walkers.
// Returns the storage-format string for the element.
function convertDetails(node, ctx) {
  const children = node.children || [];
  let summaryNode = null;
  const bodyNodes = [];

  for (const child of children) {
    const isSummary = child.type === 'tag' && child.name === 'summary';
    if (isSummary && summaryNode === null) {
      summaryNode = child;
    } else if (isSummary || !isWhitespaceOnly(child)) {
      // A second (or later) summary lands here and stays in document order.
      bodyNodes.push(child);
    }
  }

  if (summaryNode === null) {
    return `<details${renderAttrs(node.attribs)}>${walkChildren(node, ctx)}</details>`;
  }

  // Render the summary's children, then drop every tag so only text remains.
  const titleHtml = walkChildren(summaryNode, ctx);
  const cleanTitle = titleHtml.replace(/<[^>]+>/g, '').trim();

  const bodyHtml = bodyNodes
    .map((child) => walkNode(child, ctx))
    .join('')
    .trim();

  return `<ac:structured-macro ac:name="expand"><ac:parameter ac:name="title">${cleanTitle}</ac:parameter><ac:rich-text-body>${bodyHtml}</ac:rich-text-body></ac:structured-macro>`;
}

// Strict `<pre><code>` adjacency only — `<pre>` with whitespace siblings or
// any other shape falls through as plain `<pre>`. The body needs manual
// entity decode because the parser keeps entities raw and CDATA is opaque
Expand All @@ -243,6 +275,20 @@ function convertCodeBlock(node, ctx) {
return `<ac:structured-macro ac:name="code"><ac:parameter ac:name="language">${language}</ac:parameter><ac:plain-text-body><![CDATA[${body}]]></ac:plain-text-body></ac:structured-macro>`;
}

// Wrap allowlisted HTML tags (svg, div) in Confluence HTML macro with CDATA.
// Used for embedding custom HTML that Confluence doesn't natively support.
function convertHtmlBlock(node, ctx) {
const { randomUUID } = require('crypto');
const inner = walkChildren(node, ctx);
const attrsStr = renderAttrs(node.attribs);
const openTag = `<${node.name}${attrsStr}>`;
const closeTag = `</${node.name}>`;
const htmlContent = openTag + inner + closeTag;
const safeContent = htmlContent.replace(/]]>/g, ']]]]><![CDATA[>');
const macroId = randomUUID();
return `<ac:structured-macro ac:name="html" ac:schema-version="1" ac:macro-id="${macroId}"><ac:plain-text-body><![CDATA[${safeContent}]]></ac:plain-text-body></ac:structured-macro>`;
}

// Re-escape literal `"` inside attribute values. htmlparser2 with
// `decodeEntities: false` keeps source-escaped entities intact, but a
// single-quoted source attribute (`<a title='he said "hi"'>`) lands a
Expand Down Expand Up @@ -359,6 +405,8 @@ function dispatchTag(node, ctx) {
return convertLink(node, ctx);
case 'blockquote':
return convertBlockquote(node, ctx);
case 'details':
return convertDetails(node, ctx);
case 'table':
case 'thead':
case 'tbody':
Expand All @@ -375,6 +423,9 @@ function dispatchTag(node, ctx) {
if (VOID_TAGS.has(node.name)) {
return `<${node.name}${renderAttrs(node.attribs)} />`;
}
if (HTML_MACRO_TAGS.has(node.name)) {
return convertHtmlBlock(node, ctx);
}
return `<${node.name}${renderAttrs(node.attribs)}>${walkChildren(node, ctx)}</${node.name}>`;
}
}
Expand Down
21 changes: 16 additions & 5 deletions lib/macro-converter.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ const STASH_DELIM = '\uE000';
// The body alternation `"[^"]*"|'[^']*'|[^>]` makes the match quote-aware
// so a literal `>` inside a quoted attribute value (e.g.
// `<mark title="1>0">`) does not terminate the tag prematurely.
const PASSTHROUGH_TAG_RE = /<\/?(?:u|sub|sup|mark)(?=[\s/>])(?:"[^"]*"|'[^']*'|[^>])*>/gi;
const PASSTHROUGH_TAG_RE = /<\/?(?:u|sub|sup|mark|details|summary)(?=[\s/>])(?:"[^"]*"|'[^']*'|[^>])*>/gi;
// Block-level HTML elements (`<svg>`, `<div>`) that should pass through
// WITHOUT markdown processing of their content. The whole element, from the
// opening tag to the closing tag of the same name (`\1`), is one match.
// NOTE: the lazy `[\s\S]*?` stops at the FIRST matching closing tag, so an
// element nested inside another of the same name (`<div><div></div></div>`)
// ends the match at the inner closing tag.
const PASSTHROUGH_BLOCK_RE = /<(svg|div)(?:\s[^>]*)?>[\s\S]*?<\/\1>/gi;
// Single-backtick inline code spans. Block-level code (fenced + indented) is
// detected via MarkdownIt's tokenizer in `_findCodeRanges` because a regex
// can't reliably distinguish a 4-space-indented code block from a list-item
Expand Down Expand Up @@ -92,10 +94,19 @@ class MacroConverter {
_renderMarkdownToHtml(markdown) {
const codeRanges = this._findCodeRanges(markdown);
const htmlStash = [];
const stashHtml = (text) => text.replace(PASSTHROUGH_TAG_RE, (m) => {
htmlStash.push(m);
return `${STASH_DELIM}H${htmlStash.length - 1}${STASH_DELIM}`;
});
// Swaps passthrough HTML in `text` for numbered stash placeholders.
// Whole block elements (svg, div and everything inside them) are stashed
// first; otherwise the inline-tag pass could match tags inside those blocks.
const stashHtml = (text) => {
  // Append the match to the stash and return the placeholder for its slot.
  const toPlaceholder = (match) => {
    const slot = htmlStash.push(match) - 1;
    return `${STASH_DELIM}H${slot}${STASH_DELIM}`;
  };

  return text
    .replace(PASSTHROUGH_BLOCK_RE, toPlaceholder)
    .replace(PASSTHROUGH_TAG_RE, toPlaceholder);
};

let src = '';
let pos = 0;
Expand Down
112 changes: 112 additions & 0 deletions tests/html-to-storage.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -393,4 +393,116 @@ describe('htmlToStorage', () => {
expect(() => htmlToStorage(html)).not.toThrow();
});
});

// `<details>` handling: with a `<summary>` child the element becomes an
// `expand` macro; without one it is left as plain HTML.
describe('details/summary conversion', () => {
  test('basic details with summary becomes expand macro', () => {
    const storage = htmlToStorage(
      '<details><summary>Click me</summary><p>Hidden content</p></details>',
    );

    expect(storage).toContain('<ac:structured-macro ac:name="expand">');
    expect(storage).toContain('<ac:parameter ac:name="title">Click me</ac:parameter>');
    expect(storage).toContain('<ac:rich-text-body><p>Hidden content</p></ac:rich-text-body>');
    // The source elements themselves must not survive the conversion.
    expect(storage).not.toContain('<details>');
    expect(storage).not.toContain('<summary>');
  });

  test('summary with inline HTML has tags stripped from title', () => {
    const storage = htmlToStorage(
      '<details><summary>View <em>code</em></summary><p>body</p></details>',
    );

    expect(storage).toContain('<ac:parameter ac:name="title">View code</ac:parameter>');
    expect(storage).not.toContain('<em>code</em>');
  });

  test('details with multiple paragraphs in body', () => {
    const storage = htmlToStorage(
      '<details><summary>More</summary><p>First</p><p>Second</p></details>',
    );

    expect(storage).toContain('<ac:rich-text-body><p>First</p><p>Second</p></ac:rich-text-body>');
  });

  test('details with code block inside body', () => {
    const storage = htmlToStorage(
      '<details><summary>Code</summary><pre><code class="language-js">console.log();</code></pre></details>',
    );

    // The code macro is expected alongside the surrounding expand macro.
    expect(storage).toContain('<ac:structured-macro ac:name="expand">');
    expect(storage).toContain('<ac:structured-macro ac:name="code">');
    expect(storage).toContain('console.log();');
  });

  test('details without summary falls through as plain HTML', () => {
    const storage = htmlToStorage('<details><p>No summary here</p></details>');

    expect(storage).toContain('<details>');
    expect(storage).not.toContain('ac:structured-macro');
  });

  test('nested details inside info callout', () => {
    const storage = htmlToStorage(
      '<blockquote><p><strong>INFO</strong></p><details><summary>More</summary><p>Nested</p></details></blockquote>',
    );

    expect(storage).toContain('<ac:structured-macro ac:name="info">');
    expect(storage).toContain('<ac:structured-macro ac:name="expand">');
    expect(storage).toContain('Nested');
  });

  test('details inside details (nested expand macros)', () => {
    const storage = htmlToStorage(
      '<details><summary>Outer</summary><details><summary>Inner</summary><p>Deep</p></details></details>',
    );

    // One expand macro per `<details>` level.
    const expandMatches = storage.match(/ac:name="expand"/g) || [];
    expect(expandMatches.length).toBe(2);
    expect(storage).toContain('Deep');
  });
});

// Allowlisted tags (svg, div) must be wrapped in the `html` macro with a
// CDATA body; every other tag must come through untouched.
describe('HTML macro wrapping', () => {
  test('SVG block is wrapped in HTML macro with CDATA', () => {
    const storage = htmlToStorage(
      '<svg width="100" height="100"><circle cx="50" cy="50" r="40"/></svg>',
    );

    expect(storage).toContain('<ac:structured-macro ac:name="html"');
    expect(storage).toContain('ac:schema-version="1"');
    expect(storage).toContain('ac:macro-id=');
    expect(storage).toContain('<![CDATA[<svg');
    expect(storage).toContain('<circle cx="50" cy="50" r="40">');
    expect(storage).toContain(']]></ac:plain-text-body>');
  });

  test('div block is wrapped in HTML macro', () => {
    const storage = htmlToStorage('<div class="custom"><p>Content</p></div>');

    expect(storage).toContain('<ac:structured-macro ac:name="html"');
    expect(storage).toContain('<![CDATA[<div class="custom">');
  });

  test('CDATA end marker is escaped in HTML content', () => {
    const storage = htmlToStorage('<div>test]]>end</div>');

    // The terminator must be split across two CDATA sections.
    expect(storage).toContain('test]]]]><![CDATA[>end');
    expect(storage).not.toContain('test]]>end');
  });

  test('each HTML block gets unique UUID', () => {
    const storage = htmlToStorage('<svg id="a"></svg><svg id="b"></svg>');

    const macroIds = storage.match(/ac:macro-id="([^"]+)"/g);
    expect(macroIds).toHaveLength(2);
    const [firstId, secondId] = macroIds;
    expect(firstId).not.toBe(secondId);
  });

  test('video tag is NOT wrapped (not in allowlist)', () => {
    const storage = htmlToStorage('<video src="test.mp4"></video>');

    expect(storage).toBe('<video src="test.mp4"></video>');
    expect(storage).not.toContain('ac:structured-macro');
  });

  test('normal paragraph is NOT wrapped', () => {
    const storage = htmlToStorage('<p>Regular paragraph</p>');

    expect(storage).toBe('<p>Regular paragraph</p>');
    expect(storage).not.toContain('ac:structured-macro');
  });

  test('span is NOT wrapped (not in allowlist)', () => {
    const storage = htmlToStorage('<span>inline</span>');

    expect(storage).toBe('<span>inline</span>');
    expect(storage).not.toContain('ac:structured-macro');
  });
});
});
45 changes: 45 additions & 0 deletions tests/macro-converter.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1316,3 +1316,48 @@ describe('MacroConverter integration smoke tests', () => {
expect(result).toContain('<td><p>c</p></td>');
});
});

// End-to-end: `<details>` written inside markdown must survive the markdown
// pass and come out as an `expand` macro.
describe('markdown with details/summary', () => {
  const cloudConverter = new MacroConverter({ isCloud: true });

  test('details in markdown converts to expand macro', () => {
    const storage = cloudConverter.markdownToStorage(`<details>
<summary>Show more</summary>

Hidden paragraph

</details>`);

    expect(storage).toContain('<ac:structured-macro ac:name="expand">');
    expect(storage).toContain('<ac:parameter ac:name="title">Show more</ac:parameter>');
    expect(storage).toContain('Hidden paragraph');
  });

  test('details with code block inside', () => {
    const storage = cloudConverter.markdownToStorage(`<details>
<summary>View code</summary>

\`\`\`javascript
console.log("test");
\`\`\`

</details>`);

    // The fenced block inside the details must still become a code macro.
    expect(storage).toContain('<ac:structured-macro ac:name="expand">');
    expect(storage).toContain('<ac:structured-macro ac:name="code">');
    expect(storage).toContain('console.log');
  });
});

// End-to-end: a raw `<svg>` block in markdown must reach the output wrapped
// in the `html` macro.
describe('markdown with HTML blocks', () => {
  const cloudConverter = new MacroConverter({ isCloud: true });

  test('SVG in markdown wraps in HTML macro', () => {
    const storage = cloudConverter.markdownToStorage(`<svg width="50" height="50">
<circle cx="25" cy="25" r="20"/>
</svg>`);

    expect(storage).toContain('<ac:structured-macro ac:name="html"');
    expect(storage).toContain('<![CDATA[<svg');
  });
});