# ---- confluence-mdx/bin/reverse_sync/patch_builder.py ----

# Top-level XHTML tags that belong to a named block family.
_TAG_BLOCK_FAMILIES = {
    "p": "paragraph",
    "ul": "list",
    "ol": "list",
    "table": "table",
}

# Mapping types that are already block family names and can be used as-is.
_MAPPING_BLOCK_FAMILIES = frozenset({"paragraph", "list", "heading", "table"})


def _xpath_root_tag(xpath: str) -> str:
    """Return the tag name of the first step of an xpath-like storage path.

    ``"ul[1]/li[2]/p[1]"`` yields ``"ul"`` and ``"p[6]"`` yields ``"p"``.

    A missing or empty path yields ``""`` instead of raising, so callers that
    compare families keep working when a sidecar block carries no xpath.
    """
    if not xpath:
        return ""
    first_step = xpath.partition("/")[0]
    return first_step.partition("[")[0]


def _xpath_block_family(xpath: str) -> str:
    """Convert the top-level tag of ``xpath`` into a block family string.

    ``p`` maps to ``"paragraph"``, ``ul``/``ol`` to ``"list"``, ``table`` to
    ``"table"`` and ``h<digits>`` to ``"heading"``.

    Unknown tags (``pre``, ``blockquote``, ``ac:*`` and so on) are returned
    unchanged, which keeps the cross-type protection conservative: such a tag
    only matches the very same tag.
    """
    root_tag = _xpath_root_tag(xpath)
    family = _TAG_BLOCK_FAMILIES.get(root_tag)
    if family is not None:
        return family
    if root_tag.startswith("h") and root_tag[1:].isdigit():
        return "heading"
    return root_tag


def _mapping_block_family(mapping: "BlockMapping") -> str:
    """Return the block family of ``mapping``.

    The mapping's own ``type`` wins when it is one of the known families;
    otherwise the family is derived from ``mapping.xhtml_xpath``.
    """
    if mapping.type in _MAPPING_BLOCK_FAMILIES:
        return mapping.type
    return _xpath_block_family(mapping.xhtml_xpath)


# ---- confluence-mdx/bin/reverse_sync/rehydrator.py ----

def _extract_frontmatter_title(mdx_blocks: "List[MdxBlock]") -> str:
    """Return the ``title:`` value from the frontmatter blocks, or ``""``.

    The first line starting with ``title:`` (after stripping surrounding
    whitespace) in a block of type ``"frontmatter"`` decides the result. One
    pair of matching single or double quotes around the value is removed.
    Frontmatter blocks without a title line are skipped.
    """
    for block in mdx_blocks:
        if block.type != "frontmatter":
            continue
        for raw_line in block.content.splitlines():
            line = raw_line.strip()
            if not line.startswith("title:"):
                continue
            value = line.partition(":")[2].strip()
            is_quoted = (
                len(value) >= 2
                and value[0] == value[-1]
                and value[0] in ("'", '"')
            )
            return value[1:-1] if is_quoted else value
    return ""


def _content_blocks_for_splice(mdx_text: str) -> "List[MdxBlock]":
    """Parse ``mdx_text`` and return the blocks to splice.

    Blocks whose type is listed in ``_NON_CONTENT`` are removed. When the
    first remaining block is a ``# <title>`` heading whose text equals the
    frontmatter title, that heading is dropped as well.
    NOTE(review): presumably the page title is not part of the XHTML body,
    so it has no sidecar block to match against — confirm.
    """
    mdx_blocks = parse_mdx_blocks(mdx_text)
    content_blocks = [b for b in mdx_blocks if b.type not in _NON_CONTENT]
    if not content_blocks:
        return content_blocks

    frontmatter_title = _extract_frontmatter_title(mdx_blocks)
    first_block = content_blocks[0]
    is_page_title_heading = (
        first_block.type == "heading"
        and frontmatter_title
        and first_block.content.startswith("# ")
        and first_block.content[2:].strip() == frontmatter_title
    )
    if is_page_title_heading:
        return content_blocks[1:]
    return content_blocks
test_verify_case_dir_splice_fails_when_sidecar_missing(tmp_path): assert result.reason.startswith("sidecar_missing") +def test_verify_case_dir_splice_skips_page_title_heading(tmp_path): + case = tmp_path / "100" + case.mkdir() + xhtml = "
Body
" + mdx = "---\ntitle: T\n---\n\n# T\n\n### Overview\n\nBody\n" + (case / "expected.mdx").write_text(mdx, encoding="utf-8") + (case / "page.xhtml").write_text(xhtml, encoding="utf-8") + write_sidecar(build_sidecar(xhtml, mdx, page_id="100"), case / "expected.roundtrip.json") + + result = verify_case_dir_splice(case) + + assert result.passed is True + assert result.reason == "byte_equal_splice" + assert result.matched_count == 2 + assert result.emitted_count == 0 + + class TestSpliceRealTestcases: """실제 testcase에 대한 forced-splice byte-equal 검증.""" diff --git a/confluence-mdx/tests/test_reverse_sync_patch_builder.py b/confluence-mdx/tests/test_reverse_sync_patch_builder.py index c79c026a9..65ea23c8d 100644 --- a/confluence-mdx/tests/test_reverse_sync_patch_builder.py +++ b/confluence-mdx/tests/test_reverse_sync_patch_builder.py @@ -12,9 +12,11 @@ RoundtripSidecar, SidecarBlock, SidecarEntry, + sha256_text, ) from text_utils import normalize_mdx_to_plain from reverse_sync.patch_builder import ( + _find_roundtrip_sidecar_block, _flush_containing_changes, _resolve_mapping_for_change, build_patches, @@ -421,6 +423,121 @@ def test_roundtrip_sidecar_non_paragraph_reconstruction_stays_modify(self): assert patches[0].get('action', 'modify') == 'modify' assert 'new_element_xhtml' not in patches[0] + def test_roundtrip_identity_fallback_rejects_cross_type_sidecar_block(self): + mapping = _make_mapping('m1', 'same text', xpath='p[6]') + change = _make_change(0, 'same text', 'updated text') + roundtrip_sidecar = _make_roundtrip_sidecar([ + SidecarBlock( + 0, + 'table[2]', + '| same text |
same text
| same text |
same text
same text
Hello world
", + reconstruction={ + "kind": "paragraph", + "old_plain_text": "Hello world", + "anchors": [_make_image_anchor(6)], + }, + ) + + assert sidecar_block_requires_reconstruction(sidecar_block) is True + + +def test_sidecar_block_requires_reconstruction_for_list_item_anchors(): + sidecar_block = SidecarBlock( + 0, + "ul[1]", + "button
Hello world
", + reconstruction={ + "kind": "paragraph", + "old_plain_text": "Hello world", + "anchors": [_make_image_anchor(6)], + }, + ) + + result = reconstruct_fragment_with_sidecar( + "Hello brave world
", + sidecar_block, + ) + + assert "button
button again
text
" + + +def test_sidecar_block_requires_reconstruction_false_without_anchors(): + sidecar_block = SidecarBlock( + 0, + "p[1]", + "Hello world
", + reconstruction={ + "kind": "paragraph", + "old_plain_text": "Hello world", + "anchors": [], + }, + ) + + assert sidecar_block_requires_reconstruction(sidecar_block) is False diff --git a/confluence-mdx/tests/test_reverse_sync_rehydrator.py b/confluence-mdx/tests/test_reverse_sync_rehydrator.py index dcda309a9..152c339d4 100644 --- a/confluence-mdx/tests/test_reverse_sync_rehydrator.py +++ b/confluence-mdx/tests/test_reverse_sync_rehydrator.py @@ -114,6 +114,17 @@ def test_preserves_envelope(self): result = splice_rehydrate_xhtml(mdx, sidecar) assert result.xhtml == xhtml + def test_skips_page_title_heading_that_matches_frontmatter(self): + xhtml = "Body
" + mdx = "---\ntitle: T\n---\n\n# T\n\n### Overview\n\nBody\n" + sidecar = build_sidecar(xhtml, mdx) + + result = splice_rehydrate_xhtml(mdx, sidecar) + + assert result.xhtml == xhtml + assert result.matched_count == 2 + assert result.emitted_count == 0 + class TestSpliceRealTestcases: """실제 testcase에 대한 forced-splice byte-equal 검증."""