diff --git a/confluence-mdx/bin/reverse_sync/patch_builder.py b/confluence-mdx/bin/reverse_sync/patch_builder.py index 6e1df9448..b3ba7ffec 100644 --- a/confluence-mdx/bin/reverse_sync/patch_builder.py +++ b/confluence-mdx/bin/reverse_sync/patch_builder.py @@ -1,6 +1,8 @@ """패치 빌더 — MDX diff 변경과 XHTML 매핑을 결합하여 XHTML 패치를 생성.""" from typing import Dict, List, Optional +from mdx_to_storage.emitter import emit_block +from mdx_to_storage.parser import parse_mdx from reverse_sync.block_diff import BlockChange, NON_CONTENT_TYPES from reverse_sync.mapping_recorder import BlockMapping from mdx_to_storage.parser import Block as MdxBlock @@ -8,7 +10,12 @@ normalize_mdx_to_plain, collapse_ws, ) from reverse_sync.text_transfer import transfer_text_changes -from reverse_sync.sidecar import find_mapping_by_sidecar, SidecarEntry +from reverse_sync.sidecar import ( + RoundtripSidecar, + SidecarBlock, + find_mapping_by_sidecar, + SidecarEntry, +) from reverse_sync.lost_info_patcher import apply_lost_info, distribute_lost_info_to_mappings from reverse_sync.mdx_to_xhtml_inline import mdx_block_to_xhtml_element, mdx_block_to_inner_xhtml from reverse_sync.list_patcher import ( @@ -22,6 +29,81 @@ ) +_CLEAN_BLOCK_TYPES = frozenset(("heading", "code_block", "hr")) + + +def _contains_preserved_anchor_markup(xhtml_text: str) -> bool: + """preservation unit이 있으면 clean whole-fragment replacement 대상이 아니다.""" + return " bool: + """Phase 2 clean block 여부를 판별한다.""" + if mapping is None: + return False + + if block_type in _CLEAN_BLOCK_TYPES: + return True + + if sidecar_block is not None: + recon = sidecar_block.reconstruction + if recon is None: + return False + if recon.get("kind") == "paragraph": + return len(recon.get("anchors", [])) == 0 + return False + + return block_type == "paragraph" and not _contains_preserved_anchor_markup( + mapping.xhtml_text + ) + + +def _can_replace_table_fragment( + change: BlockChange, + mapping: Optional[BlockMapping], + roundtrip_sidecar: Optional[RoundtripSidecar], +) -> bool: + """table 계열을 whole-fragment replacement로 처리할 수 있는지 판별한다.""" + if roundtrip_sidecar is None or mapping is None: + return False + if _contains_preserved_anchor_markup(mapping.xhtml_text): + return False + block = change.new_block or change.old_block + return ( + (block.type == "html_block" and block.content.lstrip().startswith(" str: + """Block content를 현재 forward emitter 기준 fragment로 변환한다.""" + parsed_blocks = [parsed for parsed in parse_mdx(block.content) if parsed.type != "empty"] + if len(parsed_blocks) == 1: + return emit_block(parsed_blocks[0]) + return mdx_block_to_xhtml_element(block) + + +def _build_replace_fragment_patch( + mapping: BlockMapping, + new_block: MdxBlock, + mapping_lost_info: Optional[dict] = None, +) -> Dict[str, str]: + """whole-fragment replacement patch를 생성한다.""" + new_element = _emit_replacement_fragment(new_block) + block_lost = (mapping_lost_info or {}).get(mapping.block_id, {}) + if block_lost: + new_element = apply_lost_info(new_element, block_lost) + return { + "action": "replace_fragment", + "xhtml_xpath": mapping.xhtml_xpath, + "new_element_xhtml": new_element, + } + + def _flush_containing_changes( containing_changes: dict, used_ids: 'set | None' = None, @@ -105,12 +187,18 @@ def build_patches( xpath_to_mapping: Dict[str, 'BlockMapping'], alignment: Optional[Dict[int, int]] = None, page_lost_info: Optional[dict] = None, + roundtrip_sidecar: Optional[RoundtripSidecar] = None, ) -> List[Dict[str, str]]: """diff 변경과 매핑을 결합하여 XHTML 패치 목록을 구성한다. sidecar 인덱스를 사용하여 O(1) 직접 조회를 수행한다. """ patches = [] + xpath_to_sidecar_block: Dict[str, SidecarBlock] = {} + if roundtrip_sidecar is not None: + xpath_to_sidecar_block = { + block.xhtml_xpath: block for block in roundtrip_sidecar.blocks + } used_ids: set = set() # 이미 매칭된 mapping block_id (중복 매칭 방지) # child → parent 역참조 맵 (부모-자식 간 중복 매칭 방지) child_to_parent: dict = {} @@ -154,6 +242,22 @@ def _mark_used(block_id: str, m: BlockMapping): idx, mdx_to_sidecar, xpath_to_mapping) if mapping is None: continue + sidecar_block = xpath_to_sidecar_block.get(mapping.xhtml_xpath) + if _is_clean_block( + add_change.new_block.type, + mapping, + sidecar_block, + ) or _can_replace_table_fragment(del_change, mapping, roundtrip_sidecar): + patches.append( + _build_replace_fragment_patch( + mapping, + add_change.new_block, + mapping_lost_info, + ) + ) + _paired_indices.add(idx) + _mark_used(mapping.block_id, mapping) + continue old_plain = normalize_mdx_to_plain( del_change.old_block.content, del_change.old_block.type) new_plain = normalize_mdx_to_plain( @@ -211,10 +315,20 @@ def _mark_used(block_id: str, m: BlockMapping): continue if strategy == 'table': - patches.extend( - build_table_row_patches( - change, mappings, used_ids, - mdx_to_sidecar, xpath_to_mapping)) + if _can_replace_table_fragment(change, mapping, roundtrip_sidecar): + _mark_used(mapping.block_id, mapping) + patches.append( + _build_replace_fragment_patch( + mapping, + change.new_block, + mapping_lost_info, + ) + ) + else: + patches.extend( + build_table_row_patches( + change, mappings, used_ids, + mdx_to_sidecar, xpath_to_mapping)) continue new_plain = normalize_mdx_to_plain( @@ -236,6 +350,27 @@ def _mark_used(block_id: str, m: BlockMapping): and collapse_ws(new_plain) == collapse_ws(mapping.xhtml_plain_text)): continue + sidecar_block = xpath_to_sidecar_block.get(mapping.xhtml_xpath) + if _can_replace_table_fragment(change, mapping, roundtrip_sidecar): + patches.append( + _build_replace_fragment_patch( + mapping, + change.new_block, + mapping_lost_info, + ) + ) + continue + + if _is_clean_block(change.old_block.type, mapping, sidecar_block): + patches.append( + _build_replace_fragment_patch( + mapping, + change.new_block, + mapping_lost_info, + ) + ) + continue + # 재생성 시 소실되는 XHTML 요소 포함 시 텍스트 전이로 폴백 if (' str: Args: xhtml: 원본 XHTML 문자열 patches: 패치 목록. 각 패치는 dict: - - action: "modify" (기본) | "delete" | "insert" + - action: "modify" (기본) | "delete" | "insert" | "replace_fragment" - modify: xhtml_xpath, old_plain_text, new_plain_text 또는 new_inner_xhtml - delete: xhtml_xpath - insert: after_xpath (None이면 맨 앞), new_element_xhtml + - replace_fragment: xhtml_xpath, new_element_xhtml Returns: 패치된 XHTML 문자열 @@ -25,6 +26,7 @@ def patch_xhtml(xhtml: str, patches: List[Dict[str, str]]) -> str: # 패치를 action별로 분류 delete_patches = [p for p in patches if p.get('action') == 'delete'] insert_patches = [p for p in patches if p.get('action') == 'insert'] + replace_patches = [p for p in patches if p.get('action') == 'replace_fragment'] modify_patches = [p for p in patches if p.get('action', 'modify') == 'modify'] @@ -51,6 +53,12 @@ def patch_xhtml(xhtml: str, patches: List[Dict[str, str]]) -> str: if el is not None: resolved_modifies.append((el, p)) + resolved_replacements = [] + for p in replace_patches: + el = _find_element_by_xpath(soup, p['xhtml_xpath']) + if el is not None: + resolved_replacements.append((el, p)) + # 1단계: delete for element in resolved_deletes: element.decompose() @@ -59,7 +67,11 @@ def patch_xhtml(xhtml: str, patches: List[Dict[str, str]]) -> str: for anchor, patch in resolved_inserts: _insert_element_resolved(soup, anchor, patch['new_element_xhtml']) - # 3단계: modify + # 3단계: replace fragment + for element, patch in resolved_replacements: + _replace_element_resolved(element, patch['new_element_xhtml']) + + # 4단계: modify for element, patch in resolved_modifies: if 'new_inner_xhtml' in patch: old_text = patch.get('old_plain_text', '') @@ -160,6 +172,22 @@ def _replace_inner_html(element: Tag, new_inner_xhtml: str): element.append(child.extract()) +def _replace_element_resolved(element: Tag, new_html: str): + """요소 전체를 새 fragment로 교체한다.""" + new_content = BeautifulSoup(new_html, 'html.parser') + replacements = [child.extract() for child in list(new_content.children)] + if not replacements: + element.decompose() + return + + first = replacements[0] + element.replace_with(first) + prev = first + for child in replacements[1:]: + prev.insert_after(child) + prev = child + + def _find_element_by_xpath(soup: BeautifulSoup, xpath: str): """간이 XPath로 요소를 찾는다. diff --git a/confluence-mdx/bin/reverse_sync_cli.py b/confluence-mdx/bin/reverse_sync_cli.py index 90710e098..20d32a68c 100755 --- a/confluence-mdx/bin/reverse_sync_cli.py +++ b/confluence-mdx/bin/reverse_sync_cli.py @@ -354,6 +354,7 @@ def run_verify( from reverse_sync.sidecar import ( SidecarEntry, SidecarChildEntry, generate_sidecar_mapping, build_mdx_to_sidecar_index, build_xpath_to_mapping, + build_sidecar, ) # forward converter가 생성한 mapping.yaml에서 lost_info를 보존 existing_mapping = var_dir / 'mapping.yaml' @@ -386,12 +387,14 @@ def run_verify( children=children, )) mdx_to_sidecar = build_mdx_to_sidecar_index(sidecar_entries) + roundtrip_sidecar = build_sidecar(xhtml, original_mdx, page_id=page_id) xpath_to_mapping = build_xpath_to_mapping(original_mappings) # Step 4: XHTML 패치 → patched.xhtml 저장 patches = build_patches(changes, original_blocks, improved_blocks, original_mappings, mdx_to_sidecar, xpath_to_mapping, - alignment, page_lost_info=page_lost_info) + alignment, page_lost_info=page_lost_info, + roundtrip_sidecar=roundtrip_sidecar) patched_xhtml = patch_xhtml(xhtml, patches) (var_dir / 'reverse-sync.patched.xhtml').write_text(patched_xhtml) diff --git a/confluence-mdx/tests/test_reverse_sync_cli.py b/confluence-mdx/tests/test_reverse_sync_cli.py index 39ca88f3c..a9d79eff0 100644 --- a/confluence-mdx/tests/test_reverse_sync_cli.py +++ b/confluence-mdx/tests/test_reverse_sync_cli.py @@ -567,8 +567,8 @@ def testbuild_patches_index_mapping(): assert len(patches) == 1 assert patches[0]['xhtml_xpath'] == 'p[1]' - assert patches[0]['old_plain_text'] == 'Old text.' - assert patches[0]['new_inner_xhtml'] == 'New text.' + assert patches[0]['action'] == 'replace_fragment' + assert patches[0]['new_element_xhtml'] == '

New text.

' def testbuild_patches_skips_non_content(): @@ -787,7 +787,12 @@ def testbuild_patches_table_block(): from reverse_sync.mdx_block_parser import MdxBlock from reverse_sync.block_diff import BlockChange from reverse_sync.mapping_recorder import BlockMapping - from reverse_sync.sidecar import SidecarEntry + from reverse_sync.sidecar import ( + DocumentEnvelope, + RoundtripSidecar, + SidecarBlock, + SidecarEntry, + ) old_table = '\n\n
\n**Databased Access Control**\n
\n' new_table = '\n\n
\n**Database Access Control**\n
\n' @@ -809,19 +814,23 @@ def testbuild_patches_table_block(): mdx_to_sidecar = { 0: SidecarEntry(xhtml_xpath='table[1]', xhtml_type='table', mdx_blocks=[0]), } + roundtrip_sidecar = RoundtripSidecar( + page_id='test', + blocks=[SidecarBlock(0, 'table[1]', '...
', 'hash1', (1, 5))], + separators=[], + document_envelope=DocumentEnvelope(prefix='', suffix='\n'), + ) xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} patches = build_patches(changes, original_blocks, improved_blocks, mappings, - mdx_to_sidecar, xpath_to_mapping) + mdx_to_sidecar, xpath_to_mapping, + roundtrip_sidecar=roundtrip_sidecar) assert len(patches) == 1 assert patches[0]['xhtml_xpath'] == 'table[1]' - assert patches[0]['old_plain_text'] == 'Databased Access Control' - # bold content가 변경되어 has_inline_format_change()가 True → - # new_inner_xhtml 패치가 생성됨 (outer 없이 innerHTML만 포함) - assert 'new_inner_xhtml' in patches[0] - assert 'Database Access Control' in patches[0]['new_inner_xhtml'] - assert not patches[0]['new_inner_xhtml'].startswith('Database Access Control' in patches[0]['new_element_xhtml'] + assert patches[0]['new_element_xhtml'].startswith(' SidecarEntry: return SidecarEntry(xhtml_xpath=xpath, xhtml_type='paragraph', mdx_blocks=mdx_blocks) +def _make_roundtrip_sidecar(blocks): + return RoundtripSidecar( + page_id='test', + blocks=blocks, + separators=['\n'] * (len(blocks) - 1) if len(blocks) > 1 else [], + document_envelope=DocumentEnvelope(prefix='', suffix='\n'), + ) + + # ── Helper 함수 테스트 ── @@ -281,8 +295,7 @@ def test_path6_sidecar_match_text_mismatch_remapping(self): assert len(patches) == 1 assert patches[0]['xhtml_xpath'] == 'p[1]' - # 직접 매칭 + text_transfer 사용 - def test_direct_match_with_transfer(self): + def test_clean_paragraph_generates_replace_fragment(self): m1 = _make_mapping('m1', 'hello world', xpath='p[1]') mappings = [m1] xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} @@ -295,24 +308,118 @@ def test_direct_match_with_transfer(self): mappings, mdx_to_sidecar, xpath_to_mapping) assert len(patches) == 1 - # R1: 항상 inner XHTML 재생성 - assert 'earth' in patches[0]['new_inner_xhtml'] + assert patches[0]['action'] == 'replace_fragment' + assert patches[0]['new_element_xhtml'] == '

hello earth

' + + def test_paragraph_with_preserved_anchor_uses_legacy_modify_patch(self): + m1 = _make_mapping( + 'm1', + 'hello world', + xpath='p[1]', + ) + m1.xhtml_text = '

hello

' + mappings = [m1] + xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} + + change = _make_change(0, 'hello world', 'hello earth') + mdx_to_sidecar = self._setup_sidecar('p[1]', 0) + + patches = build_patches( + [change], [change.old_block], [change.new_block], + mappings, mdx_to_sidecar, xpath_to_mapping) + + assert len(patches) == 1 + assert patches[0].get('action', 'modify') == 'modify' + assert patches[0]['new_plain_text'] == 'hello earth' + + def test_roundtrip_sidecar_paragraph_without_anchors_uses_replace_fragment(self): + m1 = _make_mapping('m1', 'hello world', xpath='p[1]') + mappings = [m1] + xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} + mdx_to_sidecar = self._setup_sidecar('p[1]', 0) + roundtrip_sidecar = _make_roundtrip_sidecar([ + SidecarBlock( + 0, 'p[1]', '

hello world

', 'hash1', (1, 1), + reconstruction={'kind': 'paragraph', 'old_plain_text': 'hello world', 'anchors': []}, + ) + ]) + + change = _make_change(0, 'hello world', 'hello earth') + patches = build_patches( + [change], [change.old_block], [change.new_block], + mappings, mdx_to_sidecar, xpath_to_mapping, + roundtrip_sidecar=roundtrip_sidecar) + + assert len(patches) == 1 + assert patches[0]['action'] == 'replace_fragment' + assert patches[0]['new_element_xhtml'] == '

hello earth

' - # 직접 매칭 + text_transfer 미사용 (텍스트 동일) - def test_direct_match_no_transfer(self): + def test_roundtrip_sidecar_paragraph_with_anchors_stays_modify(self): m1 = _make_mapping('m1', 'hello world', xpath='p[1]') mappings = [m1] xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} + mdx_to_sidecar = self._setup_sidecar('p[1]', 0) + roundtrip_sidecar = _make_roundtrip_sidecar([ + SidecarBlock( + 0, 'p[1]', '

hello world

', 'hash1', (1, 1), + reconstruction={ + 'kind': 'paragraph', + 'old_plain_text': 'hello world', + 'anchors': [{'anchor_id': 'a1'}], + }, + ) + ]) change = _make_change(0, 'hello world', 'hello earth') + patches = build_patches( + [change], [change.old_block], [change.new_block], + mappings, mdx_to_sidecar, xpath_to_mapping, + roundtrip_sidecar=roundtrip_sidecar) + + assert len(patches) == 1 + assert patches[0].get('action', 'modify') == 'modify' + assert 'new_plain_text' in patches[0] or 'new_inner_xhtml' in patches[0] + + def test_roundtrip_sidecar_without_reconstruction_stays_modify(self): + m1 = _make_mapping('m1', 'hello world', xpath='p[1]') + mappings = [m1] + xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} mdx_to_sidecar = self._setup_sidecar('p[1]', 0) + roundtrip_sidecar = _make_roundtrip_sidecar([ + SidecarBlock(0, 'p[1]', '

hello world

', 'hash1', (1, 1), reconstruction=None) + ]) + change = _make_change(0, 'hello world', 'hello earth') patches = build_patches( [change], [change.old_block], [change.new_block], - mappings, mdx_to_sidecar, xpath_to_mapping) + mappings, mdx_to_sidecar, xpath_to_mapping, + roundtrip_sidecar=roundtrip_sidecar) + + assert len(patches) == 1 + assert patches[0].get('action', 'modify') == 'modify' + assert 'new_element_xhtml' not in patches[0] + + def test_roundtrip_sidecar_non_paragraph_reconstruction_stays_modify(self): + m1 = _make_mapping('m1', 'hello world', xpath='p[1]') + mappings = [m1] + xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} + mdx_to_sidecar = self._setup_sidecar('p[1]', 0) + roundtrip_sidecar = _make_roundtrip_sidecar([ + SidecarBlock( + 0, 'p[1]', '

hello world

', 'hash1', (1, 1), + reconstruction={'kind': 'html_block', 'old_plain_text': 'hello world'}, + ) + ]) + + change = _make_change(0, 'hello world', 'hello earth') + patches = build_patches( + [change], [change.old_block], [change.new_block], + mappings, mdx_to_sidecar, xpath_to_mapping, + roundtrip_sidecar=roundtrip_sidecar) assert len(patches) == 1 - assert patches[0]['new_inner_xhtml'] == 'hello earth' + assert patches[0].get('action', 'modify') == 'modify' + assert 'new_element_xhtml' not in patches[0] # NON_CONTENT_TYPES 스킵 def test_skips_non_content_types(self): @@ -331,9 +438,8 @@ def test_skips_non_content_types(self): assert patches == [] - # Inline format 변경 → new_inner_xhtml 패치 생성 - def test_direct_inline_code_added_generates_inner_xhtml(self): - """paragraph에서 backtick이 추가되면 new_inner_xhtml 패치를 생성한다.""" + def test_direct_inline_code_added_generates_replace_fragment(self): + """simple paragraph는 inline formatting 변화도 fragment replacement를 사용한다.""" m1 = _make_mapping('m1', 'QueryPie는 https://example.com/과 같은 URL', xpath='p[1]') mappings = [m1] xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} @@ -350,12 +456,12 @@ def test_direct_inline_code_added_generates_inner_xhtml(self): mappings, mdx_to_sidecar, xpath_to_mapping) assert len(patches) == 1 - assert 'new_inner_xhtml' in patches[0] - assert 'https://example.com/' in patches[0]['new_inner_xhtml'] - assert 'new_plain_text' not in patches[0] + assert patches[0]['action'] == 'replace_fragment' + assert 'https://example.com/' in patches[0]['new_element_xhtml'] + assert patches[0]['new_element_xhtml'].startswith('

') - def test_direct_text_only_change_uses_inner_xhtml_patch(self): - """R1: 텍스트만 바뀌어도 inner XHTML 재생성 패치를 사용한다.""" + def test_direct_text_only_change_uses_replace_fragment(self): + """Phase 2: simple paragraph의 기본 경로는 whole-fragment replacement다.""" m1 = _make_mapping('m1', 'hello world', xpath='p[1]') mappings = [m1] xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} @@ -368,8 +474,8 @@ def test_direct_text_only_change_uses_inner_xhtml_patch(self): mappings, mdx_to_sidecar, xpath_to_mapping) assert len(patches) == 1 - assert 'new_inner_xhtml' in patches[0] - assert 'new_plain_text' not in patches[0] + assert patches[0]['action'] == 'replace_fragment' + assert patches[0]['new_element_xhtml'] == '

hello earth

' # sidecar 미스 → skip (텍스트 포함 검색 폴백 제거됨) def test_multiple_changes_grouped_to_containing(self): @@ -391,7 +497,7 @@ def test_multiple_changes_grouped_to_containing(self): assert len(patches) == 0 def test_direct_heading_inline_code_added(self): - """heading에서 backtick 추가 시 new_inner_xhtml 패치를 생성한다.""" + """heading은 fragment replacement를 사용한다.""" m1 = _make_mapping('m1', 'kubectl 명령어 가이드', xpath='h2[1]', type_='heading') mappings = [m1] @@ -399,8 +505,8 @@ def test_direct_heading_inline_code_added(self): change = _make_change( 0, - '## kubectl 명령어 가이드\n', - '## `kubectl` 명령어 가이드\n', + '### kubectl 명령어 가이드\n', + '### `kubectl` 명령어 가이드\n', type_='heading', ) mdx_to_sidecar = self._setup_sidecar('h2[1]', 0) @@ -410,11 +516,11 @@ def test_direct_heading_inline_code_added(self): mappings, mdx_to_sidecar, xpath_to_mapping) assert len(patches) == 1 - assert 'new_inner_xhtml' in patches[0] - assert 'kubectl' in patches[0]['new_inner_xhtml'] + assert patches[0]['action'] == 'replace_fragment' + assert patches[0]['new_element_xhtml'] == '

kubectl 명령어 가이드

' - def test_direct_bold_added_generates_inner_xhtml(self): - """paragraph에서 bold가 추가되면 new_inner_xhtml 패치를 생성한다.""" + def test_direct_bold_added_generates_replace_fragment(self): + """simple paragraph에서 bold가 추가되면 fragment replacement를 생성한다.""" m1 = _make_mapping('m1', '중요한 설정입니다', xpath='p[1]') mappings = [m1] xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} @@ -431,8 +537,80 @@ def test_direct_bold_added_generates_inner_xhtml(self): mappings, mdx_to_sidecar, xpath_to_mapping) assert len(patches) == 1 - assert 'new_inner_xhtml' in patches[0] - assert '중요한' in patches[0]['new_inner_xhtml'] + assert patches[0]['action'] == 'replace_fragment' + assert patches[0]['new_element_xhtml'] == '

중요한 설정입니다

' + + def test_markdown_table_change_generates_replace_fragment(self): + m1 = _make_mapping('m1', 'Header1 Header2 old_val other', xpath='table[1]', + type_='table') + mappings = [m1] + xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} + roundtrip_sidecar = _make_roundtrip_sidecar([ + SidecarBlock(0, 'table[1]', '
', 'hash1', (1, 3)) + ]) + + change = _make_change( + 0, + '| Header1 | Header2 |\n| --- | --- |\n| old_val | other |', + '| Header1 | Header2 |\n| --- | --- |\n| new_val | other |', + ) + mdx_to_sidecar = self._setup_sidecar('table[1]', 0) + + patches = build_patches( + [change], [change.old_block], [change.new_block], + mappings, mdx_to_sidecar, xpath_to_mapping, + roundtrip_sidecar=roundtrip_sidecar) + + assert len(patches) == 1 + assert patches[0]['action'] == 'replace_fragment' + assert '' in patches[0]['new_element_xhtml'] + assert 'new_val' in patches[0]['new_element_xhtml'] + + def test_markdown_table_without_roundtrip_sidecar_keeps_row_patch_path(self): + m1 = _make_mapping('m1', 'Header1 Header2 old_val other', xpath='table[1]', + type_='table') + mappings = [m1] + xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} + mdx_to_sidecar = self._setup_sidecar('table[1]', 0) + + change = _make_change( + 0, + '| Header1 | Header2 |\n| --- | --- |\n| old_val | other |', + '| Header1 | Header2 |\n| --- | --- |\n| new_val | other |', + ) + + patches = build_patches( + [change], [change.old_block], [change.new_block], + mappings, mdx_to_sidecar, xpath_to_mapping) + + assert len(patches) == 1 + assert patches[0].get('action', 'modify') == 'modify' + assert patches[0]['new_plain_text'] != patches[0]['old_plain_text'] + + def test_delete_add_pair_clean_heading_uses_replace_fragment(self): + m1 = _make_mapping('m1', 'Old Title', xpath='h2[1]', type_='heading') + mappings = [m1] + xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} + mdx_to_sidecar = self._setup_sidecar('h2[1]', 0) + roundtrip_sidecar = _make_roundtrip_sidecar([ + SidecarBlock(0, 'h2[1]', '

Old Title

', 'hash1', (1, 1)) + ]) + + old_block = _make_block('## Old Title\n', 'heading') + new_block = _make_block('### New Title\n', 'heading') + changes = [ + BlockChange(index=0, change_type='deleted', old_block=old_block, new_block=None), + BlockChange(index=0, change_type='added', old_block=None, new_block=new_block), + ] + + patches = build_patches( + changes, [old_block], [new_block], + mappings, mdx_to_sidecar, xpath_to_mapping, + roundtrip_sidecar=roundtrip_sidecar) + + assert len(patches) == 1 + assert patches[0]['action'] == 'replace_fragment' + assert patches[0]['new_element_xhtml'] == '

New Title

' # ── build_table_row_patches ── diff --git a/confluence-mdx/tests/test_reverse_sync_reconstruction_goldens.py b/confluence-mdx/tests/test_reverse_sync_reconstruction_goldens.py new file mode 100644 index 000000000..650c019ee --- /dev/null +++ b/confluence-mdx/tests/test_reverse_sync_reconstruction_goldens.py @@ -0,0 +1,91 @@ +"""Phase 2 clean block reconstruction golden 테스트.""" +from pathlib import Path + +import pytest +import yaml + +from reverse_sync.block_diff import diff_blocks +from reverse_sync.mapping_recorder import record_mapping +from reverse_sync.mdx_block_parser import parse_mdx_blocks +from reverse_sync.patch_builder import build_patches +from reverse_sync.sidecar import ( + SidecarEntry, + build_mdx_to_sidecar_index, + build_sidecar, + build_xpath_to_mapping, + generate_sidecar_mapping, +) +from reverse_sync.xhtml_normalizer import normalize_fragment +from reverse_sync.xhtml_patcher import patch_xhtml + + +TESTCASES = Path(__file__).parent / "testcases" + + +def _run_pipeline_with_sidecar(xhtml: str, original_mdx: str, improved_mdx: str): + original_blocks = parse_mdx_blocks(original_mdx) + improved_blocks = parse_mdx_blocks(improved_mdx) + changes, alignment = diff_blocks(original_blocks, improved_blocks) + + mappings = record_mapping(xhtml) + roundtrip_sidecar = build_sidecar(xhtml, original_mdx) + + sidecar_yaml = generate_sidecar_mapping(xhtml, original_mdx) + sidecar_data = yaml.safe_load(sidecar_yaml) or {} + sidecar_entries = [ + SidecarEntry( + xhtml_xpath=item['xhtml_xpath'], + xhtml_type=item.get('xhtml_type', ''), + mdx_blocks=item.get('mdx_blocks', []), + mdx_line_start=item.get('mdx_line_start', 0), + mdx_line_end=item.get('mdx_line_end', 0), + ) + for item in sidecar_data.get('mappings', []) + ] + mdx_to_sidecar = build_mdx_to_sidecar_index(sidecar_entries) + xpath_to_mapping = build_xpath_to_mapping(mappings) + + patches = build_patches( + changes, + original_blocks, + improved_blocks, + mappings, + mdx_to_sidecar, + xpath_to_mapping, + alignment, + roundtrip_sidecar=roundtrip_sidecar, + ) + return patch_xhtml(xhtml, patches) + + +def _load_testcase(case_id: str): + case_dir = TESTCASES / case_id + return { + 'xhtml': (case_dir / 'page.xhtml').read_text(encoding='utf-8'), + 'original_mdx': (case_dir / 'original.mdx').read_text(encoding='utf-8'), + 'improved_mdx': (case_dir / 'improved.mdx').read_text(encoding='utf-8'), + 'expected': (case_dir / 'expected.reverse-sync.patched.xhtml').read_text( + encoding='utf-8' + ), + } + + +class TestSimpleModifiedGoldens: + @pytest.fixture(autouse=True) + def require_testcases(self): + if not TESTCASES.is_dir(): + pytest.skip("testcases directory not found") + + def test_544211126_paragraph_change(self): + case = _load_testcase('544211126') + result = _run_pipeline_with_sidecar( + case['xhtml'], case['original_mdx'], case['improved_mdx'] + ) + assert normalize_fragment(result) == normalize_fragment(case['expected']) + + def test_544178405_paragraph_and_table_change(self): + case = _load_testcase('544178405') + result = _run_pipeline_with_sidecar( + case['xhtml'], case['original_mdx'], case['improved_mdx'] + ) + assert normalize_fragment(result) == normalize_fragment(case['expected']) diff --git a/confluence-mdx/tests/test_reverse_sync_xhtml_patcher.py b/confluence-mdx/tests/test_reverse_sync_xhtml_patcher.py index c3995714e..724eee9c2 100644 --- a/confluence-mdx/tests/test_reverse_sync_xhtml_patcher.py +++ b/confluence-mdx/tests/test_reverse_sync_xhtml_patcher.py @@ -139,7 +139,52 @@ def test_compound_xpath_adf_extension(): assert 'Second para.' in result -# --- Phase 2: delete/insert 테스트 --- +# --- Phase 2: replace_fragment / delete / insert 테스트 --- + + +class TestReplaceFragmentPatch: + def test_replace_simple_paragraph(self): + xhtml = '

Old text

Keep

' + patches = [{ + 'action': 'replace_fragment', + 'xhtml_xpath': 'p[1]', + 'new_element_xhtml': '

New text

', + }] + result = patch_xhtml(xhtml, patches) + assert '

New text

' in result + assert '

Keep

' in result + assert 'Old text' not in result + + def test_replace_code_macro_restores_cdata(self): + xhtml = '' + patches = [{ + 'action': 'replace_fragment', + 'xhtml_xpath': 'ac:structured-macro[1]', + 'new_element_xhtml': ( + '' + 'SELECT * FROM test;' + '' + ), + }] + result = patch_xhtml(xhtml, patches) + assert '' in result + + def test_replace_fragment_preserves_inserted_siblings(self): + xhtml = '

Title

Old text

' + patches = [ + { + 'action': 'insert', + 'after_xpath': 'h1[1]', + 'new_element_xhtml': '

Inserted

', + }, + { + 'action': 'replace_fragment', + 'xhtml_xpath': 'h1[1]', + 'new_element_xhtml': '

Renamed

', + }, + ] + result = patch_xhtml(xhtml, patches) + assert '

Renamed

Inserted

Old text

' in result class TestDeletePatch: