Skip to content
135 changes: 6 additions & 129 deletions confluence-mdx/bin/reverse_sync/list_patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,73 +8,9 @@
from reverse_sync.lost_info_patcher import apply_lost_info
from reverse_sync.mdx_to_xhtml_inline import mdx_block_to_inner_xhtml
from reverse_sync.text_transfer import transfer_text_changes
from mdx_to_storage.inline import convert_inline
from text_utils import normalize_mdx_to_plain, collapse_ws, strip_list_marker, strip_for_compare
from text_utils import normalize_mdx_to_plain


def _resolve_child_mapping(
    old_plain: str,
    parent_mapping: BlockMapping,
    id_to_mapping: Dict[str, BlockMapping],
) -> Optional[BlockMapping]:
    """Find the child of ``parent_mapping`` whose plain text matches ``old_plain``.

    The children listed in ``parent_mapping.children`` are looked up in
    ``id_to_mapping`` and compared against ``old_plain`` in five passes of
    decreasing strictness. Each pass scans every child before the next,
    looser pass is tried, and the first hit wins.

    Args:
        old_plain: Plain text of the original MDX block/item.
        parent_mapping: Mapping whose ``children`` ids are searched.
        id_to_mapping: Lookup table from block id to mapping.

    Returns:
        The first matching child mapping, or ``None`` when ``old_plain``
        collapses to an empty string or no pass produces a match.
    """
    target = collapse_ws(old_plain)
    if not target:
        return None

    def squash(text):
        # Drop every whitespace run so comparisons ignore spacing entirely.
        return re.sub(r'\s+', '', text)

    # Resolve child ids once; ids without a (truthy) mapping are ignored.
    candidates = [
        found
        for found in map(id_to_mapping.get, parent_mapping.children)
        if found
    ]

    # Pass 1: exact match after whitespace collapsing.
    for cand in candidates:
        if collapse_ws(cand.xhtml_plain_text) == target:
            return cand

    # Pass 2: exact match with all whitespace removed.
    target_squashed = squash(target)
    for cand in candidates:
        if squash(cand.xhtml_plain_text) == target_squashed:
            return cand

    # Pass 3: the XHTML child carries a list marker ("- text") that the
    # MDX text does not; compare after stripping the marker from the child.
    for cand in candidates:
        squashed = squash(cand.xhtml_plain_text)
        unmarked = strip_list_marker(squashed)
        if unmarked != squashed and unmarked == target_squashed:
            return cand

    # Pass 4: the MDX side carries the list marker instead.
    target_unmarked = strip_list_marker(target_squashed)
    if target_unmarked != target_squashed:
        for cand in candidates:
            if squash(cand.xhtml_plain_text) == target_unmarked:
                return cand

    # Pass 5: leading-prefix match, tolerating emoticon/lost-info differences.
    # When an ac:emoticon in the XHTML is not replaced by text, a full-string
    # comparison can fail, so only the first 20 characters are compared.
    # A target more than twice as long as the child is rejected as a wrong
    # match (e.g. a whole callout text sharing a prefix with the first line
    # of one of its inner paragraphs).
    prefix_len = 20
    if len(target_squashed) < prefix_len:
        return None
    head = target_squashed[:prefix_len]
    for cand in candidates:
        squashed = squash(cand.xhtml_plain_text)
        # ``head`` is exactly ``prefix_len`` long, so startswith() also
        # guarantees the child is at least that long.
        if not squashed.startswith(head):
            continue
        if len(target_squashed) > len(squashed) * 2:
            continue
        return cand

    return None


def split_list_items(content: str) -> List[str]:
"""리스트 블록 content를 개별 항목으로 분리한다."""
Expand Down Expand Up @@ -174,10 +110,9 @@ def build_list_item_patches(
id_to_mapping: Optional[Dict[str, BlockMapping]] = None,
mapping_lost_info: Optional[Dict[str, dict]] = None,
) -> List[Dict[str, str]]:
"""리스트 블록의 각 항목을 개별 매핑과 대조하여 패치를 생성한다.
"""리스트 블록 변경을 XHTML에 패치한다.

R2: child 매칭 성공 시 항상 child inner XHTML 재생성,
child 매칭 실패 시 전체 리스트 inner XHTML 재생성.
sidecar에서 parent mapping을 찾아 전체 리스트 inner XHTML을 재생성한다.
"""
old_items = split_list_items(change.old_block.content)
new_items = split_list_items(change.new_block.content)
Expand All @@ -188,72 +123,14 @@ def build_list_item_patches(
parent_mapping = find_mapping_by_sidecar(
change.index, mdx_to_sidecar, xpath_to_mapping)

# sidecar에 없으면 텍스트 포함 검색으로 parent 찾기
if parent_mapping is None:
from reverse_sync.patch_builder import _find_containing_mapping
old_plain_all = normalize_mdx_to_plain(
change.old_block.content, 'list')
parent_mapping = _find_containing_mapping(
old_plain_all, mappings, used_ids or set())

# 항목 수 불일치 → 전체 리스트 재생성
# 항목 수 불일치 또는 내용 변경 → 전체 리스트 재생성
if len(old_items) != len(new_items):
return _regenerate_list_from_parent(
change, parent_mapping, used_ids, mapping_lost_info)

patches = []
for old_item, new_item in zip(old_items, new_items):
if old_item == new_item:
continue
old_plain = normalize_mdx_to_plain(old_item, 'list')

# parent mapping의 children에서 child 해석 시도
mapping = None
if parent_mapping is not None and parent_mapping.children and id_to_mapping is not None:
mapping = _resolve_child_mapping(
old_plain, parent_mapping, id_to_mapping)

if mapping is None:
# R2: child 매칭 실패 → 전체 리스트 재생성
if old_item != new_item:
return _regenerate_list_from_parent(
change, parent_mapping, used_ids, mapping_lost_info)

# child 매칭 성공: child inner XHTML 재생성
new_plain = normalize_mdx_to_plain(new_item, 'list')

# 멱등성 체크: push 후 XHTML이 이미 업데이트된 경우 건너뜀
if (collapse_ws(old_plain) != collapse_ws(mapping.xhtml_plain_text)
and collapse_ws(new_plain) == collapse_ws(mapping.xhtml_plain_text)):
continue

if used_ids is not None:
used_ids.add(mapping.block_id)

# 재생성 시 소실되는 XHTML 요소 포함 시 텍스트 전이로 폴백
if '<ac:image' in mapping.xhtml_text or '<span style=' in mapping.xhtml_text:
xhtml_text = transfer_text_changes(
old_plain, new_plain, mapping.xhtml_plain_text)
patches.append({
'xhtml_xpath': mapping.xhtml_xpath,
'old_plain_text': mapping.xhtml_plain_text,
'new_plain_text': xhtml_text,
})
continue

new_item_text = re.sub(r'^[-*+]\s+', '', new_item.strip())
new_item_text = re.sub(r'^\d+\.\s+', '', new_item_text)
new_inner = convert_inline(new_item_text)

# 블록 레벨 lost_info 적용
if mapping_lost_info:
block_lost = mapping_lost_info.get(mapping.block_id, {})
if block_lost:
new_inner = apply_lost_info(new_inner, block_lost)

patches.append({
'xhtml_xpath': mapping.xhtml_xpath,
'old_plain_text': mapping.xhtml_plain_text,
'new_inner_xhtml': new_inner,
})

return patches
return []
82 changes: 11 additions & 71 deletions confluence-mdx/bin/reverse_sync/patch_builder.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
"""패치 빌더 — MDX diff 변경과 XHTML 매핑을 결합하여 XHTML 패치를 생성."""
import re
from typing import Dict, List, Optional

from reverse_sync.block_diff import BlockChange, NON_CONTENT_TYPES
from reverse_sync.mapping_recorder import BlockMapping
from mdx_to_storage.parser import Block as MdxBlock
from text_utils import (
normalize_mdx_to_plain, collapse_ws,
strip_for_compare,
)
from reverse_sync.text_transfer import transfer_text_changes
from reverse_sync.sidecar import find_mapping_by_sidecar, SidecarEntry
from reverse_sync.lost_info_patcher import apply_lost_info, distribute_lost_info_to_mappings
from reverse_sync.mdx_to_xhtml_inline import mdx_block_to_xhtml_element, mdx_block_to_inner_xhtml
from reverse_sync.list_patcher import (
build_list_item_patches,
_resolve_child_mapping,
)
from reverse_sync.table_patcher import (
build_table_row_patches,
Expand All @@ -25,41 +22,6 @@
)


_BLOCK_MARKER_RE = re.compile(r'#{1,6}|\d+\.')


def _strip_block_markers(text: str) -> str:
    """Return ``text`` with heading/list markers removed for containment checks.

    Every match of the module-level ``_BLOCK_MARKER_RE`` pattern is deleted,
    wherever it occurs in ``text``.
    """
    marker_free = re.sub(_BLOCK_MARKER_RE, '', text)
    return marker_free


def _find_containing_mapping(
    old_plain: str,
    mappings: List[BlockMapping],
    used_ids: set,
) -> Optional[BlockMapping]:
    """Find an XHTML mapping whose text contains ``old_plain`` (sidecar fallback).

    Two passes are made over the mappings whose ``block_id`` is not in
    ``used_ids``:

    1. substring test on the ``strip_for_compare``-normalized texts;
    2. the same test after heading/list markers have been removed from
       both sides with ``_strip_block_markers``.

    Args:
        old_plain: Plain text of the original MDX block.
        mappings: Candidate mappings, searched in order.
        used_ids: Block ids that are already consumed and must be skipped.

    Returns:
        The first mapping containing the text, or ``None`` when the text is
        too short (fewer than 5 characters after whitespace collapsing),
        normalizes to an empty string, or is not contained in any mapping.
    """
    old_norm = collapse_ws(old_plain)
    if not old_norm or len(old_norm) < 5:
        return None

    old_nospace = strip_for_compare(old_norm)
    # An empty needle is a substring of every string, so it would "match"
    # the first unused mapping; treat it as no match instead.
    if not old_nospace:
        return None

    available = [m for m in mappings if m.block_id not in used_ids]

    for m in available:
        m_nospace = strip_for_compare(m.xhtml_plain_text)
        if m_nospace and old_nospace in m_nospace:
            return m

    # Fallback: strip heading/list markers and retry.
    old_stripped = _strip_block_markers(old_nospace)
    # Text made only of markers (e.g. "####" or "1.2.3.") strips down to
    # nothing; same empty-needle hazard as above.
    if not old_stripped:
        return None

    for m in available:
        m_stripped = _strip_block_markers(strip_for_compare(m.xhtml_plain_text))
        if m_stripped and old_stripped in m_stripped:
            return m

    return None


def _flush_containing_changes(
containing_changes: dict,
used_ids: 'set | None' = None,
Expand Down Expand Up @@ -92,7 +54,6 @@ def _resolve_mapping_for_change(
used_ids: set,
mdx_to_sidecar: Dict[int, SidecarEntry],
xpath_to_mapping: Dict[str, 'BlockMapping'],
id_to_mapping: Dict[str, BlockMapping],
) -> tuple:
"""변경에 대한 매핑과 처리 전략을 결정한다.

Expand All @@ -106,44 +67,23 @@ def _resolve_mapping_for_change(
mapping = find_mapping_by_sidecar(
change.index, mdx_to_sidecar, xpath_to_mapping)

# Parent mapping → child 해석 시도
if mapping is not None and mapping.children:
child = _resolve_child_mapping(old_plain, mapping, id_to_mapping)
if child is not None:
# callout 블록은 direct 전략 시 _convert_callout_inner가
# <li><p> 구조를 생성할 수 없으므로 containing 전략 사용
if change.old_block.type == 'callout':
return ('containing', mapping)
return ('direct', child)
# 블록 텍스트가 parent에 포함되는지 확인
_old_ns = strip_for_compare(old_plain)
_map_ns = strip_for_compare(mapping.xhtml_plain_text)
if _old_ns and _map_ns and _old_ns not in _map_ns:
if change.old_block.type == 'list':
return ('list', mapping)
return ('containing', mapping)

if mapping is None:
# 폴백: 텍스트 포함 검색으로 containing mapping 찾기
containing = _find_containing_mapping(old_plain, mappings, used_ids)
if containing is not None:
return ('containing', containing)
if change.old_block.type == 'list':
return ('list', None)
if is_markdown_table(change.old_block.content):
return ('table', None)
return ('skip', None)

# 매핑 텍스트에 old_plain이 포함되지 않으면 더 나은 매핑 찾기
if not mapping.children:
old_nospace = strip_for_compare(old_plain)
map_nospace = strip_for_compare(mapping.xhtml_plain_text)
if old_nospace and map_nospace and old_nospace not in map_nospace:
better = _find_containing_mapping(old_plain, mappings, used_ids)
if better is not None:
return ('containing', better)
if change.old_block.type == 'list':
return ('list', mapping)
# callout 블록은 항상 containing 전략 사용
# (_convert_callout_inner가 <li><p> 구조를 생성할 수 없으므로)
if change.old_block.type == 'callout':
return ('containing', mapping)

# Parent mapping이 children을 가지면 containing 전략으로 위임
if mapping.children:
if change.old_block.type == 'list':
return ('list', mapping)
return ('containing', mapping)

# list 블록은 list 전략 사용 (direct 교체 시 <ac:image> 등 Confluence 태그 손실 방지)
if change.old_block.type == 'list':
Expand Down Expand Up @@ -257,7 +197,7 @@ def _mark_used(block_id: str, m: BlockMapping):

strategy, mapping = _resolve_mapping_for_change(
change, old_plain, mappings, used_ids,
mdx_to_sidecar, xpath_to_mapping, id_to_mapping)
mdx_to_sidecar, xpath_to_mapping)

if strategy == 'skip':
continue
Expand Down
1 change: 1 addition & 0 deletions confluence-mdx/bin/reverse_sync/roundtrip_verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def _normalize_trailing_blank_lines(text: str) -> str:
return stripped + '\n' if stripped else text



def _apply_minimal_normalizations(text: str) -> str:
"""항상 적용하는 최소 정규화 (strict/lenient 모드 공통).

Expand Down
Loading
Loading