diff --git a/confluence-mdx/bin/reverse_sync/list_patcher.py b/confluence-mdx/bin/reverse_sync/list_patcher.py index d70e3daff..c0f663914 100644 --- a/confluence-mdx/bin/reverse_sync/list_patcher.py +++ b/confluence-mdx/bin/reverse_sync/list_patcher.py @@ -8,73 +8,9 @@ from reverse_sync.lost_info_patcher import apply_lost_info from reverse_sync.mdx_to_xhtml_inline import mdx_block_to_inner_xhtml from reverse_sync.text_transfer import transfer_text_changes -from mdx_to_storage.inline import convert_inline -from text_utils import normalize_mdx_to_plain, collapse_ws, strip_list_marker, strip_for_compare +from text_utils import normalize_mdx_to_plain -def _resolve_child_mapping( - old_plain: str, - parent_mapping: BlockMapping, - id_to_mapping: Dict[str, BlockMapping], -) -> Optional[BlockMapping]: - """Parent mapping의 children 중에서 old_plain과 일치하는 child를 찾는다.""" - old_norm = collapse_ws(old_plain) - if not old_norm: - return None - - # 1차: collapse_ws 완전 일치 - for child_id in parent_mapping.children: - child = id_to_mapping.get(child_id) - if child and collapse_ws(child.xhtml_plain_text) == old_norm: - return child - - # 2차: 공백 무시 완전 일치 - old_nospace = re.sub(r'\s+', '', old_norm) - for child_id in parent_mapping.children: - child = id_to_mapping.get(child_id) - if child: - child_nospace = re.sub(r'\s+', '', child.xhtml_plain_text) - if child_nospace == old_nospace: - return child - - # 3차: 리스트 마커 제거 후 비교 (XHTML child가 "- text" 형식인 경우) - for child_id in parent_mapping.children: - child = id_to_mapping.get(child_id) - if child: - child_nospace = re.sub(r'\s+', '', child.xhtml_plain_text) - child_unmarked = strip_list_marker(child_nospace) - if child_unmarked != child_nospace and old_nospace == child_unmarked: - return child - - # 4차: MDX 쪽 리스트 마커 제거 후 비교 - old_unmarked = strip_list_marker(old_nospace) - if old_unmarked != old_nospace: - for child_id in parent_mapping.children: - child = id_to_mapping.get(child_id) - if child: - child_nospace = re.sub(r'\s+', '', child.xhtml_plain_text) - if old_unmarked == child_nospace: - return child - - # 5차: 앞부분 prefix 일치 (emoticon/lost_info 차이 허용) - # XHTML에서 ac:emoticon이 텍스트로 치환되지 않는 경우, - # 전체 문자열 비교가 실패할 수 있으므로 앞부분 20자로 비교한다. - # 단, old_nospace가 child보다 2배 이상 긴 경우는 잘못된 매칭으로 판단한다 - # (callout 전체 텍스트가 내부 paragraph 첫 줄과 prefix를 공유하는 경우 방지). - _PREFIX_LEN = 20 - if len(old_nospace) >= _PREFIX_LEN: - old_prefix = old_nospace[:_PREFIX_LEN] - for child_id in parent_mapping.children: - child = id_to_mapping.get(child_id) - if child: - child_nospace = re.sub(r'\s+', '', child.xhtml_plain_text) - if (len(child_nospace) >= _PREFIX_LEN - and child_nospace[:_PREFIX_LEN] == old_prefix - and len(old_nospace) <= len(child_nospace) * 2): - return child - - return None - def split_list_items(content: str) -> List[str]: """리스트 블록 content를 개별 항목으로 분리한다.""" @@ -174,10 +110,9 @@ def build_list_item_patches( id_to_mapping: Optional[Dict[str, BlockMapping]] = None, mapping_lost_info: Optional[Dict[str, dict]] = None, ) -> List[Dict[str, str]]: - """리스트 블록의 각 항목을 개별 매핑과 대조하여 패치를 생성한다. + """리스트 블록 변경을 XHTML에 패치한다. - R2: child 매칭 성공 시 항상 child inner XHTML 재생성, - child 매칭 실패 시 전체 리스트 inner XHTML 재생성. + sidecar에서 parent mapping을 찾아 전체 리스트 inner XHTML을 재생성한다. """ old_items = split_list_items(change.old_block.content) new_items = split_list_items(change.new_block.content) @@ -188,72 +123,14 @@ def build_list_item_patches( parent_mapping = find_mapping_by_sidecar( change.index, mdx_to_sidecar, xpath_to_mapping) - # sidecar에 없으면 텍스트 포함 검색으로 parent 찾기 - if parent_mapping is None: - from reverse_sync.patch_builder import _find_containing_mapping - old_plain_all = normalize_mdx_to_plain( - change.old_block.content, 'list') - parent_mapping = _find_containing_mapping( - old_plain_all, mappings, used_ids or set()) - - # 항목 수 불일치 → 전체 리스트 재생성 + # 항목 수 불일치 또는 내용 변경 → 전체 리스트 재생성 if len(old_items) != len(new_items): return _regenerate_list_from_parent( change, parent_mapping, used_ids, mapping_lost_info) - patches = [] for old_item, new_item in zip(old_items, new_items): - if old_item == new_item: - continue - old_plain = normalize_mdx_to_plain(old_item, 'list') - - # parent mapping의 children에서 child 해석 시도 - mapping = None - if parent_mapping is not None and parent_mapping.children and id_to_mapping is not None: - mapping = _resolve_child_mapping( - old_plain, parent_mapping, id_to_mapping) - - if mapping is None: - # R2: child 매칭 실패 → 전체 리스트 재생성 + if old_item != new_item: return _regenerate_list_from_parent( change, parent_mapping, used_ids, mapping_lost_info) - # child 매칭 성공: child inner XHTML 재생성 - new_plain = normalize_mdx_to_plain(new_item, 'list') - - # 멱등성 체크: push 후 XHTML이 이미 업데이트된 경우 건너뜀 - if (collapse_ws(old_plain) != collapse_ws(mapping.xhtml_plain_text) - and collapse_ws(new_plain) == collapse_ws(mapping.xhtml_plain_text)): - continue - - if used_ids is not None: - used_ids.add(mapping.block_id) - - # 재생성 시 소실되는 XHTML 요소 포함 시 텍스트 전이로 폴백 - if ' str: - """containment 비교를 위해 heading/list 마커를 제거한다.""" - return _BLOCK_MARKER_RE.sub('', text) - - -def _find_containing_mapping( - old_plain: str, - mappings: List[BlockMapping], - used_ids: set, -) -> Optional[BlockMapping]: - """old_plain 텍스트를 포함하는 XHTML 매핑을 찾는다 (sidecar 폴백).""" - old_norm = collapse_ws(old_plain) - if not old_norm or len(old_norm) < 5: - return None - old_nospace = strip_for_compare(old_norm) - for m in mappings: - if m.block_id in used_ids: - continue - m_nospace = strip_for_compare(m.xhtml_plain_text) - if m_nospace and old_nospace in m_nospace: - return m - # 폴백: heading/list 마커를 제거하고 재시도 - old_stripped = _strip_block_markers(old_nospace) - for m in mappings: - if m.block_id in used_ids: - continue - m_stripped = _strip_block_markers(strip_for_compare(m.xhtml_plain_text)) - if m_stripped and old_stripped in m_stripped: - return m - return None - - def _flush_containing_changes( containing_changes: dict, used_ids: 'set | None' = None, @@ -92,7 +54,6 @@ def _resolve_mapping_for_change( used_ids: set, mdx_to_sidecar: Dict[int, SidecarEntry], xpath_to_mapping: Dict[str, 'BlockMapping'], - id_to_mapping: Dict[str, BlockMapping], ) -> tuple: """변경에 대한 매핑과 처리 전략을 결정한다. @@ -106,44 +67,23 @@ def _resolve_mapping_for_change( mapping = find_mapping_by_sidecar( change.index, mdx_to_sidecar, xpath_to_mapping) - # Parent mapping → child 해석 시도 - if mapping is not None and mapping.children: - child = _resolve_child_mapping(old_plain, mapping, id_to_mapping) - if child is not None: - # callout 블록은 direct 전략 시 _convert_callout_inner가 - #
  • 구조를 생성할 수 없으므로 containing 전략 사용 - if change.old_block.type == 'callout': - return ('containing', mapping) - return ('direct', child) - # 블록 텍스트가 parent에 포함되는지 확인 - _old_ns = strip_for_compare(old_plain) - _map_ns = strip_for_compare(mapping.xhtml_plain_text) - if _old_ns and _map_ns and _old_ns not in _map_ns: - if change.old_block.type == 'list': - return ('list', mapping) - return ('containing', mapping) - if mapping is None: - # 폴백: 텍스트 포함 검색으로 containing mapping 찾기 - containing = _find_containing_mapping(old_plain, mappings, used_ids) - if containing is not None: - return ('containing', containing) if change.old_block.type == 'list': return ('list', None) if is_markdown_table(change.old_block.content): return ('table', None) return ('skip', None) - # 매핑 텍스트에 old_plain이 포함되지 않으면 더 나은 매핑 찾기 - if not mapping.children: - old_nospace = strip_for_compare(old_plain) - map_nospace = strip_for_compare(mapping.xhtml_plain_text) - if old_nospace and map_nospace and old_nospace not in map_nospace: - better = _find_containing_mapping(old_plain, mappings, used_ids) - if better is not None: - return ('containing', better) - if change.old_block.type == 'list': - return ('list', mapping) + # callout 블록은 항상 containing 전략 사용 + # (_convert_callout_inner가

  • 구조를 생성할 수 없으므로) + if change.old_block.type == 'callout': + return ('containing', mapping) + + # Parent mapping이 children을 가지면 containing 전략으로 위임 + if mapping.children: + if change.old_block.type == 'list': + return ('list', mapping) + return ('containing', mapping) # list 블록은 list 전략 사용 (direct 교체 시 등 Confluence 태그 손실 방지) if change.old_block.type == 'list': @@ -257,7 +197,7 @@ def _mark_used(block_id: str, m: BlockMapping): strategy, mapping = _resolve_mapping_for_change( change, old_plain, mappings, used_ids, - mdx_to_sidecar, xpath_to_mapping, id_to_mapping) + mdx_to_sidecar, xpath_to_mapping) if strategy == 'skip': continue diff --git a/confluence-mdx/bin/reverse_sync/roundtrip_verifier.py b/confluence-mdx/bin/reverse_sync/roundtrip_verifier.py index ce877b453..a6e707c6a 100644 --- a/confluence-mdx/bin/reverse_sync/roundtrip_verifier.py +++ b/confluence-mdx/bin/reverse_sync/roundtrip_verifier.py @@ -65,6 +65,7 @@ def _normalize_trailing_blank_lines(text: str) -> str: return stripped + '\n' if stripped else text + def _apply_minimal_normalizations(text: str) -> str: """항상 적용하는 최소 정규화 (strict/lenient 모드 공통). diff --git a/confluence-mdx/bin/reverse_sync/sidecar.py b/confluence-mdx/bin/reverse_sync/sidecar.py index 55a22a5de..750295df6 100644 --- a/confluence-mdx/bin/reverse_sync/sidecar.py +++ b/confluence-mdx/bin/reverse_sync/sidecar.py @@ -5,8 +5,8 @@ build_sidecar, verify_sidecar_integrity, write_sidecar, load_sidecar, sha256_text -Mapping lookup (mapping.yaml 기반): - SidecarEntry, load_sidecar_mapping, build_mdx_to_sidecar_index, +Mapping lookup (mapping.yaml v3 기반): + SidecarChildEntry, SidecarEntry, load_sidecar_mapping, build_mdx_to_sidecar_index, build_xpath_to_mapping, generate_sidecar_mapping, find_mapping_by_sidecar """ @@ -244,14 +244,104 @@ def load_sidecar(path: Path) -> RoundtripSidecar: # --------------------------------------------------------------------------- -# Mapping lookup — mapping.yaml 로드 및 인덱스 구축 +# Mapping lookup — mapping.yaml v3 로드 및 인덱스 구축 # --------------------------------------------------------------------------- +# XHTML record_mapping type → 호환 MDX parse_mdx type +_TYPE_COMPAT: Dict[str, frozenset] = { + 'heading': frozenset({'heading'}), + 'paragraph': frozenset({'paragraph'}), + 'list': frozenset({'list'}), + 'code': frozenset({'code_block'}), + 'table': frozenset({'table', 'html_block'}), + 'html_block': frozenset({'callout', 'details', 'html_block', 'blockquote', + 'figure', 'badge', 'hr'}), +} + +# MDX 출력을 생성하지 않는 XHTML 매크로 이름 +_SKIP_MACROS = frozenset({'toc', 'children'}) + + +def _should_skip_xhtml(xm: Any) -> bool: + """toc, children 등 MDX 출력이 없는 XHTML 매크로를 판별한다.""" + xpath = xm.xhtml_xpath + for skip_name in _SKIP_MACROS: + if xpath.startswith(f'macro-{skip_name}'): + return True + return False + + +def _type_compatible(xhtml_type: str, mdx_type: str) -> bool: + """XHTML 타입과 MDX 블록 타입이 호환되는지 확인한다.""" + return mdx_type in _TYPE_COMPAT.get(xhtml_type, frozenset()) + + + +def _align_children( + xm: Any, + mdx_block: Any, + id_to_mapping: Dict[str, Any], +) -> List[Dict]: + """XHTML children과 MDX Block.children을 타입 기반 순차 정렬한다. + + 각 XHTML child에 대응하는 MDX child의 절대 line range를 계산하여 + children entry 목록을 반환한다. + + 절대 line = parent_mdx_block.line_start + child.line_start + (callout의 경우 첫 줄이 이므로 +1 offset이 자연스럽게 적용됨) + """ + child_entries = [] + # NON_CONTENT_TYPES는 런타임에 임포트 (순환 참조 방지) + from reverse_sync.block_diff import NON_CONTENT_TYPES + mdx_children = [c for c in mdx_block.children if c.type not in NON_CONTENT_TYPES] + mdx_child_ptr = 0 + + for child_id in xm.children: + child_mapping = id_to_mapping.get(child_id) + if child_mapping is None: + continue + + if mdx_child_ptr < len(mdx_children): + mdx_child = mdx_children[mdx_child_ptr] + if _type_compatible(child_mapping.type, mdx_child.type): + abs_start = mdx_block.line_start + mdx_child.line_start + abs_end = mdx_block.line_start + mdx_child.line_end + child_entries.append({ + 'xhtml_xpath': child_mapping.xhtml_xpath, + 'xhtml_block_id': child_id, + 'mdx_line_start': abs_start, + 'mdx_line_end': abs_end, + }) + mdx_child_ptr += 1 + continue + + child_entries.append({ + 'xhtml_xpath': child_mapping.xhtml_xpath, + 'xhtml_block_id': child_id, + 'mdx_line_start': 0, + 'mdx_line_end': 0, + }) + + return child_entries + + +@dataclass +class SidecarChildEntry: + """mapping.yaml v3 children 항목.""" + xhtml_xpath: str + xhtml_block_id: str + mdx_line_start: int = 0 + mdx_line_end: int = 0 + + @dataclass class SidecarEntry: xhtml_xpath: str xhtml_type: str mdx_blocks: List[int] = field(default_factory=list) + mdx_line_start: int = 0 + mdx_line_end: int = 0 + children: List[SidecarChildEntry] = field(default_factory=list) def load_sidecar_mapping(mapping_path: str) -> List[SidecarEntry]: @@ -265,10 +355,22 @@ def load_sidecar_mapping(mapping_path: str) -> List[SidecarEntry]: data = yaml.safe_load(path.read_text()) or {} entries = [] for item in data.get('mappings', []): + children = [ + SidecarChildEntry( + xhtml_xpath=ch.get('xhtml_xpath', ''), + xhtml_block_id=ch.get('xhtml_block_id', ''), + mdx_line_start=ch.get('mdx_line_start', 0), + mdx_line_end=ch.get('mdx_line_end', 0), + ) + for ch in item.get('children', []) + ] entries.append(SidecarEntry( xhtml_xpath=item['xhtml_xpath'], xhtml_type=item.get('xhtml_type', ''), mdx_blocks=item.get('mdx_blocks', []), + mdx_line_start=item.get('mdx_line_start', 0), + mdx_line_end=item.get('mdx_line_end', 0), + children=children, )) return entries @@ -309,93 +411,58 @@ def generate_sidecar_mapping( page_id: str = '', lost_infos: dict | None = None, ) -> str: - """XHTML + MDX로부터 mapping.yaml 내용을 생성한다. + """XHTML + MDX로부터 mapping.yaml v3 내용을 생성한다. - Forward converter의 sidecar 생성 로직을 재현한다. - record_mapping()과 parse_mdx_blocks()를 조합하여 텍스트 기반 매칭을 수행한다. + 타입 호환성 기반 순차 정렬(two-pointer)로 XHTML top-level 블록과 + MDX content 블록을 매핑한다. 텍스트 비교 없이 블록 타입만 사용한다. - 순서 + 텍스트 매칭: - 각 XHTML 매핑에 대해 현재 MDX 포인터부터 앞으로 탐색하여 - 정규화된 텍스트가 일치하는 MDX 블록을 찾는다. - 일치하지 않는 XHTML 블록(image, toc, empty paragraph 등)은 - 빈 mdx_blocks로 기록한다. + 타입 불일치 시 XHTML 블록이 MDX 출력을 생성하지 않은 것으로 판단 + (ac:image → figure 없는 MDX, toc 등). MDX 포인터는 유지된다. """ from reverse_sync.mapping_recorder import record_mapping from mdx_to_storage.parser import parse_mdx_blocks - from text_utils import normalize_mdx_to_plain, collapse_ws xhtml_mappings = record_mapping(xhtml) - mdx_blocks = parse_mdx_blocks(mdx) + mdx_blocks_all = parse_mdx_blocks(mdx) - # 콘텐츠 블록만 필터 (frontmatter, empty, import 제외) - entries = [] - mdx_content_indices = [ - i for i, b in enumerate(mdx_blocks) + # MDX 콘텐츠 블록만 필터 (frontmatter, empty, import 제외), 원본 인덱스 보존 + mdx_content_indexed = [ + (i, b) for i, b in enumerate(mdx_blocks_all) if b.type not in NON_CONTENT_TYPES ] - # Empty MDX 블록 중 콘텐츠 영역 내의 것만 매핑 대상으로 추적 - # (frontmatter/import 사이의 빈 줄은 XHTML에 대응하지 않음) - first_content_idx = mdx_content_indices[0] if mdx_content_indices else len(mdx_blocks) - mdx_empty_indices = [ - i for i, b in enumerate(mdx_blocks) - if b.type == 'empty' and i > first_content_idx - ] - empty_ptr = 0 - - # MDX 콘텐츠 블록별 정규화 텍스트를 미리 계산 - mdx_plains = {} - for ci in mdx_content_indices: - b = mdx_blocks[ci] - mdx_plains[ci] = collapse_ws(normalize_mdx_to_plain(b.content, b.type)) - # child mapping은 별도 처리 (parent xpath에 포함) - child_ids = set() + # child IDs 수집 → top-level mapping 필터링 + child_ids: set = set() for m in xhtml_mappings: - for cid in m.children: - child_ids.add(cid) - - # top-level mapping만 매칭 대상 + child_ids.update(m.children) top_mappings = [m for m in xhtml_mappings if m.block_id not in child_ids] - mdx_ptr = 0 # MDX 콘텐츠 인덱스 포인터 - LOOKAHEAD = 5 # 최대 앞으로 탐색할 MDX 블록 수 + # block_id → BlockMapping (children 해석용) + id_to_mapping = {m.block_id: m for m in xhtml_mappings} + + entries = [] + # MDX H1 헤딩(페이지 제목)은 XHTML 본문에 존재하지 않으므로 건너뛴다. + # forward converter는 MDX 첫 줄에 `# <페이지 제목>`을 자동 생성하며, + # 이 블록은 Confluence XHTML의 페이지 제목(본문 외부)에 해당한다. + mdx_ptr = 0 + while (mdx_ptr < len(mdx_content_indexed) + and mdx_content_indexed[mdx_ptr][1].type == 'heading' + and mdx_content_indexed[mdx_ptr][1].content.startswith('# ')): + mdx_ptr += 1 for xm in top_mappings: - xhtml_plain = collapse_ws(xm.xhtml_plain_text) - - # 빈 텍스트 XHTML 블록 — empty MDX 블록과 순차 매핑 - if not xhtml_plain: - if xm.type == 'paragraph': - # 현재 content 포인터의 MDX 인덱스 이후의 empty만 사용 - last_content_idx = ( - mdx_content_indices[mdx_ptr - 1] if mdx_ptr > 0 else -1 - ) - # empty_ptr를 last_content_idx 이후로 전진 - while (empty_ptr < len(mdx_empty_indices) - and mdx_empty_indices[empty_ptr] <= last_content_idx): - empty_ptr += 1 - if empty_ptr < len(mdx_empty_indices): - entries.append({ - 'xhtml_xpath': xm.xhtml_xpath, - 'xhtml_type': xm.type, - 'mdx_blocks': [mdx_empty_indices[empty_ptr]], - }) - empty_ptr += 1 - else: - entries.append({ - 'xhtml_xpath': xm.xhtml_xpath, - 'xhtml_type': xm.type, - 'mdx_blocks': [], - }) - else: - entries.append({ - 'xhtml_xpath': xm.xhtml_xpath, - 'xhtml_type': xm.type, - 'mdx_blocks': [], - }) + # 스킵 매크로 (toc, children 등) + if _should_skip_xhtml(xm): + entries.append({ + 'xhtml_xpath': xm.xhtml_xpath, + 'xhtml_type': xm.type, + 'mdx_blocks': [], + }) continue - if mdx_ptr >= len(mdx_content_indices): + # 빈 텍스트 paragraph XHTML 블록 — MDX 콘텐츠 대응 없음 + # (빈

    는 MDX의 empty 줄에 해당하며 content 블록이 아님) + if not xm.xhtml_plain_text.strip() and xm.type == 'paragraph': entries.append({ 'xhtml_xpath': xm.xhtml_xpath, 'xhtml_type': xm.type, @@ -403,44 +470,42 @@ def generate_sidecar_mapping( }) continue - # 현재 MDX 블록과 텍스트 비교 - matched_at = _find_text_match( - xhtml_plain, mdx_content_indices, mdx_plains, mdx_ptr, LOOKAHEAD) - - if matched_at is not None: - # 매치 위치까지 MDX 포인터 이동 - mdx_ptr = matched_at - mdx_idx = mdx_content_indices[mdx_ptr] - matched_indices = [mdx_idx] - mdx_ptr += 1 - - # children이 있으면 후속 MDX 블록도 이 XHTML 매핑에 대응 - # 단, 다음 top-level XHTML 매핑의 텍스트와 겹치지 않는 범위에서만 - if xm.children: - num_children = _count_child_mdx_blocks( - xm, mdx_content_indices, mdx_plains, - mdx_ptr, top_mappings, collapse_ws, - ) - for _ in range(num_children): - if mdx_ptr < len(mdx_content_indices): - matched_indices.append(mdx_content_indices[mdx_ptr]) - mdx_ptr += 1 - + if mdx_ptr >= len(mdx_content_indexed): entries.append({ 'xhtml_xpath': xm.xhtml_xpath, 'xhtml_type': xm.type, - 'mdx_blocks': matched_indices, + 'mdx_blocks': [], }) + continue + + mdx_idx, mdx_block = mdx_content_indexed[mdx_ptr] + + if _type_compatible(xm.type, mdx_block.type): + entry: Dict[str, Any] = { + 'xhtml_xpath': xm.xhtml_xpath, + 'xhtml_type': xm.type, + 'mdx_blocks': [mdx_idx], + 'mdx_line_start': mdx_block.line_start, + 'mdx_line_end': mdx_block.line_end, + } + # compound block (callout/details 등): children 정렬 + if xm.children and mdx_block.children: + child_entries = _align_children(xm, mdx_block, id_to_mapping) + if child_entries: + entry['children'] = child_entries + entries.append(entry) + mdx_ptr += 1 else: - # 텍스트 매치 실패 — MDX 대응 없음 (image, toc 등) + # 타입 불일치 → XHTML 블록이 MDX 출력을 생성하지 않음 + # MDX 포인터는 유지 (MDX 블록이 다음 XHTML과 매칭될 수 있음) entries.append({ 'xhtml_xpath': xm.xhtml_xpath, 'xhtml_type': xm.type, 'mdx_blocks': [], }) - mapping_data = { - 'version': 2, + mapping_data: Dict[str, Any] = { + 'version': 3, 'source_page_id': page_id, 'mdx_file': 'page.mdx', 'mappings': entries, @@ -450,121 +515,6 @@ def generate_sidecar_mapping( return yaml.dump(mapping_data, allow_unicode=True, default_flow_style=False) -def _count_child_mdx_blocks( - xm, - mdx_content_indices, - mdx_plains, - mdx_ptr, - top_mappings, - collapse_ws, -) -> int: - """children이 있는 XHTML 매핑에 대응하는 MDX 블록 수를 결정한다. - - 다음 비빈 top-level XHTML 매핑의 텍스트와 겹치지 않는 범위에서 - 후속 MDX 블록을 소비한다. - """ - current_idx = None - for i, tm in enumerate(top_mappings): - if tm is xm: - current_idx = i - break - if current_idx is None: - return len(xm.children) - - next_sigs = [] - for tm in top_mappings[current_idx + 1:]: - sig = _strip_all_ws(collapse_ws(tm.xhtml_plain_text)) - if sig: - next_sigs.append(sig) - if len(next_sigs) >= 3: - break - - if not next_sigs: - return len(xm.children) - - count = 0 - max_scan = len(xm.children) + 5 - for offset in range(max_scan): - ptr = mdx_ptr + offset - if ptr >= len(mdx_content_indices): - break - mdx_idx = mdx_content_indices[ptr] - mdx_sig = _strip_all_ws(mdx_plains[mdx_idx]) - if not mdx_sig: - count += 1 - continue - - hit = False - for ns in next_sigs: - if mdx_sig == ns: - hit = True - break - if len(ns) >= 10 and ns[:50] in mdx_sig: - hit = True - break - if len(mdx_sig) >= 10 and mdx_sig[:50] in ns: - hit = True - break - if hit: - break - count += 1 - - return count - - -def _strip_all_ws(text: str) -> str: - """모든 공백 문자를 제거한다. 텍스트 서명 비교용.""" - return ''.join(text.split()) - - -def _find_text_match( - xhtml_plain: str, - mdx_content_indices: List[int], - mdx_plains: Dict[int, str], - start_ptr: int, - lookahead: int, -) -> Optional[int]: - """XHTML plain text와 일치하는 MDX 블록을 전방 탐색한다.""" - end_ptr = min(start_ptr + lookahead, len(mdx_content_indices)) - xhtml_sig = _strip_all_ws(xhtml_plain) - - for ptr in range(start_ptr, end_ptr): - mdx_idx = mdx_content_indices[ptr] - if xhtml_plain == mdx_plains[mdx_idx]: - return ptr - - for ptr in range(start_ptr, end_ptr): - mdx_idx = mdx_content_indices[ptr] - mdx_sig = _strip_all_ws(mdx_plains[mdx_idx]) - if xhtml_sig == mdx_sig: - return ptr - - if len(xhtml_sig) >= 10: - prefix = xhtml_sig[:50] - for ptr in range(start_ptr, end_ptr): - mdx_idx = mdx_content_indices[ptr] - mdx_sig = _strip_all_ws(mdx_plains[mdx_idx]) - if not mdx_sig: - continue - if prefix in mdx_sig or mdx_sig[:50] in xhtml_sig: - return ptr - - # 4차: 짧은 prefix 포함 매칭 (emoticon/lost_info 차이 허용) - # XHTML ac:emoticon 태그가 텍스트로 치환되지 않는 경우, - # 전체 문자열의 substring 비교가 실패할 수 있으므로 - # 앞부분 20자만으로 포함 관계를 검사한다. - _SHORT_PREFIX = 20 - for ptr in range(start_ptr, end_ptr): - mdx_idx = mdx_content_indices[ptr] - mdx_sig = _strip_all_ws(mdx_plains[mdx_idx]) - if len(mdx_sig) < _SHORT_PREFIX: - continue - mdx_prefix = mdx_sig[:_SHORT_PREFIX] - if mdx_prefix in xhtml_sig: - return ptr - - return None - def find_mapping_by_sidecar( mdx_block_index: int, diff --git a/confluence-mdx/bin/reverse_sync_cli.py b/confluence-mdx/bin/reverse_sync_cli.py index 26b38d36d..90710e098 100755 --- a/confluence-mdx/bin/reverse_sync_cli.py +++ b/confluence-mdx/bin/reverse_sync_cli.py @@ -352,7 +352,7 @@ def run_verify( # Step 3.5: Sidecar mapping 생성 + 인덱스 구축 from reverse_sync.sidecar import ( - SidecarEntry, generate_sidecar_mapping, + SidecarEntry, SidecarChildEntry, generate_sidecar_mapping, build_mdx_to_sidecar_index, build_xpath_to_mapping, ) # forward converter가 생성한 mapping.yaml에서 lost_info를 보존 @@ -366,14 +366,25 @@ def run_verify( (var_dir / 'mapping.yaml').write_text(sidecar_yaml) sidecar_data = yaml.safe_load(sidecar_yaml) or {} page_lost_info = sidecar_data.get('lost_info', {}) - sidecar_entries = [ - SidecarEntry( + sidecar_entries = [] + for item in sidecar_data.get('mappings', []): + children = [ + SidecarChildEntry( + xhtml_xpath=ch.get('xhtml_xpath', ''), + xhtml_block_id=ch.get('xhtml_block_id', ''), + mdx_line_start=ch.get('mdx_line_start', 0), + mdx_line_end=ch.get('mdx_line_end', 0), + ) + for ch in item.get('children', []) + ] + sidecar_entries.append(SidecarEntry( xhtml_xpath=item['xhtml_xpath'], xhtml_type=item.get('xhtml_type', ''), mdx_blocks=item.get('mdx_blocks', []), - ) - for item in sidecar_data.get('mappings', []) - ] + mdx_line_start=item.get('mdx_line_start', 0), + mdx_line_end=item.get('mdx_line_end', 0), + children=children, + )) mdx_to_sidecar = build_mdx_to_sidecar_index(sidecar_entries) xpath_to_mapping = build_xpath_to_mapping(original_mappings) @@ -401,6 +412,13 @@ def run_verify( yaml.dump(verify_mapping_data, allow_unicode=True, default_flow_style=False)) # Step 6: Forward 변환 → verify.mdx 저장 + # xhtml_path 옆에 있는 page.v1.yaml을 var//로 복사하여 + # forward converter가 크로스 페이지 링크를 정상 해석할 수 있게 한다. + src_page_v1 = Path(xhtml_path).parent / 'page.v1.yaml' + dst_page_v1 = var_dir / 'page.v1.yaml' + if src_page_v1.exists() and not dst_page_v1.exists(): + shutil.copy2(src_page_v1, dst_page_v1) + lang = language or _detect_language(improved_src.descriptor) _forward_convert( str(var_dir / 'reverse-sync.patched.xhtml'), diff --git a/confluence-mdx/tests/reverse-sync/pages.yaml b/confluence-mdx/tests/reverse-sync/pages.yaml index ab95d57d6..0b171d1bf 100644 --- a/confluence-mdx/tests/reverse-sync/pages.yaml +++ b/confluence-mdx/tests/reverse-sync/pages.yaml @@ -1401,7 +1401,7 @@ description: '조사 붙여쓰기 교정이 XHTML 원본 기준으로 되돌아감. 예: MySQL 을 → MySQL을 (교정) → MySQL 을 (원복) ' - expected_status: fail + expected_status: pass failure_type: 11 label: 교정 내용 원복 — 조사 띄어쓰기 (MySQL 을→MySQL을) mdx_path: installation/prerequisites/configuring-rootless-mode-with-podman.mdx @@ -1466,7 +1466,7 @@ description: '어휘·표현 교정이 XHTML 원본 기준으로 되돌아감. 예: 갈음하여 → 대체하여, 주의하여 주세요 → 주의해 주세요 ' - expected_status: fail + expected_status: pass failure_type: 11 label: 교정 내용 원복 — 어휘·표현 변경 (갈음하여→대체하여, 주의하여→주의해) mdx_path: installation/post-installation-setup.mdx diff --git a/confluence-mdx/tests/test_lost_info_collector.py b/confluence-mdx/tests/test_lost_info_collector.py index 2de5d93aa..865d9d64c 100644 --- a/confluence-mdx/tests/test_lost_info_collector.py +++ b/confluence-mdx/tests/test_lost_info_collector.py @@ -199,7 +199,7 @@ def test_no_lost_info_when_nothing_lost(self): class TestMappingYamlLostInfo: - def test_version_is_2(self): + def test_version_is_3(self): import yaml from reverse_sync.sidecar import generate_sidecar_mapping @@ -208,7 +208,7 @@ def test_version_is_2(self): result = generate_sidecar_mapping(xhtml, mdx, '12345') data = yaml.safe_load(result) - assert data['version'] == 2 + assert data['version'] == 3 def test_lost_info_in_mapping_yaml(self): import yaml @@ -223,7 +223,7 @@ def test_lost_info_in_mapping_yaml(self): result = generate_sidecar_mapping(xhtml, mdx, '12345', lost_infos=lost_infos) data = yaml.safe_load(result) - assert data['version'] == 2 + assert data['version'] == 3 assert 'lost_info' in data assert data['lost_info']['emoticons'][0]['name'] == 'tick' diff --git a/confluence-mdx/tests/test_reverse_sync_cli.py b/confluence-mdx/tests/test_reverse_sync_cli.py index ad58b54c4..39ca88f3c 100644 --- a/confluence-mdx/tests/test_reverse_sync_cli.py +++ b/confluence-mdx/tests/test_reverse_sync_cli.py @@ -828,7 +828,7 @@ def testbuild_patches_table_block(): def testbuild_patches_child_resolved(): - """parent+children 매핑에서 child 해석 성공 시 child xpath로 패치한다.""" + """parent+children 매핑에서 containing 전략으로 parent xpath로 패치한다.""" from reverse_sync.mdx_block_parser import MdxBlock from reverse_sync.block_diff import BlockChange from reverse_sync.mapping_recorder import BlockMapping @@ -870,9 +870,10 @@ def testbuild_patches_child_resolved(): patches = build_patches(changes, original_blocks, improved_blocks, mappings, mdx_to_sidecar, xpath_to_mapping) + # _resolve_child_mapping 제거 → containing 전략 → parent xpath로 패치 assert len(patches) == 1 - assert patches[0]['xhtml_xpath'] == 'macro-info[1]/p[1]' - assert patches[0]['new_inner_xhtml'] == 'New child text.' + assert patches[0]['xhtml_xpath'] == 'macro-info[1]' + assert 'New child text.' in patches[0]['new_plain_text'] def testbuild_patches_child_fallback_to_parent_containing(): @@ -998,7 +999,7 @@ def testbuild_patches_list_item_child_resolved(): xhtml_element_index=2, ) mappings = [parent, child_a, child_b] - # sidecar에 list block index 없음 → build_list_item_patches 경로 + # sidecar에 list block index 없음 → parent 없음 → 패치 없음 mdx_to_sidecar = {} xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} id_to_mapping = {m.block_id: m for m in mappings} @@ -1008,12 +1009,9 @@ def testbuild_patches_list_item_child_resolved(): changes[0], mappings, set(), mdx_to_sidecar, xpath_to_mapping, id_to_mapping) - # R2: sidecar에 parent가 없어도 텍스트 포함 폴백으로 parent 발견 → child 해석 성공 - assert len(patches) == 1 - assert patches[0]['xhtml_xpath'] == 'ul[1]/li[1]' - assert patches[0]['new_inner_xhtml'] == 'Item A new' + assert len(patches) == 0 - # sidecar에 parent가 있는 경우 + # sidecar에 parent가 있는 경우 → _regenerate_list_from_parent → 전체 재생성 mdx_to_sidecar = { 0: SidecarEntry(xhtml_xpath='ul[1]', xhtml_type='list', mdx_blocks=[0]), } @@ -1022,8 +1020,8 @@ def testbuild_patches_list_item_child_resolved(): mdx_to_sidecar, xpath_to_mapping, id_to_mapping) assert len(patches) == 1 - assert patches[0]['xhtml_xpath'] == 'ul[1]/li[1]' - assert patches[0]['new_inner_xhtml'] == 'Item A new' + assert patches[0]['xhtml_xpath'] == 'ul[1]' + assert 'new_inner_xhtml' in patches[0] def testbuild_patches_list_item_fallback_to_parent(): diff --git a/confluence-mdx/tests/test_reverse_sync_patch_builder.py b/confluence-mdx/tests/test_reverse_sync_patch_builder.py index fb7d91191..01c200f55 100644 --- a/confluence-mdx/tests/test_reverse_sync_patch_builder.py +++ b/confluence-mdx/tests/test_reverse_sync_patch_builder.py @@ -1,9 +1,8 @@ """patch_builder 유닛 테스트. -기존 _find_containing_mapping 테스트 + build_patches 6개 분기 경로 +build_patches 분기 경로 + helper 함수 (is_markdown_table, split_table_rows, normalize_table_row, -split_list_items, _resolve_child_mapping, -build_table_row_patches, build_list_item_patches) 테스트. +split_list_items, build_table_row_patches, build_list_item_patches) 테스트. """ from reverse_sync.block_diff import BlockChange from reverse_sync.mapping_recorder import BlockMapping @@ -11,7 +10,6 @@ from reverse_sync.sidecar import SidecarEntry from text_utils import normalize_mdx_to_plain from reverse_sync.patch_builder import ( - _find_containing_mapping, _flush_containing_changes, _resolve_mapping_for_change, build_patches, @@ -31,7 +29,6 @@ from reverse_sync.list_patcher import ( build_list_item_patches, split_list_items, - _resolve_child_mapping, ) @@ -82,130 +79,6 @@ def _make_sidecar(xpath: str, mdx_blocks: list) -> SidecarEntry: return SidecarEntry(xhtml_xpath=xpath, xhtml_type='paragraph', mdx_blocks=mdx_blocks) -# ── _find_containing_mapping (기존 7개 테스트 유지) ── - - -class TestFindContainingMapping: - def test_finds_mapping_containing_old_plain(self): - m1 = _make_mapping('m1', 'Command Audit : Server내 수행 명령어 이력') - m2 = _make_mapping('m2', 'General User Access History Activity Logs Servers Command Audit : Server내 수행 명령어 이력 Account Lock History') - mappings = [m1, m2] - result = _find_containing_mapping( - 'Command Audit : Server내 수행 명령어 이력', mappings, set()) - assert result is m1 - - def test_skips_used_ids(self): - m1 = _make_mapping('m1', 'Command Audit : Server내 수행 명령어 이력') - m2 = _make_mapping('m2', 'General Servers Command Audit : Server내 수행 명령어 이력 Account Lock') - mappings = [m1, m2] - used = {'m1'} - result = _find_containing_mapping( - 'Command Audit : Server내 수행 명령어 이력', mappings, used) - assert result is m2 - - def test_returns_none_for_short_text(self): - m1 = _make_mapping('m1', 'hello world foo bar') - result = _find_containing_mapping('abc', [m1], set()) - assert result is None - - def test_returns_none_for_empty_text(self): - m1 = _make_mapping('m1', 'hello world foo bar') - result = _find_containing_mapping('', [m1], set()) - assert result is None - - def test_returns_none_when_no_mapping_contains_text(self): - m1 = _make_mapping('m1', 'completely different text here') - result = _find_containing_mapping( - 'Command Audit : Server내 수행 명령어 이력', [m1], set()) - assert result is None - - def test_ignores_whitespace_differences(self): - m1 = _make_mapping('m1', 'Command Audit : Server내 수행 명령어 이력') - result = _find_containing_mapping( - 'Command Audit : Server내 수행 명령어 이력', [m1], set()) - assert result is m1 - - def test_ignores_invisible_unicode_chars(self): - m1 = _make_mapping( - 'm1', - 'Account Lock History\u3164 : QueryPie\u200b사용자별 서버 접속 계정') - result = _find_containing_mapping( - 'Account Lock History : QueryPie사용자별 서버 접속 계정', - [m1], set()) - assert result is m1 - - -# ── _resolve_child_mapping ── - - -class TestResolveChildMapping: - def test_exact_match_first_pass(self): - child = _make_mapping('c1', 'child text') - parent = _make_mapping('p1', 'parent text', children=['c1']) - id_map = {'c1': child, 'p1': parent} - result = _resolve_child_mapping('child text', parent, id_map) - assert result is child - - def test_whitespace_collapsed_match(self): - child = _make_mapping('c1', 'child text here') - parent = _make_mapping('p1', 'parent', children=['c1']) - id_map = {'c1': child, 'p1': parent} - result = _resolve_child_mapping('child text here', parent, id_map) - assert result is child - - def test_nospace_match(self): - child = _make_mapping('c1', 'child text') - parent = _make_mapping('p1', 'parent', children=['c1']) - id_map = {'c1': child, 'p1': parent} - # collapse_ws doesn't match, but nospace does - result = _resolve_child_mapping('childtext', parent, id_map) - assert result is child - - def test_xhtml_list_marker_stripped(self): - child = _make_mapping('c1', '- item text') - parent = _make_mapping('p1', 'parent', children=['c1']) - id_map = {'c1': child, 'p1': parent} - result = _resolve_child_mapping('item text', parent, id_map) - assert result is child - - def test_mdx_list_marker_stripped(self): - child = _make_mapping('c1', 'item text') - parent = _make_mapping('p1', 'parent', children=['c1']) - id_map = {'c1': child, 'p1': parent} - result = _resolve_child_mapping('- item text', parent, id_map) - assert result is child - - def test_returns_none_when_no_match(self): - child = _make_mapping('c1', 'completely different') - parent = _make_mapping('p1', 'parent', children=['c1']) - id_map = {'c1': child, 'p1': parent} - result = _resolve_child_mapping('no match text here', parent, id_map) - assert result is None - - def test_returns_none_for_empty_text(self): - parent = _make_mapping('p1', 'parent', children=['c1']) - child = _make_mapping('c1', 'child') - id_map = {'c1': child, 'p1': parent} - result = _resolve_child_mapping('', parent, id_map) - assert result is None - - def test_missing_child_id(self): - parent = _make_mapping('p1', 'parent', children=['missing']) - id_map = {'p1': parent} - result = _resolve_child_mapping('some text here', parent, id_map) - assert result is None - - def test_prefix_match_rejects_long_text(self): - # 5차 prefix: old_plain이 child보다 훨씬 길 때 잘못된 매칭 방지 - # callout 전체 텍스트가 내부 paragraph와 같은 prefix를 공유하는 경우 - child_text = '11.4.0부터 속성 기반 승인자 지정시 여러개의 속성을 지정할 수 있도록 개선되었습니다.' - long_old = child_text + ' ' + '기존 Attribute 기반 승인자 지정시 하나의 Attribute만 지정할 수 있었으나...' * 3 - child = _make_mapping('c1', child_text) - parent = _make_mapping('p1', 'parent', children=['c1']) - id_map = {'c1': child, 'p1': parent} - result = _resolve_child_mapping(long_old, parent, id_map) - assert result is None - # ── Helper 함수 테스트 ── @@ -297,15 +170,15 @@ def _setup_sidecar(self, xpath: str, mdx_idx: int): mdx_to_sidecar = {mdx_idx: entry} return mdx_to_sidecar - # Path 1: sidecar 매칭 → children 있음 → child 해석 성공 → 직접 패치 - def test_path1_sidecar_match_child_resolved(self): + # Path 1: sidecar 매칭 → list type + children → list 전략 → 전체 리스트 재생성 + def test_path1_sidecar_match_list_with_children_regenerates(self): child = _make_mapping('c1', 'child text', xpath='li[1]') parent = _make_mapping('p1', 'parent text child text more', xpath='ul[1]', type_='list', children=['c1']) mappings = [parent, child] xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} - change = _make_change(0, 'child text', 'updated child') + change = _make_change(0, '- child text', '- updated child', type_='list') mdx_to_sidecar = self._setup_sidecar('ul[1]', 0) patches = build_patches( @@ -313,8 +186,8 @@ def test_path1_sidecar_match_child_resolved(self): mappings, mdx_to_sidecar, xpath_to_mapping) assert len(patches) == 1 - assert patches[0]['xhtml_xpath'] == 'li[1]' - assert 'updated child' in patches[0]['new_inner_xhtml'] + assert patches[0]['xhtml_xpath'] == 'ul[1]' + assert 'new_inner_xhtml' in patches[0] # Path 2: sidecar 매칭 → children 있음 → child 해석 실패 # → 텍스트 불일치 → list 분리 (item 수 불일치 → inner XHTML 재생성) @@ -360,21 +233,20 @@ def test_path3_sidecar_child_fail_containing_block(self): assert len(patches) == 1 assert patches[0]['xhtml_xpath'] == 'div[1]' - # Path 4: sidecar 미스 → 텍스트 포함 검색 → containing block + # Path 4: sidecar 미스 → skip (텍스트 포함 검색 폴백 제거됨) def test_path4_sidecar_miss_text_search_containing(self): m1 = _make_mapping('m1', 'this mapping contains the search text here') mappings = [m1] xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} change = _make_change(0, 'search text', 'replaced text') - mdx_to_sidecar = {} # 빈 sidecar → sidecar 미스 + mdx_to_sidecar = {} # 빈 sidecar → sidecar 미스 → skip patches = build_patches( [change], [change.old_block], [change.new_block], mappings, mdx_to_sidecar, xpath_to_mapping) - assert len(patches) == 1 - assert patches[0]['xhtml_xpath'] == m1.xhtml_xpath + assert len(patches) == 0 # Path 5: sidecar 미스 → list/table 분리 def test_path5_sidecar_miss_table_split(self): @@ -391,9 +263,9 @@ def test_path5_sidecar_miss_table_split(self): assert patches == [] - # Path 6: sidecar 매칭 → children 없음 → 텍스트 불일치 → 재매핑 + # Path 6: sidecar 매칭 → children 없음 → sidecar를 신뢰하여 직접 매핑 def test_path6_sidecar_match_text_mismatch_remapping(self): - # sidecar 매핑이 있지만 텍스트가 포함되지 않음 → better 매핑 찾기 + # sidecar가 p[1]을 가리키면 텍스트 불일치와 무관하게 p[1]로 직접 패치 wrong = _make_mapping('wrong', 'completely wrong mapping', xpath='p[1]') better = _make_mapping('better', 'contains the target text here', xpath='p[2]') mappings = [wrong, better] @@ -407,7 +279,7 @@ def test_path6_sidecar_match_text_mismatch_remapping(self): mappings, mdx_to_sidecar, xpath_to_mapping) assert len(patches) == 1 - assert patches[0]['xhtml_xpath'] == 'p[2]' + assert patches[0]['xhtml_xpath'] == 'p[1]' # 직접 매칭 + text_transfer 사용 def test_direct_match_with_transfer(self): @@ -499,7 +371,7 @@ def test_direct_text_only_change_uses_inner_xhtml_patch(self): assert 'new_inner_xhtml' in patches[0] assert 'new_plain_text' not in patches[0] - # 여러 변경이 동일 containing block에 그룹화 + # sidecar 미스 → skip (텍스트 포함 검색 폴백 제거됨) def test_multiple_changes_grouped_to_containing(self): container = _make_mapping( 'm1', 'first part and second part', xpath='p[1]') @@ -508,7 +380,7 @@ def test_multiple_changes_grouped_to_containing(self): change1 = _make_change(0, 'first part', 'first UPDATED') change2 = _make_change(1, 'second part', 'second UPDATED') - mdx_to_sidecar = {} # sidecar 미스 → containing 검색 + mdx_to_sidecar = {} # sidecar 미스 → skip patches = build_patches( [change1, change2], @@ -516,8 +388,7 @@ def test_multiple_changes_grouped_to_containing(self): [change1.new_block, change2.new_block], mappings, mdx_to_sidecar, xpath_to_mapping) - assert len(patches) == 1 - assert 'UPDATED' in patches[0]['new_plain_text'] + assert len(patches) == 0 def test_direct_heading_inline_code_added(self): """heading에서 backtick 추가 시 new_inner_xhtml 패치를 생성한다.""" @@ -1143,7 +1014,7 @@ class TestResolveMappingForChange: """_resolve_mapping_for_change 매핑 해석 함수 테스트.""" def _make_context(self, mappings=None, mdx_to_sidecar=None, - xpath_to_mapping=None, id_to_mapping=None): + xpath_to_mapping=None): """공통 컨텍스트 dict를 구성한다.""" mappings = mappings or [] return { @@ -1151,7 +1022,6 @@ def _make_context(self, mappings=None, mdx_to_sidecar=None, 'used_ids': set(), 'mdx_to_sidecar': mdx_to_sidecar or {}, 'xpath_to_mapping': xpath_to_mapping or {}, - 'id_to_mapping': id_to_mapping or {m.block_id: m for m in mappings}, } def _old_plain(self, change): @@ -1181,7 +1051,7 @@ def test_sidecar_direct_match_returns_direct(self): assert strategy == 'direct' assert mapping.block_id == 'b1' - def test_sidecar_match_with_children_resolved_returns_direct(self): + def test_sidecar_match_with_children_returns_containing(self): child = _make_mapping('c1', 'child text', xpath='li[1]') parent = _make_mapping('p1', 'parent text', xpath='ul[1]', children=['c1']) @@ -1194,8 +1064,8 @@ def test_sidecar_match_with_children_resolved_returns_direct(self): change = _make_change(0, 'child text', 'new child') strategy, mapping = _resolve_mapping_for_change( change, self._old_plain(change), **ctx) - assert strategy == 'direct' - assert mapping.block_id == 'c1' + assert strategy == 'containing' + assert mapping.block_id == 'p1' def test_no_sidecar_list_type_returns_list(self): change = _make_change(0, '- item1\n- item2', '- item1\n- changed', type_='list') @@ -1212,14 +1082,14 @@ def test_no_sidecar_table_type_returns_table(self): change, self._old_plain(change), **ctx) assert strategy == 'table' - def test_no_sidecar_containing_match_returns_containing(self): + def test_no_sidecar_containing_match_returns_skip(self): m = _make_mapping('b1', 'hello world full text here', xpath='div[1]') change = _make_change(0, 'hello world', 'hi world') ctx = self._make_context(mappings=[m]) strategy, mapping = _resolve_mapping_for_change( change, self._old_plain(change), **ctx) - assert strategy == 'containing' - assert mapping.block_id == 'b1' + assert strategy == 'skip' + assert mapping is None # ── Inline format 변경 감지 테스트 ── diff --git a/confluence-mdx/tests/test_reverse_sync_sidecar.py b/confluence-mdx/tests/test_reverse_sync_sidecar.py index 677c7f703..b860f2002 100644 --- a/confluence-mdx/tests/test_reverse_sync_sidecar.py +++ b/confluence-mdx/tests/test_reverse_sync_sidecar.py @@ -7,7 +7,6 @@ - xhtml_xpath → BlockMapping 인덱스 구축 - 2-hop 조회: MDX index → SidecarEntry → BlockMapping - XHTML + MDX로부터 mapping.yaml 생성 (generate_sidecar_mapping) - - 텍스트 매칭 내부 함수들 (_find_text_match, _strip_all_ws) """ import pytest import yaml @@ -21,13 +20,12 @@ sha256_text, write_sidecar, SidecarEntry, + SidecarChildEntry, load_sidecar_mapping, build_mdx_to_sidecar_index, build_xpath_to_mapping, find_mapping_by_sidecar, generate_sidecar_mapping, - _find_text_match, - _strip_all_ws, ) from reverse_sync.mapping_recorder import BlockMapping @@ -230,104 +228,6 @@ def test_xpath_not_in_mapping_index(self): assert result is None -# ── _strip_all_ws ───────────────────────────────────────────── - -class TestStripAllWs: - def test_basic(self): - assert _strip_all_ws('hello world') == 'helloworld' - - def test_tabs_and_newlines(self): - assert _strip_all_ws('a\tb\nc d') == 'abcd' - - def test_empty(self): - assert _strip_all_ws('') == '' - - def test_only_whitespace(self): - assert _strip_all_ws(' \t\n ') == '' - - -# ── _find_text_match ────────────────────────────────────────── - -class TestFindTextMatch: - def test_exact_match_at_start(self): - """1차: collapse_ws 후 완전 일치.""" - indices = [0, 1, 2] - plains = {0: 'Hello World', 1: 'Foo Bar', 2: 'Baz'} - result = _find_text_match('Hello World', indices, plains, 0, 5) - assert result == 0 - - def test_exact_match_at_offset(self): - indices = [0, 1, 2] - plains = {0: 'AAA', 1: 'BBB', 2: 'CCC'} - result = _find_text_match('BBB', indices, plains, 0, 5) - assert result == 1 - - def test_whitespace_insensitive_match(self): - """2차: 공백 무시 완전 일치.""" - indices = [0, 1] - plains = {0: 'Hello World', 1: 'Foo'} - # xhtml_plain 'HelloWorld' vs mdx 'Hello World' → strip_all_ws 비교 - result = _find_text_match('Hello World', indices, plains, 0, 5) - # 1차에서 실패하지만 2차 공백무시에서 매칭 - assert result is not None - - def test_prefix_match(self): - """3차: prefix 포함 매칭.""" - indices = [0] - long_text = 'A' * 60 - plains = {0: long_text + ' extra'} - # xhtml_plain의 앞 50자가 mdx에 포함 - result = _find_text_match(long_text, indices, plains, 0, 5) - assert result is not None - - def test_no_match(self): - indices = [0, 1] - plains = {0: 'AAA', 1: 'BBB'} - result = _find_text_match('CCC', indices, plains, 0, 5) - assert result is None - - def test_start_ptr_skips_earlier(self): - """start_ptr 이전의 블록은 검색하지 않는다.""" - indices = [0, 1, 2] - plains = {0: 'Target', 1: 'Other', 2: 'More'} - result = _find_text_match('Target', indices, plains, 1, 5) - assert result is None # index 0은 검색 범위 밖 - - def test_lookahead_limit(self): - """lookahead 범위를 초과하면 매칭하지 않는다.""" - indices = [0, 1, 2, 3, 4, 5] - plains = {i: f'block-{i}' for i in range(6)} - result = _find_text_match('block-5', indices, plains, 0, 3) - assert result is None # lookahead=3이므로 index 0,1,2만 검색 - - def test_short_text_no_prefix_match(self): - """10자 미만의 짧은 텍스트는 prefix 매칭을 시도하지 않는다.""" - indices = [0] - plains = {0: 'AB extra'} - result = _find_text_match('AB', indices, plains, 0, 5) - assert result is None - - def test_short_prefix_match_with_emoticon_difference(self): - """4차: emoticon 차이가 있어도 앞부분 20자 prefix가 일치하면 매칭한다.""" - # XHTML에서 ac:emoticon이 텍스트로 추출되지 않는 경우, - # 끝부분에 이모지가 빠져서 전체 문자열 비교가 실패하지만 - # 앞부분 prefix로 매칭할 수 있어야 한다. - xhtml_text = '9.12.0 이후부터 적용되는 신규 메뉴 가이드입니다. (클릭해서 확대해서 보세요. )' - mdx_text = '9.12.0 이후부터 적용되는 신규 메뉴 가이드입니다. (클릭해서 확대해서 보세요. 🔎 )' - indices = [0] - plains = {0: mdx_text} - result = _find_text_match(xhtml_text, indices, plains, 0, 5) - assert result == 0 - - def test_short_prefix_match_with_metadata_prefix(self): - """4차: XHTML에 파라미터 메타데이터 prefix가 있어도 MDX prefix로 매칭한다.""" - xhtml_text = ':purple_circle:1f7e3🟣#F4F5F79.12.0 이후부터 적용되는 신규 메뉴 가이드입니다.' - mdx_text = '9.12.0 이후부터 적용되는 신규 메뉴 가이드입니다. (클릭해서 확대해서 보세요. 🔎 )' - indices = [0] - plains = {0: mdx_text} - result = _find_text_match(xhtml_text, indices, plains, 0, 5) - assert result == 0 - # ── generate_sidecar_mapping ────────────────────────────────── @@ -345,7 +245,7 @@ def test_simple_heading_paragraph(self): result = generate_sidecar_mapping(xhtml, mdx, '12345') data = yaml.safe_load(result) - assert data['version'] == 2 + assert data['version'] == 3 assert data['source_page_id'] == '12345' assert len(data['mappings']) >= 2 @@ -356,6 +256,9 @@ def test_simple_heading_paragraph(self): e for e in data['mappings'] if e['xhtml_type'] == 'paragraph') assert len(heading_entry['mdx_blocks']) >= 1 assert len(para_entry['mdx_blocks']) >= 1 + # v3: line range 필드 포함 + assert heading_entry.get('mdx_line_start', 0) > 0 + assert para_entry.get('mdx_line_start', 0) > 0 def test_empty_xhtml_block_gets_empty_mdx_blocks(self): """이미지 등 텍스트가 없는 XHTML 블록은 빈 mdx_blocks를 받는다.""" @@ -417,7 +320,7 @@ def test_multiple_paragraphs_sequential_matching(self): assert all_indices == sorted(all_indices) def test_callout_macro_with_children(self): - """Callout 매크로 (ac:structured-macro) → 컨테이너 + children 매핑.""" + """Callout 매크로 (ac:structured-macro) → 단일 MDX callout 블록에 매핑, children 포함.""" xhtml = ( '' '' @@ -426,21 +329,26 @@ def test_callout_macro_with_children(self): '' '' ) + # 실제 프로젝트 MDX 포맷: 태그 사용 mdx = ( '---\ntitle: Test\n---\n\n' - ':::info\n\n' + 'import { Callout } from \'nextra/components\'\n\n' + '\n' 'Info paragraph 1.\n\n' - 'Info paragraph 2.\n\n' - ':::\n' + 'Info paragraph 2.\n' + '\n' ) result = generate_sidecar_mapping(xhtml, mdx) data = yaml.safe_load(result) - # 컨테이너 매핑이 여러 MDX 블록을 포함해야 함 - container_entries = [ - e for e in data['mappings'] if len(e.get('mdx_blocks', [])) > 1 - ] - assert len(container_entries) >= 1 + # v3: 컨테이너가 단일 MDX 블록 (callout)에 매핑됨 + html_entries = [e for e in data['mappings'] if e.get('xhtml_type') == 'html_block'] + assert len(html_entries) >= 1 + container = html_entries[0] + assert len(container['mdx_blocks']) == 1 + # v3: children 필드에 XHTML children 정렬 결과 포함 + children = container.get('children', []) + assert len(children) == 2 def test_callout_panel_with_emoticon_maps_to_mdx(self): """panel callout + emoticon이 있는 XHTML이 MDX callout에 매핑된다.""" diff --git a/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.diff.yaml b/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.diff.yaml index 6495cb8c0..1d7c51bb1 100644 --- a/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.diff.yaml +++ b/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.diff.yaml @@ -12,7 +12,7 @@ changes: * 사용하려는 계정을 선택하고 필요시 비밀번호를 입력한 뒤, `OK` 버튼을 클릭하여 세션을 엽니다. ' -created_at: '2026-02-26T09:02:54.021713+00:00' +created_at: '2026-03-09T16:43:39.195734+00:00' improved_mdx: tests/testcases/544112828/improved.mdx original_mdx: tests/testcases/544112828/original.mdx page_id: '544112828' diff --git a/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.mapping.original.yaml b/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.mapping.original.yaml index b94cdf3e6..cb9036434 100644 --- a/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.mapping.original.yaml +++ b/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.mapping.original.yaml @@ -841,6 +841,6 @@ blocks: xhtml_plain_text: '' xhtml_text: '' xhtml_xpath: p[36] -created_at: '2026-02-26T09:02:54.021713+00:00' +created_at: '2026-03-09T16:43:39.195734+00:00' page_id: '544112828' source_xhtml: page.xhtml diff --git a/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.mapping.patched.yaml b/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.mapping.patched.yaml index 2440b938d..92e26be31 100644 --- a/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.mapping.patched.yaml +++ b/confluence-mdx/tests/testcases/544112828/expected.reverse-sync.mapping.patched.yaml @@ -366,9 +366,9 @@ blocks: children: [] type: list xhtml_element_index: 41 - xhtml_plain_text: 이후 해당 서버에 접속 가능한 계정이 여러 개라면, Account 선택창이 열립니다.사용하려는 계정을 선택하고 + xhtml_plain_text: 이후 해당 서버에 접속 가능한 계정이 여러 개라면, Account 선택창이 열립니다. 사용하려는 계정을 선택하고 필요 시 비밀번호를 입력한 뒤, OK 버튼을 클릭하여 세션을 엽니다. - xhtml_text: