From 440082744c3679d6479cbd86fdb40e5ec386160e Mon Sep 17 00:00:00 2001 From: JK Date: Mon, 16 Mar 2026 22:59:16 +0900 Subject: [PATCH 1/3] =?UTF-8?q?confluence-mdx:=20reverse-sync=20Phase=205?= =?UTF-8?q?=20=E2=80=94=20sidecar=20=ED=83=80=EC=9E=85=20=EA=B8=B0?= =?UTF-8?q?=EB=B0=98=20=EB=A7=A4=ED=95=91=20=EB=B0=8F=20heading=20lookahea?= =?UTF-8?q?d=20=EA=B5=AC=ED=98=84=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - sidecar.py: build_sidecar()와 generate_sidecar_mapping() 모두 mdx_to_storage.parser로 전환합니다 - sidecar.py: type-compatible two-pointer 매칭 — paragraph→list 호환성 추가합니다 - sidecar.py: _heading_lookahead() — heading MISS 시 content 매칭으로 구조 정렬 복원합니다 - patch_builder.py: _build_mdx_to_sidecar_from_v3() — roundtrip sidecar v3 기반 mdx_to_sidecar 자동 구축합니다 - patch_builder.py: build_patches() — mdx_to_sidecar=None 시 v3 sidecar에서 자동 구축합니다 - reverse_sync_cli.py: mapping.yaml 없는 v3 sidecar 단독 경로 지원합니다 - tests: test_reverse_sync_phase3.py — Phase 3 inline-anchor/list 재구성 테스트 추가합니다 Co-Authored-By: Claude Sonnet 4.6 --- .../bin/reverse_sync/patch_builder.py | 43 +++++++- confluence-mdx/bin/reverse_sync/sidecar.py | 103 ++++++++++++++++-- confluence-mdx/bin/reverse_sync_cli.py | 43 ++------ .../tests/test_reverse_sync_phase3.py | 79 ++++++++++++++ 4 files changed, 221 insertions(+), 47 deletions(-) create mode 100644 confluence-mdx/tests/test_reverse_sync_phase3.py diff --git a/confluence-mdx/bin/reverse_sync/patch_builder.py b/confluence-mdx/bin/reverse_sync/patch_builder.py index edc9664ca..d66c81261 100644 --- a/confluence-mdx/bin/reverse_sync/patch_builder.py +++ b/confluence-mdx/bin/reverse_sync/patch_builder.py @@ -17,6 +17,7 @@ find_sidecar_block_by_identity, sha256_text, SidecarEntry, + build_mdx_line_range_index, ) from reverse_sync.lost_info_patcher import apply_lost_info, distribute_lost_info_to_mappings from reverse_sync.mdx_to_xhtml_inline import mdx_block_to_xhtml_element, mdx_block_to_inner_xhtml @@ -39,6 +40,34 @@ _CLEAN_BLOCK_TYPES = frozenset(("heading", "code_block", "hr")) +def _build_mdx_to_sidecar_from_v3( + roundtrip_sidecar: RoundtripSidecar, + original_blocks: List[MdxBlock], +) -> Dict[int, SidecarEntry]: + """roundtrip sidecar v3와 original_blocks에서 mdx_to_sidecar 인덱스를 생성한다. + + mapping.yaml 없이 v3 sidecar의 mdx_line_range를 기준으로 + original_blocks의 절대 인덱스 → SidecarEntry를 구축한다. + find_mapping_by_sidecar()가 entry.xhtml_xpath만 사용하므로 + xhtml_xpath 필드만 채운다. + """ + from reverse_sync.block_diff import NON_CONTENT_TYPES as _NON_CONTENT + line_range_idx = build_mdx_line_range_index(roundtrip_sidecar) + result: Dict[int, SidecarEntry] = {} + for idx, block in enumerate(original_blocks): + if block.type in _NON_CONTENT: + continue + sc_block = line_range_idx.get((block.line_start, block.line_end)) + if sc_block is None: + continue + result[idx] = SidecarEntry( + xhtml_xpath=sc_block.xhtml_xpath, + xhtml_type="", + mdx_blocks=[idx], + ) + return result + + def _contains_preserved_anchor_markup(xhtml_text: str) -> bool: """preservation unit이 있으면 clean whole-fragment replacement 대상이 아니다.""" return " List[Dict[str, str]]: """diff 변경과 매핑을 결합하여 XHTML 패치 목록을 구성한다. - sidecar 인덱스를 사용하여 O(1) 직접 조회를 수행한다. + mdx_to_sidecar=None (기본값)이면 roundtrip_sidecar v3에서 자동으로 구축한다. """ + # v3 sidecar 기반 경로: mdx_to_sidecar가 없으면 roundtrip_sidecar에서 구축 + if mdx_to_sidecar is None: + if roundtrip_sidecar is not None: + mdx_to_sidecar = _build_mdx_to_sidecar_from_v3( + roundtrip_sidecar, original_blocks) + else: + mdx_to_sidecar = {} + xpath_to_mapping = xpath_to_mapping or {} patches = [] xpath_to_sidecar_block: Dict[str, SidecarBlock] = {} if roundtrip_sidecar is not None: diff --git a/confluence-mdx/bin/reverse_sync/sidecar.py b/confluence-mdx/bin/reverse_sync/sidecar.py index f865b9431..0ebc89068 100644 --- a/confluence-mdx/bin/reverse_sync/sidecar.py +++ b/confluence-mdx/bin/reverse_sync/sidecar.py @@ -154,6 +154,21 @@ def build_sidecar_identity_index( return dict(grouped) +def build_mdx_line_range_index( + sidecar: "RoundtripSidecar", +) -> Dict[tuple, "SidecarBlock"]: + """(line_start, line_end) → SidecarBlock 인덱스를 구축한다. + + roundtrip sidecar v3 기반 mapping lookup에 사용된다. + mdx_line_range가 (0, 0)인 블록(MDX 대응 없음)은 제외된다. + """ + return { + tuple(b.mdx_line_range): b + for b in sidecar.blocks + if b.mdx_line_range != (0, 0) + } + + def find_sidecar_block_by_identity( blocks: List[SidecarBlock], mdx_content_hash: str, @@ -206,11 +221,13 @@ def build_sidecar( ) -> RoundtripSidecar: """Block-level sidecar를 생성한다. + generate_sidecar_mapping()과 동일한 type-compatible two-pointer 매칭으로 + XHTML top-level block과 MDX content block을 정렬한다. Fragment 추출 → MDX alignment → 무결성 검증 → RoundtripSidecar 반환. """ from reverse_sync.fragment_extractor import extract_block_fragments from reverse_sync.mapping_recorder import record_mapping - from reverse_sync.mdx_block_parser import parse_mdx_blocks + from mdx_to_storage.parser import parse_mdx_blocks # 1. XHTML mapping + fragment 추출 xhtml_mappings = record_mapping(page_xhtml_text) @@ -224,19 +241,49 @@ def build_sidecar( child_ids.update(m.children) top_mappings = [m for m in xhtml_mappings if m.block_id not in child_ids] - # 3. MDX content 블록 (frontmatter, empty, import 제외) - mdx_content_blocks = [b for b in mdx_blocks if b.type not in NON_CONTENT_TYPES] + # 3. MDX content 블록 — NON_CONTENT_TYPES 제외, 원본 인덱스 보존 + mdx_content_indexed = [ + (i, b) for i, b in enumerate(mdx_blocks) if b.type not in NON_CONTENT_TYPES + ] - # 4. Block 생성 — fragment와 top-level mapping을 정렬 + # 4. MDX H1 헤딩(페이지 제목) 건너뜀 + # forward converter가 MDX 첫 줄에 `# <페이지 제목>`을 자동 생성하며, + # 이 블록은 Confluence XHTML의 페이지 제목(본문 외부)에 해당한다. + mdx_ptr = 0 + while (mdx_ptr < len(mdx_content_indexed) + and mdx_content_indexed[mdx_ptr][1].type == 'heading' + and mdx_content_indexed[mdx_ptr][1].content.startswith('# ')): + mdx_ptr += 1 + + # 5. type-compatible two-pointer 매칭으로 fragment-MDX block 쌍 생성 sidecar_blocks: List[SidecarBlock] = [] for i, fragment in enumerate(frag_result.fragments): xpath = top_mappings[i].xhtml_xpath if i < len(top_mappings) else f"unknown[{i}]" + mapping = top_mappings[i] if i < len(top_mappings) else None + + mdx_block = None + if mapping is not None and not _should_skip_xhtml(mapping): + # 빈 paragraph: MDX 대응 없음 + if mapping.xhtml_plain_text.strip() or mapping.type != 'paragraph': + if mdx_ptr < len(mdx_content_indexed): + _, candidate = mdx_content_indexed[mdx_ptr] + if _type_compatible(mapping.type, candidate.type): + mdx_block = candidate + mdx_ptr += 1 + elif mapping.type == 'heading': + # heading lookahead: 현재 MDX 블록이 heading이 아닌 경우 + # content 매칭 heading을 전방 탐색하여 구조 정렬 복원 + la_ptr = _heading_lookahead( + mapping.xhtml_plain_text, mdx_content_indexed, mdx_ptr) + if la_ptr is not None: + mdx_ptr = la_ptr + _, candidate = mdx_content_indexed[mdx_ptr] + mdx_block = candidate + mdx_ptr += 1 + # else: 타입 불일치 → XHTML 블록이 MDX 출력을 생성하지 않음 - # 순차 1:1 대응 (향후 block alignment로 개선) - mdx_block = mdx_content_blocks[i] if i < len(mdx_content_blocks) else None mdx_hash = sha256_text(mdx_block.content) if mdx_block else "" mdx_range = (mdx_block.line_start, mdx_block.line_end) if mdx_block else (0, 0) - mapping = top_mappings[i] if i < len(top_mappings) else None sidecar_blocks.append( SidecarBlock( @@ -437,7 +484,7 @@ def load_sidecar(path: Path) -> RoundtripSidecar: # XHTML record_mapping type → 호환 MDX parse_mdx type _TYPE_COMPAT: Dict[str, frozenset] = { 'heading': frozenset({'heading'}), - 'paragraph': frozenset({'paragraph'}), + 'paragraph': frozenset({'paragraph', 'list'}), 'list': frozenset({'list'}), 'code': frozenset({'code_block'}), 'table': frozenset({'table', 'html_block'}), @@ -463,6 +510,38 @@ def _type_compatible(xhtml_type: str, mdx_type: str) -> bool: return mdx_type in _TYPE_COMPAT.get(xhtml_type, frozenset()) +def _normalize_heading_text(text: str) -> str: + """heading 텍스트에서 `#` prefix와 앞뒤 공백을 제거한다.""" + return text.lstrip('#').strip() + + +def _heading_lookahead( + xhtml_plain: str, + mdx_content_indexed: List, + mdx_ptr: int, + lookahead_limit: int = 20, +) -> Optional[int]: + """XHTML heading의 plain text와 content-matching MDX heading을 lookahead로 탐색한다. + + XHTML heading이 타입 불일치(MISS)일 때 전방 탐색으로 구조적 정렬을 복원한다. + 두 텍스트 중 하나가 다른 텍스트에 포함(substring)되면 일치로 판단한다. + + Returns: + 일치하는 MDX block의 포인터 인덱스, 없으면 None + """ + xhtml_norm = _normalize_heading_text(xhtml_plain) + if len(xhtml_norm) < 8: + return None + end = min(mdx_ptr + lookahead_limit, len(mdx_content_indexed)) + for ptr in range(mdx_ptr, end): + _, candidate = mdx_content_indexed[ptr] + if candidate.type == 'heading': + mdx_norm = _normalize_heading_text(candidate.content) + if xhtml_norm in mdx_norm or mdx_norm in xhtml_norm: + return ptr + return None + + def _align_children( xm: Any, @@ -667,6 +746,14 @@ def generate_sidecar_mapping( mdx_idx, mdx_block = mdx_content_indexed[mdx_ptr] + if not _type_compatible(xm.type, mdx_block.type) and xm.type == 'heading': + # heading lookahead: 현재 MDX 블록이 heading이 아닌 경우 + # content 매칭 heading을 전방 탐색하여 구조 정렬 복원 + la_ptr = _heading_lookahead(xm.xhtml_plain_text, mdx_content_indexed, mdx_ptr) + if la_ptr is not None: + mdx_ptr = la_ptr + mdx_idx, mdx_block = mdx_content_indexed[mdx_ptr] + if _type_compatible(xm.type, mdx_block.type): entry: Dict[str, Any] = { 'xhtml_xpath': xm.xhtml_xpath, diff --git a/confluence-mdx/bin/reverse_sync_cli.py b/confluence-mdx/bin/reverse_sync_cli.py index 20d32a68c..d6234990e 100755 --- a/confluence-mdx/bin/reverse_sync_cli.py +++ b/confluence-mdx/bin/reverse_sync_cli.py @@ -350,49 +350,20 @@ def run_verify( (var_dir / 'reverse-sync.mapping.original.yaml').write_text( yaml.dump(original_mapping_data, allow_unicode=True, default_flow_style=False)) - # Step 3.5: Sidecar mapping 생성 + 인덱스 구축 + # Step 3.5: Roundtrip sidecar v3 구축 — mapping.yaml 재생성 없이 v3 경로로 동작 from reverse_sync.sidecar import ( - SidecarEntry, SidecarChildEntry, generate_sidecar_mapping, - build_mdx_to_sidecar_index, build_xpath_to_mapping, + build_xpath_to_mapping, build_sidecar, + load_page_lost_info, ) - # forward converter가 생성한 mapping.yaml에서 lost_info를 보존 - existing_mapping = var_dir / 'mapping.yaml' - existing_lost_info = None - if existing_mapping.exists(): - existing_data = yaml.safe_load(existing_mapping.read_text()) or {} - existing_lost_info = existing_data.get('lost_info') or None - sidecar_yaml = generate_sidecar_mapping( - xhtml, original_mdx, page_id, lost_infos=existing_lost_info) - (var_dir / 'mapping.yaml').write_text(sidecar_yaml) - sidecar_data = yaml.safe_load(sidecar_yaml) or {} - page_lost_info = sidecar_data.get('lost_info', {}) - sidecar_entries = [] - for item in sidecar_data.get('mappings', []): - children = [ - SidecarChildEntry( - xhtml_xpath=ch.get('xhtml_xpath', ''), - xhtml_block_id=ch.get('xhtml_block_id', ''), - mdx_line_start=ch.get('mdx_line_start', 0), - mdx_line_end=ch.get('mdx_line_end', 0), - ) - for ch in item.get('children', []) - ] - sidecar_entries.append(SidecarEntry( - xhtml_xpath=item['xhtml_xpath'], - xhtml_type=item.get('xhtml_type', ''), - mdx_blocks=item.get('mdx_blocks', []), - mdx_line_start=item.get('mdx_line_start', 0), - mdx_line_end=item.get('mdx_line_end', 0), - children=children, - )) - mdx_to_sidecar = build_mdx_to_sidecar_index(sidecar_entries) + # forward converter가 생성한 mapping.yaml에서 lost_info만 로드 + page_lost_info = load_page_lost_info(str(var_dir / 'mapping.yaml')) roundtrip_sidecar = build_sidecar(xhtml, original_mdx, page_id=page_id) xpath_to_mapping = build_xpath_to_mapping(original_mappings) - # Step 4: XHTML 패치 → patched.xhtml 저장 + # Step 4: XHTML 패치 → patched.xhtml 저장 (mdx_to_sidecar=None → v3 자동 구축) patches = build_patches(changes, original_blocks, improved_blocks, - original_mappings, mdx_to_sidecar, xpath_to_mapping, + original_mappings, None, xpath_to_mapping, alignment, page_lost_info=page_lost_info, roundtrip_sidecar=roundtrip_sidecar) patched_xhtml = patch_xhtml(xhtml, patches) diff --git a/confluence-mdx/tests/test_reverse_sync_phase3.py b/confluence-mdx/tests/test_reverse_sync_phase3.py new file mode 100644 index 000000000..c8642951a --- /dev/null +++ b/confluence-mdx/tests/test_reverse_sync_phase3.py @@ -0,0 +1,79 @@ +"""Phase 3 inline-anchor/list reconstruction tests (v3 sidecar 경로).""" + +from pathlib import Path + +import pytest + +from reverse_sync.block_diff import diff_blocks +from reverse_sync.mapping_recorder import record_mapping +from reverse_sync.mdx_block_parser import parse_mdx_blocks +from reverse_sync.patch_builder import build_patches +from reverse_sync.sidecar import ( + build_sidecar, + build_xpath_to_mapping, +) +from reverse_sync.xhtml_patcher import patch_xhtml + + +def _build_patched_xhtml(xhtml: str, original_mdx: str, improved_mdx: str): + """v3 sidecar 경로로 XHTML 패치를 생성한다 (mdx_to_sidecar 없음).""" + original_blocks = parse_mdx_blocks(original_mdx) + improved_blocks = parse_mdx_blocks(improved_mdx) + changes, alignment = diff_blocks(original_blocks, improved_blocks) + + mappings = record_mapping(xhtml) + roundtrip_sidecar = build_sidecar(xhtml, original_mdx) + xpath_to_mapping = build_xpath_to_mapping(mappings) + + patches = build_patches( + changes, + original_blocks, + improved_blocks, + mappings, + xpath_to_mapping=xpath_to_mapping, + alignment=alignment, + roundtrip_sidecar=roundtrip_sidecar, + ) + return patches, patch_xhtml(xhtml, patches) + + +def test_list_with_inline_image_uses_replace_fragment_reconstruction(): + xhtml = ( + '
  • Dry Run : ' + '' + '버튼을 클릭합니다.

' + ) + original_mdx = '* **Dry Run :** sample.png버튼을 클릭합니다.\n' + improved_mdx = '* **Dry Run :** sample.png버튼을 다시 클릭합니다.\n' + + patches, patched = _build_patched_xhtml(xhtml, original_mdx, improved_mdx) + + assert len(patches) == 1 + assert patches[0]["action"] == "replace_fragment" + assert "" in patched + assert "버튼을 다시 클릭합니다." in patched + + +class Test544376004: + @pytest.fixture(autouse=True) + def require_fixture(self): + case_dir = Path(__file__).parent / "reverse-sync" / "544376004" + if not case_dir.is_dir(): + pytest.skip("reverse-sync/544376004 fixture not found") + + def test_preserves_double_space_and_inline_image(self): + case_dir = Path(__file__).parent / "reverse-sync" / "544376004" + xhtml = (case_dir / "page.xhtml").read_text(encoding="utf-8") + original_mdx = (case_dir / "original.mdx").read_text(encoding="utf-8") + improved_mdx = (case_dir / "improved.mdx").read_text(encoding="utf-8") + + patches, patched = _build_patched_xhtml(xhtml, original_mdx, improved_mdx) + + replace_patches = [patch for patch in patches if patch.get("action") == "replace_fragment"] + assert replace_patches, "Phase 3 list reconstruction should emit replace_fragment" + assert any(patch["xhtml_xpath"] == "ul[3]" for patch in replace_patches) + assert "Enable Attribute Synchronization : LDAP" in patched + assert ' Date: Mon, 16 Mar 2026 23:56:49 +0900 Subject: [PATCH 2/3] =?UTF-8?q?confluence-mdx:=20paragraph=E2=86=92list=20?= =?UTF-8?q?compat=20=EC=A0=9C=EA=B1=B0=20=EB=B0=8F=20=ED=85=8C=EC=8A=A4?= =?UTF-8?q?=ED=8A=B8=20=ED=8C=8C=EC=84=9C=20=EB=B6=88=EC=9D=BC=EC=B9=98=20?= =?UTF-8?q?=EC=88=98=EC=A0=95=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - sidecar.py: _TYPE_COMPAT에서 paragraph→list 호환성 제거합니다 heading lookahead만으로 구조 정렬이 충분히 복원되므로 불필요합니다 paragraph→list는 XHTML

가 MDX list 전략으로 처리되어 invalid XHTML 생성 위험이 있습니다 - test_reverse_sync_phase3.py: reverse_sync.mdx_block_parser → mdx_to_storage.parser 로 교체합니다 런타임 경로(sidecar.py, reverse_sync_cli.py)와 동일한 파서를 사용합니다 Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/bin/reverse_sync/sidecar.py | 2 +- confluence-mdx/tests/test_reverse_sync_phase3.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confluence-mdx/bin/reverse_sync/sidecar.py b/confluence-mdx/bin/reverse_sync/sidecar.py index 0ebc89068..5a58a489c 100644 --- a/confluence-mdx/bin/reverse_sync/sidecar.py +++ b/confluence-mdx/bin/reverse_sync/sidecar.py @@ -484,7 +484,7 @@ def load_sidecar(path: Path) -> RoundtripSidecar: # XHTML record_mapping type → 호환 MDX parse_mdx type _TYPE_COMPAT: Dict[str, frozenset] = { 'heading': frozenset({'heading'}), - 'paragraph': frozenset({'paragraph', 'list'}), + 'paragraph': frozenset({'paragraph'}), 'list': frozenset({'list'}), 'code': frozenset({'code_block'}), 'table': frozenset({'table', 'html_block'}), diff --git a/confluence-mdx/tests/test_reverse_sync_phase3.py b/confluence-mdx/tests/test_reverse_sync_phase3.py index c8642951a..4f740e45a 100644 --- a/confluence-mdx/tests/test_reverse_sync_phase3.py +++ b/confluence-mdx/tests/test_reverse_sync_phase3.py @@ -6,7 +6,7 @@ from reverse_sync.block_diff import diff_blocks from reverse_sync.mapping_recorder import record_mapping -from reverse_sync.mdx_block_parser import parse_mdx_blocks +from mdx_to_storage.parser import parse_mdx_blocks from reverse_sync.patch_builder import build_patches from reverse_sync.sidecar import ( build_sidecar, From 6430fb8916f87c110621d060070050422e40087f Mon Sep 17 00:00:00 2001 From: JK Date: Tue, 17 Mar 2026 00:19:52 +0900 Subject: [PATCH 3/3] =?UTF-8?q?confluence-mdx:=20heading=20lookahead=20?= =?UTF-8?q?=EC=97=A3=EC=A7=80=20=EC=BC=80=EC=9D=B4=EC=8A=A4=20=ED=9A=8C?= =?UTF-8?q?=EA=B7=80=20=ED=85=8C=EC=8A=A4=ED=8A=B8=EB=A5=BC=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - TestHeadingLookahead: _heading_lookahead() 유닛 테스트 6건 추가합니다 - 기본 매칭 동작 검증 - 짧은 heading(8자 미만) None 반환 명시 - 유사 heading 다수 존재 시 첫 번째 매칭 반환 동작 문서화 - ptr 전진 후 두 번째 섹션 탐색 정확도 - lookahead_limit 경계 밖 heading 미반환 - substring 양방향 매칭 허용 Co-Authored-By: Claude Sonnet 4.6 --- .../tests/test_reverse_sync_sidecar.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/confluence-mdx/tests/test_reverse_sync_sidecar.py b/confluence-mdx/tests/test_reverse_sync_sidecar.py index b860f2002..acd9d4365 100644 --- a/confluence-mdx/tests/test_reverse_sync_sidecar.py +++ b/confluence-mdx/tests/test_reverse_sync_sidecar.py @@ -542,3 +542,92 @@ def test_container_with_multiple_mdx_blocks(self, tmp_path): for idx in [3, 5, 7, 9]: result = find_mapping_by_sidecar(idx, mdx_to_sidecar, xpath_to_mapping) assert result is container + + +class TestHeadingLookahead: + """_heading_lookahead 엣지 케이스 회귀 테스트.""" + + from dataclasses import dataclass + + @staticmethod + def _make_mdx(blocks): + """(type, content, line) 튜플 목록을 mdx_content_indexed 형식으로 변환한다.""" + from dataclasses import dataclass + + @dataclass + class FakeBlock: + type: str + content: str + line_start: int + line_end: int + + return [(i, FakeBlock(t, c, ln, ln)) for i, (t, c, ln) in enumerate(blocks)] + + def test_basic_match(self): + """heading XHTML이 전방 MDX heading과 content로 매칭된다.""" + from reverse_sync.sidecar import _heading_lookahead + mdx = self._make_mdx([ + ('list', '1. some item', 1), + ('heading', '## 에이전트를 통한 서버 접속', 3), + ]) + result = _heading_lookahead('에이전트를 통한 서버 접속', mdx, 0) + assert result == 1 + + def test_short_heading_skipped(self): + """8자 미만 heading은 false positive 방지를 위해 None을 반환한다.""" + from reverse_sync.sidecar import _heading_lookahead + mdx = self._make_mdx([ + ('list', '1. item', 1), + ('heading', '## 설정', 3), + ]) + result = _heading_lookahead('설정', mdx, 0) + assert result is None + + def test_similar_headings_picks_first(self): + """유사한 heading이 여러 개일 때 전방 탐색에서 가장 가까운 것을 반환한다.""" + from reverse_sync.sidecar import _heading_lookahead + mdx = self._make_mdx([ + ('list', '1. item', 1), + ('heading', '## QueryPie Agent에 로그인하기', 3), + ('list', '2. item', 5), + ('heading', '## QueryPie Agent에 로그인하기 (Mac)', 7), + ]) + # XHTML heading이 두 번째와 매핑되어야 하는 케이스라도 + # 현재 구현은 첫 번째를 반환한다 — 이 동작을 명시적으로 문서화 + result = _heading_lookahead('QueryPie Agent에 로그인하기', mdx, 0) + assert result == 1 # 첫 번째 매칭 반환 + + def test_repeated_heading_ptr_advances_correctly(self): + """반복된 heading이 있을 때 두 번째 섹션은 ptr를 앞으로 시작한다.""" + from reverse_sync.sidecar import _heading_lookahead + mdx = self._make_mdx([ + ('heading', '## 섹션 A — 서버 접속 방법', 1), + ('list', '1. item in A', 3), + ('heading', '## 섹션 B — 서버 접속 방법', 5), + ('list', '1. item in B', 7), + ]) + # ptr=0에서 탐색: 섹션 A가 가장 먼저 매칭 + r1 = _heading_lookahead('섹션 A — 서버 접속 방법', mdx, 0) + assert r1 == 0 + # ptr=2에서 탐색: 섹션 B가 매칭 (섹션 A는 이미 소비됨) + r2 = _heading_lookahead('섹션 B — 서버 접속 방법', mdx, 2) + assert r2 == 2 + + def test_no_match_beyond_lookahead_limit(self): + """lookahead_limit 밖의 heading은 반환되지 않는다.""" + from reverse_sync.sidecar import _heading_lookahead + filler = [('list', f'{i}. item', i * 2) for i in range(25)] + mdx = self._make_mdx(filler + [('heading', '## 에이전트를 통한 서버 접속', 60)]) + result = _heading_lookahead('에이전트를 통한 서버 접속', mdx, 0, lookahead_limit=20) + assert result is None # 25번째 블록은 limit 밖 + + def test_substring_asymmetry(self): + """XHTML이 MDX의 substring이어도, MDX가 XHTML의 substring이어도 매칭된다.""" + from reverse_sync.sidecar import _heading_lookahead + mdx = self._make_mdx([ + ('list', '1. item', 1), + ('heading', '### QueryPie Agent에 로그인하기 ', 3), + ]) + # XHTML plain ⊂ MDX content + r = _heading_lookahead('QueryPie Agent에 로그인하기', mdx, 0) + assert r == 1