Skip to content
49 changes: 44 additions & 5 deletions confluence-mdx/bin/reverse_sync/patch_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,23 +147,51 @@ def _find_roundtrip_sidecar_block(
return xpath_match

# identity fallback: mapping.yaml이 어긋난 경우 hash 기반으로 재탐색
# xpath 태그 타입(p, ul, ol, table 등) 일치하는 경우에만 반환하여 cross-type 오매칭 방지
# block family(paragraph/list/table 등) 일치하는 경우에만 반환하여 cross-type 오매칭 방지
if identity_block is not None and identity_block.content:
identity_match = find_sidecar_block_by_identity(
roundtrip_sidecar.blocks,
sha256_text(identity_block.content),
(identity_block.line_start, identity_block.line_end),
)
if identity_match is not None:
mapping_tag = mapping.xhtml_xpath.split('[')[0] if mapping else ''
identity_tag = identity_match.xhtml_xpath.split('[')[0] if identity_match.xhtml_xpath else ''
if mapping_tag == identity_tag:
if mapping is not None and _mapping_block_family(mapping) == _xpath_block_family(identity_match.xhtml_xpath):
return identity_match

# xpath 결과를 마지막 fallback으로 반환 (hash 불일치라도 없는 것보다 나음)
return xpath_match


def _xpath_root_tag(xpath: str) -> str:
    """Return the tag name of the first segment of an xpath-like storage path.

    Everything from the first ``/`` onward is discarded, then everything from
    the first ``[`` onward, so ``"ul[1]/li[2]"`` yields ``"ul"`` and an empty
    string yields an empty string.
    """
    first_segment, _, _ = xpath.partition("/")
    tag, _, _ = first_segment.partition("[")
    return tag


def _xpath_block_family(xpath: str) -> str:
    """Map the top-level tag of *xpath* to a block-family name.

    ``p`` -> ``"paragraph"``, ``ul`` / ``ol`` -> ``"list"``,
    ``table`` -> ``"table"``, ``h1`` .. ``h6`` -> ``"heading"``.

    Any other tag (``pre``, ``blockquote``, ``ac:*``, ...) is returned
    unchanged, which keeps cross-type protection conservative: a tag with no
    known family is only equal to the very same tag.
    """
    root_tag = _xpath_root_tag(xpath)
    if root_tag == "p":
        return "paragraph"
    if root_tag in {"ul", "ol"}:
        return "list"
    if root_tag == "table":
        return "table"
    # Only the six real HTML heading levels belong to the heading family.
    # A bare ``isdigit()`` test on the suffix would also accept h0, h7, h10 or
    # non-ASCII digit characters; those are unknown tags and must fall through
    # to the raw-tag branch below.
    if root_tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
        return "heading"
    return root_tag


def _mapping_block_family(mapping: BlockMapping) -> str:
    """Return the block family of *mapping*.

    ``mapping.type`` is returned as-is when it is one of ``"paragraph"``,
    ``"list"``, ``"heading"`` or ``"table"``; for any other type the family is
    derived from ``mapping.xhtml_xpath`` instead.
    """
    declared_type = mapping.type
    if declared_type not in {"paragraph", "list", "heading", "table"}:
        # Type is not a family name itself; fall back to the xpath's root tag.
        return _xpath_block_family(mapping.xhtml_xpath)
    return declared_type


def _flush_containing_changes(
containing_changes: dict,
used_ids: 'set | None' = None,
Expand Down Expand Up @@ -370,9 +398,20 @@ def _mark_used(block_id: str, m: BlockMapping):
list_sidecar = _find_roundtrip_sidecar_block(
change, mapping, roundtrip_sidecar, xpath_to_sidecar_block,
)
# roundtrip sidecar가 있지만 이 list에 매칭되는 block이 없을 때
# (cross-type 거부 또는 mapping drift) clean list는 whole-fragment 재생성으로 처리
should_replace_clean_list = (
mapping is not None
and not _contains_preserved_anchor_markup(mapping.xhtml_text)
and roundtrip_sidecar is not None
and list_sidecar is None
)
if (mapping is not None
and not _contains_preserved_anchor_markup(mapping.xhtml_text)
and sidecar_block_requires_reconstruction(list_sidecar)):
and (
sidecar_block_requires_reconstruction(list_sidecar)
or should_replace_clean_list
)):
_mark_used(mapping.block_id, mapping)
patches.append(
_build_replace_fragment_patch(
Expand Down
62 changes: 60 additions & 2 deletions confluence-mdx/bin/reverse_sync/rehydrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,45 @@ def _mdx_block_to_parser_block(mdx_block: MdxBlock) -> Block:
return Block(type=block_type, content=mdx_block.content)


def _extract_frontmatter_title(mdx_blocks: List[MdxBlock]) -> str:
    """Return the top-level ``title`` value found in the frontmatter blocks.

    Blocks whose ``type`` is not ``"frontmatter"`` are ignored.  The first
    line that starts with ``title:`` decides the result; if no such line
    exists an empty string is returned.

    A value wrapped in matching single or double quotes is returned without
    the quotes.  Inside a single-quoted value the YAML escape ``''`` is
    collapsed to ``'``.  Backslash escapes inside double-quoted values are
    not interpreted.
    """
    for block in mdx_blocks:
        if block.type != "frontmatter":
            continue
        for raw_line in block.content.splitlines():
            # Match the un-indented key only: an indented ``title:`` belongs
            # to a nested mapping (e.g. ``seo:\n  title: ...``), not the page.
            if not raw_line.startswith("title:"):
                continue
            value = raw_line.split(":", 1)[1].strip()
            if (
                len(value) >= 2
                and value[0] == value[-1]
                and value[0] in {"'", '"'}
            ):
                inner = value[1:-1]
                if value[0] == "'":
                    # ``''`` is the only escape in a single-quoted YAML scalar.
                    inner = inner.replace("''", "'")
                return inner
            return value
    return ""


def _content_blocks_for_splice(mdx_text: str) -> List[MdxBlock]:
    """Parse *mdx_text* and return the blocks that take part in splicing.

    Blocks whose type is listed in ``_NON_CONTENT`` are dropped.  If the first
    remaining block is a ``# `` heading whose text equals the frontmatter
    title, that heading is dropped as well.
    """
    parsed_blocks = parse_mdx_blocks(mdx_text)
    body_blocks = [blk for blk in parsed_blocks if blk.type not in _NON_CONTENT]
    if not body_blocks:
        return body_blocks

    title = _extract_frontmatter_title(parsed_blocks)
    lead = body_blocks[0]
    # True only for a leading level-1 heading that repeats a non-empty title.
    repeats_page_title = (
        lead.type == "heading"
        and title
        and lead.content.startswith("# ")
        and lead.content[2:].strip() == title
    )
    return body_blocks[1:] if repeats_page_title else body_blocks


def splice_rehydrate_xhtml(
mdx_text: str,
sidecar: RoundtripSidecar,
Expand All @@ -71,8 +110,27 @@ def splice_rehydrate_xhtml(
- 해시가 일치하는 블록: 원본 xhtml_fragment 사용
- 해시가 불일치하는 블록: emitter로 재생성
"""
mdx_blocks = parse_mdx_blocks(mdx_text)
content_blocks = [b for b in mdx_blocks if b.type not in _NON_CONTENT]
if sidecar_matches_mdx(mdx_text, sidecar):
details: List[dict] = []
matched_count = 0
for i, sb in enumerate(sidecar.blocks):
method = "sidecar" if sb.mdx_content_hash else "preserved"
if method == "sidecar":
matched_count += 1
details.append({
"index": i,
"method": method,
"xpath": sb.xhtml_xpath,
})
return SpliceResult(
xhtml=sidecar.reassemble_xhtml(),
matched_count=matched_count,
emitted_count=0,
total_blocks=len(sidecar.blocks),
block_details=details,
)

content_blocks = _content_blocks_for_splice(mdx_text)

matched_count = 0
emitted_count = 0
Expand Down
17 changes: 17 additions & 0 deletions confluence-mdx/tests/test_reverse_sync_byte_verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,23 @@ def test_verify_case_dir_splice_fails_when_sidecar_missing(tmp_path):
assert result.reason.startswith("sidecar_missing")


def test_verify_case_dir_splice_skips_page_title_heading(tmp_path):
    """Splice verification passes when the MDX opens with ``# <title>``.

    The MDX repeats the frontmatter title as a level-1 heading that has no
    counterpart in the XHTML; the two remaining blocks must both be matched
    and nothing may be re-emitted.
    """
    page_xhtml = "<h2>Overview</h2>\n<p>Body</p>"
    expected_mdx = "---\ntitle: T\n---\n\n# T\n\n### Overview\n\nBody\n"

    case_dir = tmp_path / "100"
    case_dir.mkdir()
    (case_dir / "expected.mdx").write_text(expected_mdx, encoding="utf-8")
    (case_dir / "page.xhtml").write_text(page_xhtml, encoding="utf-8")
    sidecar = build_sidecar(page_xhtml, expected_mdx, page_id="100")
    write_sidecar(sidecar, case_dir / "expected.roundtrip.json")

    outcome = verify_case_dir_splice(case_dir)

    assert outcome.passed is True
    assert outcome.reason == "byte_equal_splice"
    assert outcome.matched_count == 2
    assert outcome.emitted_count == 0


class TestSpliceRealTestcases:
"""실제 testcase에 대한 forced-splice byte-equal 검증."""

Expand Down
117 changes: 117 additions & 0 deletions confluence-mdx/tests/test_reverse_sync_patch_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
RoundtripSidecar,
SidecarBlock,
SidecarEntry,
sha256_text,
)
from text_utils import normalize_mdx_to_plain
from reverse_sync.patch_builder import (
_find_roundtrip_sidecar_block,
_flush_containing_changes,
_resolve_mapping_for_change,
build_patches,
Expand Down Expand Up @@ -421,6 +423,121 @@ def test_roundtrip_sidecar_non_paragraph_reconstruction_stays_modify(self):
assert patches[0].get('action', 'modify') == 'modify'
assert 'new_element_xhtml' not in patches[0]

def test_roundtrip_identity_fallback_rejects_cross_type_sidecar_block(self):
    """A ``table`` sidecar block found by identity is rejected for a ``p`` mapping."""
    mapping = _make_mapping('m1', 'same text', xpath='p[6]')
    change = _make_change(0, 'same text', 'updated text')
    old_block = change.old_block
    # Same content hash and line range as the changed block, but a table xpath.
    table_block = SidecarBlock(
        0,
        'table[2]',
        '<table><tr><td>same text</td></tr></table>',
        sha256_text(old_block.content),
        (old_block.line_start, old_block.line_end),
    )
    roundtrip_sidecar = _make_roundtrip_sidecar([table_block])
    blocks_by_xpath = {
        blk.xhtml_xpath: blk for blk in roundtrip_sidecar.blocks
    }

    found = _find_roundtrip_sidecar_block(
        change, mapping, roundtrip_sidecar, blocks_by_xpath,
    )

    assert found is None

def test_list_roundtrip_identity_fallback_rejects_cross_type_mapping(self):
    """A list whose only identity match is a ``table`` block gets a fragment replace."""
    list_mapping = _make_mapping('m1', 'same text', xpath='ul[1]', type_='list')
    list_mapping.xhtml_text = '<ul><li><p>same text</p></li></ul>'
    mappings = [list_mapping]
    xpath_to_mapping = {entry.xhtml_xpath: entry for entry in mappings}
    mdx_to_sidecar = self._setup_sidecar('ul[1]', 0)
    change = _make_change(0, '- same text', '- updated text', type_='list')
    old_block = change.old_block
    # Identity (hash + line range) matches the list, but the xpath is a table.
    table_block = SidecarBlock(
        0,
        'table[2]',
        '<table><tr><td>same text</td></tr></table>',
        sha256_text(old_block.content),
        (old_block.line_start, old_block.line_end),
    )
    roundtrip_sidecar = _make_roundtrip_sidecar([table_block])

    patches = build_patches(
        [change],
        [change.old_block],
        [change.new_block],
        mappings,
        mdx_to_sidecar,
        xpath_to_mapping,
        roundtrip_sidecar=roundtrip_sidecar,
    )

    assert len(patches) == 1
    only_patch = patches[0]
    assert only_patch['action'] == 'replace_fragment'
    assert only_patch['xhtml_xpath'] == 'ul[1]'

def test_roundtrip_identity_fallback_accepts_ul_ol_same_list_family(self):
    """An ``ol`` sidecar block found by identity is accepted for a ``ul`` list mapping."""
    mapping = _make_mapping('m1', 'same text', xpath='ul[1]', type_='list')
    change = _make_change(0, '- same text', '- updated text', type_='list')
    old_block = change.old_block
    ordered_list_block = SidecarBlock(
        0,
        'ol[2]',
        '<ol><li><p>same text</p></li></ol>',
        sha256_text(old_block.content),
        (old_block.line_start, old_block.line_end),
    )
    roundtrip_sidecar = _make_roundtrip_sidecar([ordered_list_block])
    blocks_by_xpath = {
        blk.xhtml_xpath: blk for blk in roundtrip_sidecar.blocks
    }

    found = _find_roundtrip_sidecar_block(
        change, mapping, roundtrip_sidecar, blocks_by_xpath,
    )

    assert found is not None
    assert found.xhtml_xpath == 'ol[2]'

def test_roundtrip_identity_fallback_accepts_heading_family(self):
    """An ``h3`` sidecar block found by identity is accepted for an ``h2`` heading mapping."""
    mapping = _make_mapping('m1', 'same heading', xpath='h2[1]', type_='heading')
    change = _make_change(0, '## same heading', '## updated heading', type_='heading')
    old_block = change.old_block
    heading_block = SidecarBlock(
        0,
        'h3[4]',
        '<h3>same heading</h3>',
        sha256_text(old_block.content),
        (old_block.line_start, old_block.line_end),
    )
    roundtrip_sidecar = _make_roundtrip_sidecar([heading_block])
    blocks_by_xpath = {
        blk.xhtml_xpath: blk for blk in roundtrip_sidecar.blocks
    }

    found = _find_roundtrip_sidecar_block(
        change, mapping, roundtrip_sidecar, blocks_by_xpath,
    )

    assert found is not None
    assert found.xhtml_xpath == 'h3[4]'

def test_roundtrip_identity_fallback_does_not_guess_without_mapping(self):
    """With no mapping, an identity-matched sidecar block is not returned."""
    change = _make_change(0, '- same text', '- updated text', type_='list')
    old_block = change.old_block
    ordered_list_block = SidecarBlock(
        0,
        'ol[2]',
        '<ol><li><p>same text</p></li></ol>',
        sha256_text(old_block.content),
        (old_block.line_start, old_block.line_end),
    )
    roundtrip_sidecar = _make_roundtrip_sidecar([ordered_list_block])
    blocks_by_xpath = {
        blk.xhtml_xpath: blk for blk in roundtrip_sidecar.blocks
    }

    # ``None`` stands for "no mapping resolved for this change".
    found = _find_roundtrip_sidecar_block(
        change, None, roundtrip_sidecar, blocks_by_xpath,
    )

    assert found is None

# NON_CONTENT_TYPES 스킵
def test_skips_non_content_types(self):
m1 = _make_mapping('m1', 'text', xpath='p[1]')
Expand Down
Loading
Loading