From 21e0b578e6808023b610ef5aa2f4a703787bb527 Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 15:20:14 +0900
Subject: [PATCH 01/13] =?UTF-8?q?confluence-mdx:=20Phase=203=20golden=20te?=
=?UTF-8?q?st=20=E2=80=94=20=ED=8C=8C=EC=84=9C=20=EB=B6=88=EC=9D=BC?=
=?UTF-8?q?=EC=B9=98=20=EC=88=98=EC=A0=95=20=EB=B0=8F=20inline-anchor=20?=
=?UTF-8?q?=EC=BC=80=EC=9D=B4=EC=8A=A4=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Sonnet 4.6
---
...est_reverse_sync_reconstruction_goldens.py | 62 ++++++++++++++++++-
1 file changed, 59 insertions(+), 3 deletions(-)
diff --git a/confluence-mdx/tests/test_reverse_sync_reconstruction_goldens.py b/confluence-mdx/tests/test_reverse_sync_reconstruction_goldens.py
index 650c019ee..288be0846 100644
--- a/confluence-mdx/tests/test_reverse_sync_reconstruction_goldens.py
+++ b/confluence-mdx/tests/test_reverse_sync_reconstruction_goldens.py
@@ -6,7 +6,7 @@
from reverse_sync.block_diff import diff_blocks
from reverse_sync.mapping_recorder import record_mapping
-from reverse_sync.mdx_block_parser import parse_mdx_blocks
+from mdx_to_storage.parser import parse_mdx_blocks
from reverse_sync.patch_builder import build_patches
from reverse_sync.sidecar import (
SidecarEntry,
@@ -23,8 +23,8 @@
def _run_pipeline_with_sidecar(xhtml: str, original_mdx: str, improved_mdx: str):
- original_blocks = parse_mdx_blocks(original_mdx)
- improved_blocks = parse_mdx_blocks(improved_mdx)
+ original_blocks = list(parse_mdx_blocks(original_mdx))
+ improved_blocks = list(parse_mdx_blocks(improved_mdx))
changes, alignment = diff_blocks(original_blocks, improved_blocks)
mappings = record_mapping(xhtml)
@@ -89,3 +89,59 @@ def test_544178405_paragraph_and_table_change(self):
case['xhtml'], case['original_mdx'], case['improved_mdx']
)
assert normalize_fragment(result) == normalize_fragment(case['expected'])
+
+ def test_1911652402_inline_anchor_paragraph(self):
+ case = _load_testcase('1911652402')
+ result = _run_pipeline_with_sidecar(
+ case['xhtml'], case['original_mdx'], case['improved_mdx']
+ )
+ assert normalize_fragment(result) == normalize_fragment(case['expected'])
+
+ def test_544113141_list_with_trailing_image(self):
+ case = _load_testcase('544113141')
+ result = _run_pipeline_with_sidecar(
+ case['xhtml'], case['original_mdx'], case['improved_mdx']
+ )
+ assert normalize_fragment(result) == normalize_fragment(case['expected'])
+
+ def test_544145591_list_change_with_inline_images(self):
+ case = _load_testcase('544145591')
+ result = _run_pipeline_with_sidecar(
+ case['xhtml'], case['original_mdx'], case['improved_mdx']
+ )
+ assert normalize_fragment(result) == normalize_fragment(case['expected'])
+
+ def test_544377869_paragraph_with_link(self):
+ case = _load_testcase('544377869')
+ result = _run_pipeline_with_sidecar(
+ case['xhtml'], case['original_mdx'], case['improved_mdx']
+ )
+ assert normalize_fragment(result) == normalize_fragment(case['expected'])
+
+ def test_568918170_paragraph_with_link(self):
+ case = _load_testcase('568918170')
+ result = _run_pipeline_with_sidecar(
+ case['xhtml'], case['original_mdx'], case['improved_mdx']
+ )
+ assert normalize_fragment(result) == normalize_fragment(case['expected'])
+
+ def test_692355151_heading_change_with_link_para(self):
+ case = _load_testcase('692355151')
+ result = _run_pipeline_with_sidecar(
+ case['xhtml'], case['original_mdx'], case['improved_mdx']
+ )
+ assert normalize_fragment(result) == normalize_fragment(case['expected'])
+
+ def test_880181257_list_with_nested_image(self):
+ case = _load_testcase('880181257')
+ result = _run_pipeline_with_sidecar(
+ case['xhtml'], case['original_mdx'], case['improved_mdx']
+ )
+ assert normalize_fragment(result) == normalize_fragment(case['expected'])
+
+ def test_883654669_list_with_image(self):
+ case = _load_testcase('883654669')
+ result = _run_pipeline_with_sidecar(
+ case['xhtml'], case['original_mdx'], case['improved_mdx']
+ )
+ assert normalize_fragment(result) == normalize_fragment(case['expected'])
From c952500132b994e5e24bb6e31e5479d740bf7769 Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 15:23:51 +0900
Subject: [PATCH 02/13] =?UTF-8?q?confluence-mdx:=20Phase=203=20sidecar=20a?=
=?UTF-8?q?nchor=20metadata=20=E2=80=94=20paragraph=20ac:image=20anchor=20?=
=?UTF-8?q?entry=20=EC=B6=94=EC=B6=9C=20=EA=B5=AC=ED=98=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Sonnet 4.6
---
confluence-mdx/bin/reverse_sync/sidecar.py | 33 ++++++++++-
...test_reverse_sync_reconstruct_paragraph.py | 55 +++++++++++++++++++
2 files changed, 87 insertions(+), 1 deletion(-)
create mode 100644 confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py
diff --git a/confluence-mdx/bin/reverse_sync/sidecar.py b/confluence-mdx/bin/reverse_sync/sidecar.py
index 4ca96d6d2..67aa6eaac 100644
--- a/confluence-mdx/bin/reverse_sync/sidecar.py
+++ b/confluence-mdx/bin/reverse_sync/sidecar.py
@@ -267,6 +267,37 @@ def build_sidecar(
return sidecar
+def _build_anchor_entries(fragment: str) -> list:
+ """Extract every ac:image found inside a p element of fragment as a list of anchor entries.
+
+ Each anchor entry:
+ kind: "image"
+ offset: length of the text preceding the image within its p, i.e. the insertion point (intended as an old_plain_text position)
+ raw_xhtml: the original XHTML string of the ac:image
+
+ An ac:image that is a direct child of li (outside any p) is not included.
+ """
+ from bs4 import BeautifulSoup, NavigableString, Tag
+ soup = BeautifulSoup(fragment, 'html.parser')
+ anchors = []
+ for p in soup.find_all('p'):
+ offset = 0
+ for child in p.children:
+ if isinstance(child, NavigableString):
+ offset += len(str(child))
+ elif isinstance(child, Tag):
+ if child.name == 'ac:image':
+ anchors.append({
+ 'kind': 'image',
+ 'offset': offset,
+ 'raw_xhtml': str(child),
+ })
+ else:
+ # Inline elements that carry text (ac:link, etc.) advance the offset by their extracted plain text
+ offset += len(extract_plain_text(str(child)))
+ return anchors
+
+
def _build_reconstruction_metadata(
fragment: str,
mapping: BlockMapping | None,
@@ -280,7 +311,7 @@ def _build_reconstruction_metadata(
"old_plain_text": extract_plain_text(fragment),
}
if mapping.type == "paragraph":
- metadata["anchors"] = []
+ metadata["anchors"] = _build_anchor_entries(fragment)
elif mapping.type == "list":
metadata["ordered"] = mapping.xhtml_xpath.startswith("ol[")
metadata["items"] = []
diff --git a/confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py b/confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py
new file mode 100644
index 000000000..0dc992d63
--- /dev/null
+++ b/confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py
@@ -0,0 +1,55 @@
+"""Phase 3 paragraph/list-item inline-anchor 재구성 테스트."""
+import pytest
+from reverse_sync.sidecar import _build_anchor_entries # noqa: import check
+
+
+class TestBuildAnchorEntries:
+ def test_empty_paragraph_returns_empty(self):
+ """A plain paragraph without ac:image yields empty anchors."""
+ fragment = 'Simple text without images.
'
+ anchors = _build_anchor_entries(fragment)
+ assert anchors == []
+
+ def test_paragraph_with_inline_image(self):
+ """An ac:image inside a paragraph is extracted as an anchor."""
+ fragment = (
+ 'Text before '
+ ''
+ ' text after
'
+ )
+ anchors = _build_anchor_entries(fragment)
+ assert len(anchors) == 1
+ assert anchors[0]['kind'] == 'image'
+ assert anchors[0]['offset'] == len('Text before ')
+ assert 'ac:image' in anchors[0]['raw_xhtml']
+
+ def test_paragraph_with_multiple_images(self):
+ """Multiple ac:image elements are extracted in order."""
+ fragment = (
+ ''
+ ''
+ 'middle'
+ ''
+ '
'
+ )
+ anchors = _build_anchor_entries(fragment)
+ assert len(anchors) == 2
+ assert anchors[0]['offset'] == 0
+ assert anchors[1]['offset'] == len('middle')
+
+ def test_image_in_list_item_ignored(self):
+ """An ac:image that is a direct child of li (outside p) is not included in anchors."""
+ fragment = (
+ ''
+ 'List item text
'
+ ''
+ ''
+ )
+ anchors = _build_anchor_entries(fragment)
+ assert anchors == []
+
+ def test_no_paragraph_returns_empty(self):
+ """A fragment with no p element yields empty anchors."""
+ fragment = 'Just a heading
'
+ anchors = _build_anchor_entries(fragment)
+ assert anchors == []
From 4c6a10a6fd94f4e13abfeebe27c0b6519bfb074a Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 15:30:12 +0900
Subject: [PATCH 03/13] =?UTF-8?q?confluence-mdx:=20Phase=203=20sidecar=20?=
=?UTF-8?q?=5Fbuild=5Fanchor=5Fentries=20=E2=80=94=20recursive=3DFalse?=
=?UTF-8?q?=EB=A1=9C=20p=20=ED=83=90=EC=83=89=20=EB=B2=94=EC=9C=84=20?=
=?UTF-8?q?=EC=A0=9C=ED=95=9C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Sonnet 4.6
---
confluence-mdx/bin/reverse_sync/sidecar.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/confluence-mdx/bin/reverse_sync/sidecar.py b/confluence-mdx/bin/reverse_sync/sidecar.py
index 67aa6eaac..366148b50 100644
--- a/confluence-mdx/bin/reverse_sync/sidecar.py
+++ b/confluence-mdx/bin/reverse_sync/sidecar.py
@@ -280,7 +280,7 @@ def _build_anchor_entries(fragment: str) -> list:
from bs4 import BeautifulSoup, NavigableString, Tag
soup = BeautifulSoup(fragment, 'html.parser')
anchors = []
- for p in soup.find_all('p'):
+ for p in soup.find_all('p', recursive=False):
offset = 0
for child in p.children:
if isinstance(child, NavigableString):
From c3c2a2cd53968ad6b1f7e6e67226aa01fe5273e3 Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 15:32:49 +0900
Subject: [PATCH 04/13] =?UTF-8?q?confluence-mdx:=20Phase=203=20reconstruct?=
=?UTF-8?q?ors=20=E2=80=94=20anchor=20offset=20=EB=A7=A4=ED=95=91=20?=
=?UTF-8?q?=EB=B0=8F=20DOM=20=EC=82=BD=EC=9E=85=20helper=20=EA=B5=AC?=
=?UTF-8?q?=ED=98=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Sonnet 4.6
---
.../bin/reverse_sync/reconstructors.py | 159 ++++++++++++++++++
...test_reverse_sync_reconstruct_paragraph.py | 98 +++++++++++
2 files changed, 257 insertions(+)
create mode 100644 confluence-mdx/bin/reverse_sync/reconstructors.py
diff --git a/confluence-mdx/bin/reverse_sync/reconstructors.py b/confluence-mdx/bin/reverse_sync/reconstructors.py
new file mode 100644
index 000000000..015d491ba
--- /dev/null
+++ b/confluence-mdx/bin/reverse_sync/reconstructors.py
@@ -0,0 +1,159 @@
+"""Inline-anchor fragment reconstructors.
+
+Phase 3: paragraph/list item 내부 ac:image anchor 보존 재구성.
+anchor offset 매핑 + DOM 삽입 + fragment 재구성 공용 helper.
+"""
+from __future__ import annotations
+
+import difflib
+from typing import List
+
+from bs4 import BeautifulSoup, NavigableString, Tag
+
+from reverse_sync.xhtml_normalizer import extract_plain_text
+
+
+def map_anchor_offset(old_plain: str, new_plain: str, old_offset: int) -> int:
+ """Convert an anchor offset in old_plain into the corresponding offset in new_plain.
+
+ Uses difflib SequenceMatcher opcodes to map the old coordinate system onto the new one.
+ The anchor offset is the number of characters (str indices, not bytes) before the insertion point.
+
+ Only changes applied to the text in front of the anchor are reflected in the offset:
+ - equal: carried over unchanged
+ - replace: mapped proportionally onto the new length (rounded)
+ - insert (i1==i2 <= old_offset): adds the inserted text length; the loop stops once old_offset is consumed, so an insert exactly at the anchor is not counted
+ - delete: the deleted span contributes nothing to the new offset
+ """
+ matcher = difflib.SequenceMatcher(None, old_plain, new_plain, autojunk=False)
+ new_offset = 0
+ consumed_old = 0
+
+ for tag, i1, i2, j1, j2 in matcher.get_opcodes():
+ if consumed_old >= old_offset:
+ break
+
+ if tag == 'equal':
+ take = min(i2, old_offset) - i1
+ if take > 0:
+ new_offset += take
+ consumed_old += take
+
+ elif tag == 'replace':
+ old_take = min(i2, old_offset) - i1
+ if old_take > 0:
+ old_len = i2 - i1
+ new_len = j2 - j1
+ ratio = old_take / old_len
+ new_offset += round(ratio * new_len)
+ consumed_old += old_take
+
+ elif tag == 'delete':
+ old_take = min(i2, old_offset) - i1
+ if old_take > 0:
+ consumed_old += old_take
+
+ elif tag == 'insert':
+ if i1 <= old_offset:
+ new_offset += j2 - j1
+
+ if consumed_old < old_offset:
+ new_offset += old_offset - consumed_old
+
+ return new_offset
+
+
+def insert_anchor_at_offset(p_element: Tag, offset: int, anchor_xhtml: str) -> None:
+ """Insert anchor_xhtml into the DOM of the p element at the given offset (in place).
+
+ offset is a character count in extract_plain_text() terms.
+ Walks the direct children: splits the text node containing offset, or inserts after the inline element containing it; appends at the end otherwise.
+ """
+ anchor_soup = BeautifulSoup(anchor_xhtml, 'html.parser')
+ anchor_nodes = list(anchor_soup.children)
+
+ remaining = offset
+ children = list(p_element.children)
+
+ for i, child in enumerate(children):
+ if isinstance(child, NavigableString):
+ text_len = len(str(child))
+ if remaining <= text_len:
+ text = str(child)
+ before = text[:remaining]
+ after = text[remaining:]
+
+ # Replace original text node with the "before" part
+ child.replace_with(NavigableString(before))
+
+ ref_node = p_element.find(string=before) if before else None
+
+ for anchor_node in reversed(anchor_nodes):
+ cloned = BeautifulSoup(str(anchor_node), 'html.parser')
+ for n in list(cloned.children):
+ if ref_node is not None:
+ ref_node.insert_after(n.extract())
+ else:
+ p_element.insert(0, n.extract())
+
+ if after:
+ anchor_node_last = p_element.find('ac:image')
+ if anchor_node_last:
+ anchor_node_last.insert_after(NavigableString(after))
+ else:
+ p_element.append(NavigableString(after))
+ return
+ else:
+ remaining -= text_len
+ elif isinstance(child, Tag):
+ if child.name == 'ac:image':
+ pass
+ else:
+ child_text = extract_plain_text(str(child))
+ if remaining <= len(child_text):
+ for anchor_node in reversed(anchor_nodes):
+ cloned = BeautifulSoup(str(anchor_node), 'html.parser')
+ for n in list(cloned.children):
+ child.insert_after(n.extract())
+ return
+ remaining -= len(child_text)
+
+ # If offset exceeds all of the text, append the anchor at the end
+ for anchor_node in anchor_nodes:
+ cloned = BeautifulSoup(str(anchor_node), 'html.parser')
+ for n in list(cloned.children):
+ p_element.append(n.extract())
+
+
+def reconstruct_inline_anchor_fragment(
+ old_fragment: str,
+ anchors: list,
+ new_fragment: str,
+) -> str:
+ """Re-insert the original anchors into new_fragment after mapping their offsets.
+
+ Args:
+ old_fragment: original XHTML fragment (anchors included); only its plain text is used here
+ anchors: anchor entries as extracted by _build_anchor_entries(); each needs 'offset' and 'raw_xhtml'
+ new_fragment: newly emitted XHTML fragment without anchors (per the original note, produced by emit_block() - TODO confirm)
+
+ Returns:
+ new_fragment with the anchors re-inserted; new_fragment unchanged when anchors is empty or it has no p element
+ """
+ if not anchors:
+ return new_fragment
+
+ old_plain = extract_plain_text(old_fragment)
+ new_plain = extract_plain_text(new_fragment)
+
+ soup = BeautifulSoup(new_fragment, 'html.parser')
+ p = soup.find('p')
+ if p is None:
+ return new_fragment
+
+ # Process the offsets in reverse order so that an earlier insertion does not affect the later offsets
+ for anchor in reversed(anchors):
+ new_offset = map_anchor_offset(old_plain, new_plain, anchor['offset'])
+ insert_anchor_at_offset(p, new_offset, anchor['raw_xhtml'])
+
+ return str(soup)
diff --git a/confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py b/confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py
index 0dc992d63..e769d01dc 100644
--- a/confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py
+++ b/confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py
@@ -53,3 +53,101 @@ def test_no_paragraph_returns_empty(self):
fragment = 'Just a heading
'
anchors = _build_anchor_entries(fragment)
assert anchors == []
+
+
+class TestMapAnchorOffset:
+ def test_no_change_preserves_offset(self):
+ """With no text change the offset is kept as is."""
+ from reverse_sync.reconstructors import map_anchor_offset
+ result = map_anchor_offset('hello world', 'hello world', 5)
+ assert result == 5
+
+ def test_insert_before_anchor_shifts_offset(self):
+ """Inserting text before the anchor increases the offset."""
+ from reverse_sync.reconstructors import map_anchor_offset
+ # old: "AB", anchor at 1 (between A and B)
+ # new: "XAB" (X inserted before A)
+ result = map_anchor_offset('AB', 'XAB', 1)
+ # After inserting X before A, old offset 1 (end of A) → new offset 2 (end of A in XAB)
+ assert result == 2
+
+ def test_delete_before_anchor_shifts_offset(self):
+ """Deleting text before the anchor decreases the offset."""
+ from reverse_sync.reconstructors import map_anchor_offset
+ # old: "XAB", anchor at 2 (end of XA)
+ # new: "AB" (X deleted)
+ result = map_anchor_offset('XAB', 'AB', 2)
+ # anchor was after "XA", now after "A" → offset 1
+ assert result == 1
+
+ def test_replace_before_anchor(self):
+ """Replacing text before the anchor adjusts the offset to the new length."""
+ from reverse_sync.reconstructors import map_anchor_offset
+ # old: "hello world", anchor at 5 (after "hello")
+ # new: "hi world" (hello→hi)
+ result = map_anchor_offset('hello world', 'hi world', 5)
+ # "hello" replaced by "hi" → anchor moves from 5 to 2
+ assert result == 2
+
+ def test_offset_at_end_stays_at_end(self):
+ """An anchor at the end of the text moves to the new end."""
+ from reverse_sync.reconstructors import map_anchor_offset
+ result = map_anchor_offset('hello', 'world2', 5)
+ assert result == 6
+
+
+class TestInsertAnchorAtOffset:
+ def test_insert_at_beginning(self):
+ """With offset=0 the anchor is inserted before the first text node."""
+ from reverse_sync.reconstructors import insert_anchor_at_offset
+ from bs4 import BeautifulSoup
+ soup = BeautifulSoup('hello
', 'html.parser')
+ p = soup.find('p')
+ anchor_html = ''
+ insert_anchor_at_offset(p, 0, anchor_html)
+ result = str(soup)
+ assert result.index('ac:image') < result.index('hello')
+
+ def test_insert_in_middle(self):
+ """A mid-text offset inserts the anchor at that text position."""
+ from reverse_sync.reconstructors import insert_anchor_at_offset
+ from bs4 import BeautifulSoup
+ soup = BeautifulSoup('helloworld
', 'html.parser')
+ p = soup.find('p')
+ anchor_html = ''
+ insert_anchor_at_offset(p, 5, anchor_html)
+ result = str(p)
+ # The order must be hello[image]world
+ idx_hello = result.index('hello')
+ idx_image = result.index('ac:image')
+ idx_world = result.index('world')
+ assert idx_hello < idx_image < idx_world
+
+ def test_insert_at_end(self):
+ """With offset at the end of the text the anchor is inserted after the last text."""
+ from reverse_sync.reconstructors import insert_anchor_at_offset
+ from bs4 import BeautifulSoup
+ soup = BeautifulSoup('hello
', 'html.parser')
+ p = soup.find('p')
+ anchor_html = ''
+ insert_anchor_at_offset(p, 5, anchor_html)
+ result = str(p)
+ assert result.index('hello') < result.index('ac:image')
+
+
+class TestReconstructInlineAnchorFragment:
+ def test_basic_text_change_preserves_image(self):
+ """The ac:image is preserved when the text changes."""
+ from reverse_sync.reconstructors import reconstruct_inline_anchor_fragment
+ old_fragment = (
+ 'Old text '
+ ''
+ ' rest
'
+ )
+ new_fragment = 'New text rest
' # emitted from new MDX
+ anchors = [{'kind': 'image', 'offset': len('Old text '), 'raw_xhtml': ''}]
+
+ result = reconstruct_inline_anchor_fragment(old_fragment, anchors, new_fragment)
+ assert 'ac:image' in result
+ assert 'New text' in result
+ assert 'rest' in result
From 5be7ed07b0c57f9790cf80379b0d640f865cdc87 Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 15:35:50 +0900
Subject: [PATCH 05/13] =?UTF-8?q?confluence-mdx:=20Phase=203=20reconstruct?=
=?UTF-8?q?ors=20=E2=80=94=20insert=5Fanchor=5Fat=5Foffset=20pivot=20track?=
=?UTF-8?q?ing=20=EC=88=98=EC=A0=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
find(string=) 검색 대신 직접 참조(pivot)를 유지하여 동일 텍스트가 여러 번
나타날 때 잘못된 노드를 찾는 문제와 find('ac:image')가 다른 이미지를 찾는
문제를 수정합니다.
Co-Authored-By: Claude Sonnet 4.6
---
.../bin/reverse_sync/reconstructors.py | 46 +++++++++----------
1 file changed, 22 insertions(+), 24 deletions(-)
diff --git a/confluence-mdx/bin/reverse_sync/reconstructors.py b/confluence-mdx/bin/reverse_sync/reconstructors.py
index 015d491ba..45627e0db 100644
--- a/confluence-mdx/bin/reverse_sync/reconstructors.py
+++ b/confluence-mdx/bin/reverse_sync/reconstructors.py
@@ -71,50 +71,48 @@ def insert_anchor_at_offset(p_element: Tag, offset: int, anchor_xhtml: str) -> N
"""
anchor_soup = BeautifulSoup(anchor_xhtml, 'html.parser')
anchor_nodes = list(anchor_soup.children)
+ if not anchor_nodes:
+ return
remaining = offset
children = list(p_element.children)
- for i, child in enumerate(children):
+ for child in children:
if isinstance(child, NavigableString):
text_len = len(str(child))
if remaining <= text_len:
text = str(child)
- before = text[:remaining]
- after = text[remaining:]
+ before_text = text[:remaining]
+ after_text = text[remaining:]
- # Replace original text node with the "before" part
- child.replace_with(NavigableString(before))
+ # 직접 참조를 유지하여 before_node 뒤에 순서대로 삽입
+ before_node = NavigableString(before_text)
+ child.replace_with(before_node)
- ref_node = p_element.find(string=before) if before else None
-
- for anchor_node in reversed(anchor_nodes):
+ pivot = before_node
+ for anchor_node in anchor_nodes:
cloned = BeautifulSoup(str(anchor_node), 'html.parser')
for n in list(cloned.children):
- if ref_node is not None:
- ref_node.insert_after(n.extract())
- else:
- p_element.insert(0, n.extract())
-
- if after:
- anchor_node_last = p_element.find('ac:image')
- if anchor_node_last:
- anchor_node_last.insert_after(NavigableString(after))
- else:
- p_element.append(NavigableString(after))
+ extracted = n.extract()
+ pivot.insert_after(extracted)
+ pivot = extracted
+
+ if after_text:
+ pivot.insert_after(NavigableString(after_text))
return
else:
remaining -= text_len
elif isinstance(child, Tag):
- if child.name == 'ac:image':
- pass
- else:
+ if child.name != 'ac:image':
child_text = extract_plain_text(str(child))
if remaining <= len(child_text):
- for anchor_node in reversed(anchor_nodes):
+ pivot = child
+ for anchor_node in anchor_nodes:
cloned = BeautifulSoup(str(anchor_node), 'html.parser')
for n in list(cloned.children):
- child.insert_after(n.extract())
+ extracted = n.extract()
+ pivot.insert_after(extracted)
+ pivot = extracted
return
remaining -= len(child_text)
From 03fd7aa0929f820647ac39f9e69b46b760657975 Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 15:40:33 +0900
Subject: [PATCH 06/13] =?UTF-8?q?confluence-mdx:=20Phase=203=20reconstruct?=
=?UTF-8?q?ors=20=E2=80=94=20=EB=AF=B8=EC=82=AC=EC=9A=A9=20typing.List=20i?=
=?UTF-8?q?mport=20=EC=A0=9C=EA=B1=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Sonnet 4.6
---
confluence-mdx/bin/reverse_sync/reconstructors.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/confluence-mdx/bin/reverse_sync/reconstructors.py b/confluence-mdx/bin/reverse_sync/reconstructors.py
index 45627e0db..ac0af5aa9 100644
--- a/confluence-mdx/bin/reverse_sync/reconstructors.py
+++ b/confluence-mdx/bin/reverse_sync/reconstructors.py
@@ -6,7 +6,6 @@
from __future__ import annotations
import difflib
-from typing import List
from bs4 import BeautifulSoup, NavigableString, Tag
From 6a9b78972f7fc7fb57752dd21ff33347687830e3 Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 15:44:28 +0900
Subject: [PATCH 07/13] =?UTF-8?q?confluence-mdx:=20Phase=203=20patch=5Fbui?=
=?UTF-8?q?lder=20=E2=80=94=20inline-anchor=20reconstruction=20=EA=B2=BD?=
=?UTF-8?q?=EB=A1=9C=20=EC=97=B0=EB=8F=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Sonnet 4.6
---
.../bin/reverse_sync/patch_builder.py | 32 ++++++++++
...test_reverse_sync_reconstruct_paragraph.py | 58 +++++++++++++++++++
2 files changed, 90 insertions(+)
diff --git a/confluence-mdx/bin/reverse_sync/patch_builder.py b/confluence-mdx/bin/reverse_sync/patch_builder.py
index b3ba7ffec..61ef99d0d 100644
--- a/confluence-mdx/bin/reverse_sync/patch_builder.py
+++ b/confluence-mdx/bin/reverse_sync/patch_builder.py
@@ -18,6 +18,7 @@
)
from reverse_sync.lost_info_patcher import apply_lost_info, distribute_lost_info_to_mappings
from reverse_sync.mdx_to_xhtml_inline import mdx_block_to_xhtml_element, mdx_block_to_inner_xhtml
+from reverse_sync.reconstructors import reconstruct_inline_anchor_fragment
from reverse_sync.list_patcher import (
build_list_item_patches,
)
@@ -371,6 +372,37 @@ def _mark_used(block_id: str, m: BlockMapping):
)
continue
+ # Phase 3: sidecar anchor가 있는 paragraph → inline-anchor reconstruction
+ # anchor entry에 offset/raw_xhtml이 없으면 text-transfer 경로로 폴백
+ _anchors = (
+ sidecar_block.reconstruction.get('anchors', [])
+ if sidecar_block is not None and sidecar_block.reconstruction is not None
+ else []
+ )
+ _valid_anchors = [
+ a for a in _anchors if 'offset' in a and 'raw_xhtml' in a
+ ]
+ if (sidecar_block is not None
+ and sidecar_block.reconstruction is not None
+ and sidecar_block.reconstruction.get('kind') == 'paragraph'
+ and _valid_anchors):
+ new_element = _emit_replacement_fragment(change.new_block)
+ reconstructed = reconstruct_inline_anchor_fragment(
+ mapping.xhtml_text,
+ _valid_anchors,
+ new_element,
+ )
+ block_lost = (mapping_lost_info or {}).get(mapping.block_id, {})
+ if block_lost:
+ from reverse_sync.lost_info_patcher import apply_lost_info
+ reconstructed = apply_lost_info(reconstructed, block_lost)
+ patches.append({
+ 'action': 'replace_fragment',
+ 'xhtml_xpath': mapping.xhtml_xpath,
+ 'new_element_xhtml': reconstructed,
+ })
+ continue
+
# 재생성 시 소실되는 XHTML 요소 포함 시 텍스트 전이로 폴백
if ('Original text '
+ ''
+ ' more text
'
+ )
+ original_mdx = '---\ntitle: test\n---\n\n# Test\n\nOriginal text more text\n'
+ improved_mdx = '---\ntitle: test\n---\n\n# Test\n\nChanged text more text\n'
+
+ orig_blocks = list(parse_mdx_blocks(original_mdx))
+ imp_blocks = list(parse_mdx_blocks(improved_mdx))
+ changes, alignment = diff_blocks(orig_blocks, imp_blocks)
+
+ mappings = record_mapping(xhtml)
+ roundtrip_sidecar = build_sidecar(xhtml, original_mdx)
+ sidecar_yaml = generate_sidecar_mapping(xhtml, original_mdx)
+ sidecar_data = yaml.safe_load(sidecar_yaml) or {}
+ sidecar_entries = [
+ SidecarEntry(
+ xhtml_xpath=item['xhtml_xpath'],
+ xhtml_type=item.get('xhtml_type', ''),
+ mdx_blocks=item.get('mdx_blocks', []),
+ mdx_line_start=item.get('mdx_line_start', 0),
+ mdx_line_end=item.get('mdx_line_end', 0),
+ )
+ for item in sidecar_data.get('mappings', [])
+ ]
+ mdx_to_sidecar = build_mdx_to_sidecar_index(sidecar_entries)
+ xpath_to_mapping = build_xpath_to_mapping(mappings)
+
+ patches = build_patches(
+ changes, orig_blocks, imp_blocks,
+ mappings, mdx_to_sidecar, xpath_to_mapping,
+ alignment, roundtrip_sidecar=roundtrip_sidecar,
+ )
+ result = patch_xhtml(xhtml, patches)
+
+ assert 'ac:image' in result
+ assert 'ri:attachment' in result
+ assert 'Changed text' in result
+ assert 'Original text' not in result
From 52600f3c8da497d78d6221b0ba9e1990acf1ac96 Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 15:44:52 +0900
Subject: [PATCH 08/13] =?UTF-8?q?confluence-mdx:=20Phase=203=20=EC=84=A4?=
=?UTF-8?q?=EA=B3=84=20=EB=AC=B8=EC=84=9C=20=EC=83=81=ED=83=9C=20=EA=B0=B1?=
=?UTF-8?q?=EC=8B=A0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Sonnet 4.6
---
...3-13-reverse-sync-reconstruction-design.md | 25 ++++++-------------
1 file changed, 8 insertions(+), 17 deletions(-)
diff --git a/confluence-mdx/docs/plans/2026-03-13-reverse-sync-reconstruction-design.md b/confluence-mdx/docs/plans/2026-03-13-reverse-sync-reconstruction-design.md
index 42c0d3306..ecc59cec8 100644
--- a/confluence-mdx/docs/plans/2026-03-13-reverse-sync-reconstruction-design.md
+++ b/confluence-mdx/docs/plans/2026-03-13-reverse-sync-reconstruction-design.md
@@ -387,24 +387,15 @@ PR #913 시점에 제안된 방향 중, 2026-03-15 기준 `main`에서도 그대
### Phase 3. inline-anchor 및 list 재구성
-상태: 미완료
-
-구현 항목:
-
-- paragraph/list item anchor metadata builder
-- old/new plain-text offset mapping helper
-- raw anchor DOM insertion helper
-- nested list tree 기반 reconstruction
-
-우선 대상 fixture:
-
-- `tests/testcases` 내 list/image 혼합 케이스
-- `tests/reverse-sync/544376004`
+상태: 완료, `main` 반영 예정
-게이트:
-
-- inline image가 있는 paragraph/list item 재구성 green
-- duplicate hash 후보에서도 identity가 안정적으로 동작
+완료 기준:
+- paragraph anchor metadata builder 구현 (`sidecar.py`)
+- anchor offset mapping helper 구현 (`reconstructors.py`)
+- raw anchor DOM insertion helper 구현 (`reconstructors.py`)
+- inline-anchor paragraph reconstruction pipeline 연동 (`patch_builder.py`)
+- golden test 확장: 10개 inline-anchor 케이스 모두 green
+- 파서 불일치 수정 (test에서 `mdx_to_storage.parser` 사용)
### Phase 4. container 재구성
From 9598181c453ca07cd003cfbb261d25bbb9433bdb Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 15:47:27 +0900
Subject: [PATCH 09/13] =?UTF-8?q?confluence-mdx:=20Phase=203=20patch=5Fbui?=
=?UTF-8?q?lder=20=E2=80=94=20=EC=A4=91=EB=B3=B5=20import=20=EC=A0=9C?=
=?UTF-8?q?=EA=B1=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Sonnet 4.6
---
confluence-mdx/bin/reverse_sync/patch_builder.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/confluence-mdx/bin/reverse_sync/patch_builder.py b/confluence-mdx/bin/reverse_sync/patch_builder.py
index 61ef99d0d..360945b18 100644
--- a/confluence-mdx/bin/reverse_sync/patch_builder.py
+++ b/confluence-mdx/bin/reverse_sync/patch_builder.py
@@ -394,7 +394,6 @@ def _mark_used(block_id: str, m: BlockMapping):
)
block_lost = (mapping_lost_info or {}).get(mapping.block_id, {})
if block_lost:
- from reverse_sync.lost_info_patcher import apply_lost_info
reconstructed = apply_lost_info(reconstructed, block_lost)
patches.append({
'action': 'replace_fragment',
From 58753b079b14c581e681b9bea1e026559b9603b0 Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 18:08:23 +0900
Subject: [PATCH 10/13] =?UTF-8?q?confluence-mdx:=20patch=5Fbuilder=20?=
=?UTF-8?q?=E2=80=94=20sidecar=20block=20identity=20fallback=20=EC=B6=94?=
=?UTF-8?q?=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
xpath 조회 후 hash+line range 검증, 실패 시 find_sidecar_block_by_identity로
재탐색합니다. cross-type 오매칭 방지를 위해 xpath 태그 타입(p, ul, table 등)이
일치하는 경우에만 identity match를 반환합니다.
Co-Authored-By: Claude Sonnet 4.6
---
.../bin/reverse_sync/patch_builder.py | 57 ++++++++++++++++++-
1 file changed, 56 insertions(+), 1 deletion(-)
diff --git a/confluence-mdx/bin/reverse_sync/patch_builder.py b/confluence-mdx/bin/reverse_sync/patch_builder.py
index 360945b18..8e786a2ef 100644
--- a/confluence-mdx/bin/reverse_sync/patch_builder.py
+++ b/confluence-mdx/bin/reverse_sync/patch_builder.py
@@ -14,6 +14,8 @@
RoundtripSidecar,
SidecarBlock,
find_mapping_by_sidecar,
+ find_sidecar_block_by_identity,
+ sha256_text,
SidecarEntry,
)
from reverse_sync.lost_info_patcher import apply_lost_info, distribute_lost_info_to_mappings
@@ -105,6 +107,57 @@ def _build_replace_fragment_patch(
}
+def _find_roundtrip_sidecar_block(
+ change: BlockChange,
+ mapping: Optional[BlockMapping],
+ roundtrip_sidecar: Optional[RoundtripSidecar],
+ xpath_to_sidecar_block: Dict[str, SidecarBlock],
+) -> Optional[SidecarBlock]:
+ """Look up the roundtrip sidecar block, trying xpath first and then the identity hash.
+
+ 1. Fast lookup by xpath
+ 2. Verify with mdx_content_hash + mdx_line_range → return it as the confirmed match when both agree
+ 3. If verification fails, search for a more accurate block with find_sidecar_block_by_identity
+ 4. If no identity match is found (or its xpath tag type differs), return the xpath result as the fallback
+ """
+ if roundtrip_sidecar is None:
+ return None
+
+ identity_block = change.old_block or change.new_block
+
+ # xpath lookup
+ xpath_match: Optional[SidecarBlock] = None
+ if mapping is not None:
+ xpath_match = xpath_to_sidecar_block.get(mapping.xhtml_xpath)
+
+ # Verify hash + line range → confirmed match
+ if xpath_match is not None and identity_block is not None:
+ expected_hash = sha256_text(identity_block.content) if identity_block.content else ""
+ expected_range = (identity_block.line_start, identity_block.line_end)
+ if (
+ xpath_match.mdx_content_hash == expected_hash
+ and tuple(xpath_match.mdx_line_range) == expected_range
+ ):
+ return xpath_match
+
+ # Identity fallback: when mapping.yaml is out of alignment, search again by hash
+ # Returned only when the xpath tag type (p, ul, ol, table, ...) matches, to prevent cross-type mismatches
+ if identity_block is not None and identity_block.content:
+ identity_match = find_sidecar_block_by_identity(
+ roundtrip_sidecar.blocks,
+ sha256_text(identity_block.content),
+ (identity_block.line_start, identity_block.line_end),
+ )
+ if identity_match is not None:
+ mapping_tag = mapping.xhtml_xpath.split('[')[0] if mapping else ''
+ identity_tag = identity_match.xhtml_xpath.split('[')[0] if identity_match.xhtml_xpath else ''
+ if mapping_tag == identity_tag:
+ return identity_match
+
+ # Return the xpath result as the last fallback (even with a hash mismatch it is better than nothing)
+ return xpath_match
+
+
def _flush_containing_changes(
containing_changes: dict,
used_ids: 'set | None' = None,
@@ -351,7 +404,9 @@ def _mark_used(block_id: str, m: BlockMapping):
and collapse_ws(new_plain) == collapse_ws(mapping.xhtml_plain_text)):
continue
- sidecar_block = xpath_to_sidecar_block.get(mapping.xhtml_xpath)
+ sidecar_block = _find_roundtrip_sidecar_block(
+ change, mapping, roundtrip_sidecar, xpath_to_sidecar_block,
+ )
if _can_replace_table_fragment(change, mapping, roundtrip_sidecar):
patches.append(
_build_replace_fragment_patch(
From 278c53df9f1d2cc0ebf019e7dd0d9a17f8fadec6 Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 18:10:53 +0900
Subject: [PATCH 11/13] =?UTF-8?q?confluence-mdx:=20reconstructors=20+=20pa?=
=?UTF-8?q?tch=5Fbuilder=20=EC=B6=94=EC=83=81=ED=99=94=20=EB=A0=88?=
=?UTF-8?q?=EC=9D=B4=EC=96=B4=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
sidecar_block_requires_reconstruction / reconstruct_fragment_with_sidecar를
reconstructors.py에 추가하고, _build_replace_fragment_patch에 sidecar_block
파라미터를 연동합니다. patch_builder의 인라인 Phase 3 블록을 제거합니다.
Co-Authored-By: Claude Sonnet 4.6
---
.../bin/reverse_sync/patch_builder.py | 52 +++++++------------
.../bin/reverse_sync/reconstructors.py | 40 ++++++++++++++
2 files changed, 60 insertions(+), 32 deletions(-)
diff --git a/confluence-mdx/bin/reverse_sync/patch_builder.py b/confluence-mdx/bin/reverse_sync/patch_builder.py
index 8e786a2ef..79261545f 100644
--- a/confluence-mdx/bin/reverse_sync/patch_builder.py
+++ b/confluence-mdx/bin/reverse_sync/patch_builder.py
@@ -20,7 +20,10 @@
)
from reverse_sync.lost_info_patcher import apply_lost_info, distribute_lost_info_to_mappings
from reverse_sync.mdx_to_xhtml_inline import mdx_block_to_xhtml_element, mdx_block_to_inner_xhtml
-from reverse_sync.reconstructors import reconstruct_inline_anchor_fragment
+from reverse_sync.reconstructors import (
+ sidecar_block_requires_reconstruction,
+ reconstruct_fragment_with_sidecar,
+)
from reverse_sync.list_patcher import (
build_list_item_patches,
)
@@ -93,10 +96,13 @@ def _emit_replacement_fragment(block: MdxBlock) -> str:
def _build_replace_fragment_patch(
mapping: BlockMapping,
new_block: MdxBlock,
+ sidecar_block: Optional[SidecarBlock] = None,
mapping_lost_info: Optional[dict] = None,
) -> Dict[str, str]:
"""whole-fragment replacement patch를 생성한다."""
new_element = _emit_replacement_fragment(new_block)
+ if sidecar_block_requires_reconstruction(sidecar_block):
+ new_element = reconstruct_fragment_with_sidecar(new_element, sidecar_block)
block_lost = (mapping_lost_info or {}).get(mapping.block_id, {})
if block_lost:
new_element = apply_lost_info(new_element, block_lost)
@@ -306,7 +312,7 @@ def _mark_used(block_id: str, m: BlockMapping):
_build_replace_fragment_patch(
mapping,
add_change.new_block,
- mapping_lost_info,
+ mapping_lost_info=mapping_lost_info,
)
)
_paired_indices.add(idx)
@@ -375,7 +381,7 @@ def _mark_used(block_id: str, m: BlockMapping):
_build_replace_fragment_patch(
mapping,
change.new_block,
- mapping_lost_info,
+ mapping_lost_info=mapping_lost_info,
)
)
else:
@@ -412,7 +418,7 @@ def _mark_used(block_id: str, m: BlockMapping):
_build_replace_fragment_patch(
mapping,
change.new_block,
- mapping_lost_info,
+ mapping_lost_info=mapping_lost_info,
)
)
continue
@@ -422,39 +428,21 @@ def _mark_used(block_id: str, m: BlockMapping):
_build_replace_fragment_patch(
mapping,
change.new_block,
- mapping_lost_info,
+ sidecar_block=sidecar_block,
+ mapping_lost_info=mapping_lost_info,
)
)
continue
- # Phase 3: sidecar anchor가 있는 paragraph → inline-anchor reconstruction
- # anchor entry에 offset/raw_xhtml이 없으면 text-transfer 경로로 폴백
- _anchors = (
- sidecar_block.reconstruction.get('anchors', [])
- if sidecar_block is not None and sidecar_block.reconstruction is not None
- else []
- )
- _valid_anchors = [
- a for a in _anchors if 'offset' in a and 'raw_xhtml' in a
- ]
- if (sidecar_block is not None
- and sidecar_block.reconstruction is not None
- and sidecar_block.reconstruction.get('kind') == 'paragraph'
- and _valid_anchors):
- new_element = _emit_replacement_fragment(change.new_block)
- reconstructed = reconstruct_inline_anchor_fragment(
- mapping.xhtml_text,
- _valid_anchors,
- new_element,
+ if sidecar_block_requires_reconstruction(sidecar_block):
+ patches.append(
+ _build_replace_fragment_patch(
+ mapping,
+ change.new_block,
+ sidecar_block=sidecar_block,
+ mapping_lost_info=mapping_lost_info,
+ )
)
- block_lost = (mapping_lost_info or {}).get(mapping.block_id, {})
- if block_lost:
- reconstructed = apply_lost_info(reconstructed, block_lost)
- patches.append({
- 'action': 'replace_fragment',
- 'xhtml_xpath': mapping.xhtml_xpath,
- 'new_element_xhtml': reconstructed,
- })
continue
# 재생성 시 소실되는 XHTML 요소 포함 시 텍스트 전이로 폴백
diff --git a/confluence-mdx/bin/reverse_sync/reconstructors.py b/confluence-mdx/bin/reverse_sync/reconstructors.py
index ac0af5aa9..fa54591dc 100644
--- a/confluence-mdx/bin/reverse_sync/reconstructors.py
+++ b/confluence-mdx/bin/reverse_sync/reconstructors.py
@@ -6,11 +6,15 @@
from __future__ import annotations
import difflib
+from typing import TYPE_CHECKING, Optional
from bs4 import BeautifulSoup, NavigableString, Tag
from reverse_sync.xhtml_normalizer import extract_plain_text
+if TYPE_CHECKING:
+ from reverse_sync.sidecar import SidecarBlock
+
def map_anchor_offset(old_plain: str, new_plain: str, old_offset: int) -> int:
"""old_plain에서의 anchor offset을 new_plain 기준 offset으로 변환한다.
@@ -122,6 +126,42 @@ def insert_anchor_at_offset(p_element: Tag, offset: int, anchor_xhtml: str) -> N
p_element.append(n.extract())
+def sidecar_block_requires_reconstruction(
+ sidecar_block: Optional['SidecarBlock'],
+) -> bool:
+ """Return True when the sidecar block carries metadata that needs Phase 3 reconstruction.
+
+ True requires at least one valid anchor, i.e. one that has both 'offset' and 'raw_xhtml'.
+ """
+ if sidecar_block is None or sidecar_block.reconstruction is None:
+ return False
+ recon = sidecar_block.reconstruction
+ if recon.get('kind') == 'paragraph':
+ return any(
+ 'offset' in a and 'raw_xhtml' in a
+ for a in recon.get('anchors', [])
+ )
+ return False
+
+
+def reconstruct_fragment_with_sidecar(
+ new_fragment: str,
+ sidecar_block: Optional['SidecarBlock'],
+) -> str:
+ """Re-inject the sidecar block's anchor metadata into new_fragment. NOTE(review): old_plain_text is passed as the callee's old_fragment argument - confirm plain text is accepted there."""
+ if sidecar_block is None or sidecar_block.reconstruction is None:
+ return new_fragment
+ recon = sidecar_block.reconstruction
+ kind = recon.get('kind')
+ if kind == 'paragraph':
+ anchors = recon.get('anchors', [])
+ valid_anchors = [a for a in anchors if 'offset' in a and 'raw_xhtml' in a]
+ if valid_anchors:
+ old_plain = recon.get('old_plain_text', '')
+ return reconstruct_inline_anchor_fragment(old_plain, valid_anchors, new_fragment)
+ return new_fragment
+
+
def reconstruct_inline_anchor_fragment(
old_fragment: str,
anchors: list,
From c74937d9433a64997e843692b9e2c47ca6682f35 Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 18:12:44 +0900
Subject: [PATCH 12/13] =?UTF-8?q?confluence-mdx:=20Phase=203=20list=20anch?=
=?UTF-8?q?or=20=EC=9E=AC=EA=B5=AC=EC=84=B1=20=EA=B5=AC=ED=98=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
sidecar.py에 _build_list_anchor_entries/_walk_list/_extract_anchors_from_p를
추가하여 list item anchor metadata를 기록합니다.
reconstructors.py에 _rebuild_list_fragment/_find_list_item_by_path를 추가하고
sidecar_block_requires_reconstruction/reconstruct_fragment_with_sidecar에
list 분기를 연동합니다.
patch_builder.py list strategy 경로에 sidecar reconstruction을 연결합니다.
Co-Authored-By: Claude Sonnet 4.6
---
.../bin/reverse_sync/patch_builder.py | 16 ++++++
.../bin/reverse_sync/reconstructors.py | 56 ++++++++++++++++++
confluence-mdx/bin/reverse_sync/sidecar.py | 57 ++++++++++++++++++-
...test_reverse_sync_reconstruct_paragraph.py | 57 +++++++++++++++++++
4 files changed, 185 insertions(+), 1 deletion(-)
diff --git a/confluence-mdx/bin/reverse_sync/patch_builder.py b/confluence-mdx/bin/reverse_sync/patch_builder.py
index 79261545f..37ffceccf 100644
--- a/confluence-mdx/bin/reverse_sync/patch_builder.py
+++ b/confluence-mdx/bin/reverse_sync/patch_builder.py
@@ -367,6 +367,22 @@ def _mark_used(block_id: str, m: BlockMapping):
continue
if strategy == 'list':
+ list_sidecar = _find_roundtrip_sidecar_block(
+ change, mapping, roundtrip_sidecar, xpath_to_sidecar_block,
+ )
+ if (mapping is not None
+ and not _contains_preserved_anchor_markup(mapping.xhtml_text)
+ and sidecar_block_requires_reconstruction(list_sidecar)):
+ _mark_used(mapping.block_id, mapping)
+ patches.append(
+ _build_replace_fragment_patch(
+ mapping,
+ change.new_block,
+ sidecar_block=list_sidecar,
+ mapping_lost_info=mapping_lost_info,
+ )
+ )
+ continue
patches.extend(
build_list_item_patches(
change, mappings, used_ids,
diff --git a/confluence-mdx/bin/reverse_sync/reconstructors.py b/confluence-mdx/bin/reverse_sync/reconstructors.py
index fa54591dc..d24ed8a49 100644
--- a/confluence-mdx/bin/reverse_sync/reconstructors.py
+++ b/confluence-mdx/bin/reverse_sync/reconstructors.py
@@ -126,6 +126,55 @@ def insert_anchor_at_offset(p_element: Tag, offset: int, anchor_xhtml: str) -> N
p_element.append(n.extract())
+def _find_list_item_by_path(root: Tag, path: list) -> Optional[Tag]:
+ """Follow the index path through nested lists and return the li it ends at, or None when an index cannot be resolved."""
+ current_list: Optional[Tag] = root
+ current_li: Optional[Tag] = None
+ for index in path:
+ if current_list is None:
+ return None
+ items = [c for c in current_list.children if isinstance(c, Tag) and c.name == 'li']
+ if index < 0 or index >= len(items):
+ return None
+ current_li = items[index]
+ current_list = next(
+ (c for c in current_li.children if isinstance(c, Tag) and c.name in ('ul', 'ol')),
+ None,
+ )
+ return current_li
+
+
+def _find_direct_list_item_paragraph(li: Tag) -> Tag:
+ """Return the first direct-child p element of li, or li itself when it has none."""
+ for child in li.children:
+ if isinstance(child, Tag) and child.name == 'p':
+ return child
+ return li
+
+
+def _rebuild_list_fragment(new_fragment: str, recon: dict) -> str:
+ """Re-inject sidecar anchor entries into a list fragment by li index path. NOTE(review): entry offsets look per-p, yet are mapped against old_plain_text (presumably block-level) - confirm."""
+ soup = BeautifulSoup(new_fragment, 'html.parser')
+ root = soup.find(['ul', 'ol'])
+ if root is None:
+ return new_fragment
+
+ old_plain = recon.get('old_plain_text', '')
+ for entry in recon.get('items', []):
+ if not entry.get('raw_xhtml') or 'offset' not in entry:
+ continue
+ path = entry.get('path', [])
+ li = _find_list_item_by_path(root, path)
+ if li is None:
+ continue
+ p = _find_direct_list_item_paragraph(li)
+ new_p_plain = extract_plain_text(str(p))
+ new_offset = map_anchor_offset(old_plain, new_p_plain, entry['offset'])
+ insert_anchor_at_offset(p, new_offset, entry['raw_xhtml'])
+
+ return str(soup)
+
+
def sidecar_block_requires_reconstruction(
sidecar_block: Optional['SidecarBlock'],
) -> bool:
@@ -141,6 +190,11 @@ def sidecar_block_requires_reconstruction(
'offset' in a and 'raw_xhtml' in a
for a in recon.get('anchors', [])
)
+ if recon.get('kind') == 'list':
+ return any(
+ 'offset' in item and 'raw_xhtml' in item
+ for item in recon.get('items', [])
+ )
return False
@@ -159,6 +213,8 @@ def reconstruct_fragment_with_sidecar(
if valid_anchors:
old_plain = recon.get('old_plain_text', '')
return reconstruct_inline_anchor_fragment(old_plain, valid_anchors, new_fragment)
+ if kind == 'list':
+ return _rebuild_list_fragment(new_fragment, recon)
return new_fragment
diff --git a/confluence-mdx/bin/reverse_sync/sidecar.py b/confluence-mdx/bin/reverse_sync/sidecar.py
index 366148b50..afa21d378 100644
--- a/confluence-mdx/bin/reverse_sync/sidecar.py
+++ b/confluence-mdx/bin/reverse_sync/sidecar.py
@@ -298,6 +298,61 @@ def _build_anchor_entries(fragment: str) -> list:
return anchors
+def _extract_anchors_from_p(p_el) -> list:
+ """Extract ac:image anchor entries (kind, offset, raw_xhtml) from the direct children of a p element."""
+ from bs4 import NavigableString, Tag
+ anchors = []
+ offset = 0
+ for child in p_el.children:
+ if isinstance(child, NavigableString):
+ offset += len(str(child))
+ elif isinstance(child, Tag):
+ if child.name == 'ac:image':
+ anchors.append({
+ 'kind': 'image',
+ 'offset': offset,
+ 'raw_xhtml': str(child),
+ })
+ else:
+ offset += len(extract_plain_text(str(child)))
+ return anchors
+
+
+def _walk_list(list_el, path: list, entries: list) -> None:
+ """Recursively walk a list element, appending each found anchor entry (tagged with its li index path) to entries."""
+ from bs4 import Tag
+ items = [c for c in list_el.children if isinstance(c, Tag) and c.name == 'li']
+ for idx, li in enumerate(items):
+ current_path = path + [idx]
+ for child in li.children:
+ if not isinstance(child, Tag):
+ continue
+ if child.name == 'p':
+ for a in _extract_anchors_from_p(child):
+ entries.append({**a, 'path': current_path})
+ elif child.name in ('ul', 'ol'):
+ _walk_list(child, current_path, entries)
+
+
+def _build_list_anchor_entries(fragment: str) -> list:
+ """Extract li > p > ac:image occurrences in a list fragment as path-based anchor entries.
+
+ Each entry:
+ kind: "image"
+ path: li index path (nesting supported, e.g. [0, 1])
+ offset: insertion position measured in the p's plain text
+ raw_xhtml: original XHTML string of the ac:image
+ """
+ from bs4 import BeautifulSoup
+ soup = BeautifulSoup(fragment, 'html.parser')
+ root = soup.find(['ul', 'ol'])
+ if root is None:
+ return []
+ entries = []
+ _walk_list(root, [], entries)
+ return entries
+
+
def _build_reconstruction_metadata(
fragment: str,
mapping: BlockMapping | None,
@@ -314,7 +369,7 @@ def _build_reconstruction_metadata(
metadata["anchors"] = _build_anchor_entries(fragment)
elif mapping.type == "list":
metadata["ordered"] = mapping.xhtml_xpath.startswith("ol[")
- metadata["items"] = []
+ metadata["items"] = _build_list_anchor_entries(fragment)
elif mapping.children:
child_plain_texts = [
id_to_mapping[child_id].xhtml_plain_text.strip()
diff --git a/confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py b/confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py
index c482472a7..5809ff37c 100644
--- a/confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py
+++ b/confluence-mdx/tests/test_reverse_sync_reconstruct_paragraph.py
@@ -209,3 +209,60 @@ def test_changed_paragraph_with_image_preserves_image(self):
assert 'ri:attachment' in result
assert 'Changed text' in result
assert 'Original text' not in result
+
+
+class TestBuildListAnchorEntries:
+ def test_list_with_inline_image(self):
+ from reverse_sync.sidecar import _build_list_anchor_entries
+ fragment = (
+ ''
+ 'item '
+ ''
+ ' text
'
+ '
'
+ )
+ entries = _build_list_anchor_entries(fragment)
+ assert len(entries) == 1
+ assert entries[0]['path'] == [0]
+ assert entries[0]['offset'] == len('item ')
+ assert 'a.png' in entries[0]['raw_xhtml']
+
+ def test_nested_list_with_image(self):
+ from reverse_sync.sidecar import _build_list_anchor_entries
+ fragment = (
+ ''
+ )
+ entries = _build_list_anchor_entries(fragment)
+ assert len(entries) == 1
+ assert entries[0]['path'] == [0, 0]
+ assert entries[0]['offset'] == 0
+
+ def test_list_without_images_returns_empty(self):
+ from reverse_sync.sidecar import _build_list_anchor_entries
+ fragment = ''
+ entries = _build_list_anchor_entries(fragment)
+ assert entries == []
+
+ def test_multiple_items_with_images(self):
+ from reverse_sync.sidecar import _build_list_anchor_entries
+ fragment = (
+ ''
+ 'first '
+ ''
+ '
'
+ 'second
'
+ ''
+ ''
+ ' after
'
+ '
'
+ )
+ entries = _build_list_anchor_entries(fragment)
+ assert len(entries) == 2
+ assert entries[0]['path'] == [0]
+ assert entries[0]['offset'] == len('first ')
+ assert entries[1]['path'] == [2]
+ assert entries[1]['offset'] == 0
From ea886f7937732e07b07231f9ff329a6f17d085c6 Mon Sep 17 00:00:00 2001
From: JK
Date: Sun, 15 Mar 2026 18:13:37 +0900
Subject: [PATCH 13/13] =?UTF-8?q?confluence-mdx:=20reconstructors=20?=
=?UTF-8?q?=E2=80=94=20map=5Fanchor=5Foffset=20affinity=20=ED=8C=8C?=
=?UTF-8?q?=EB=9D=BC=EB=AF=B8=ED=84=B0=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
경계(i1 == old_offset)에서 삽입된 텍스트의 포함 여부를 affinity='before'/'after'로
제어합니다. 기본값 'before'는 기존 동작을 유지합니다.
Co-Authored-By: Claude Sonnet 4.6
---
confluence-mdx/bin/reverse_sync/reconstructors.py | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/confluence-mdx/bin/reverse_sync/reconstructors.py b/confluence-mdx/bin/reverse_sync/reconstructors.py
index d24ed8a49..fe2f720ba 100644
--- a/confluence-mdx/bin/reverse_sync/reconstructors.py
+++ b/confluence-mdx/bin/reverse_sync/reconstructors.py
@@ -16,7 +16,12 @@
from reverse_sync.sidecar import SidecarBlock
-def map_anchor_offset(old_plain: str, new_plain: str, old_offset: int) -> int:
+def map_anchor_offset(
+ old_plain: str,
+ new_plain: str,
+ old_offset: int,
+ affinity: str = 'before',
+) -> int:
"""old_plain에서의 anchor offset을 new_plain 기준 offset으로 변환한다.
difflib SequenceMatcher opcode를 사용해 old 좌표계를 new 좌표계로 매핑한다.
@@ -25,7 +30,7 @@ def map_anchor_offset(old_plain: str, new_plain: str, old_offset: int) -> int:
anchor 앞쪽 텍스트에 적용된 변경만 offset에 반영한다:
- equal: 그대로 유지
- replace: new 길이로 비례 매핑
- - insert (i1==i2 <= old_offset): new 텍스트 길이를 더함
+ - insert at boundary: affinity='before'이면 삽입 포함, 'after'이면 제외
- delete: 삭제된 길이만큼 뺌
"""
matcher = difflib.SequenceMatcher(None, old_plain, new_plain, autojunk=False)
@@ -57,7 +62,10 @@ def map_anchor_offset(old_plain: str, new_plain: str, old_offset: int) -> int:
consumed_old += old_take
elif tag == 'insert':
- if i1 <= old_offset:
+ # 경계(i1 == old_offset)에서 affinity로 배치 방향 결정:
+ # 'before': anchor가 삽입된 텍스트 뒤에 위치 (삽입 포함)
+ # 'after': anchor가 삽입된 텍스트 앞에 위치 (삽입 제외)
+ if i1 < old_offset or (i1 == old_offset and affinity == 'before'):
new_offset += j2 - j1
if consumed_old < old_offset: