diff --git a/src/__init__.py b/src/__init__.py index b06babea0..1256fc450 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -257,6 +257,8 @@ def log( text='', caller=1): line = frame_record.lineno function = frame_record.function text = f'{filename}:{line}:{function}(): {text}' + del stack + # Leaving to be garbage collected, appears to change behaviour. if _g_log_items_active: _g_log_items.append(text) if _g_out_log: @@ -3241,13 +3243,25 @@ def cre_annot(lnk, xref_dst, pno_src, ctm): if len(links) == 0: # no links there page_src = None continue - ctm = ~page_src.transformation_matrix # calc page transformation matrix page_dst = doc1[pno_dst[i]] # load destination page + + # In our call above to page_src.get_links(), we end up in + # fz_load_links(). This extracts the raw rects (encoded as strings + # such as `/Rect[10 782 40 822]`) and multiplies them by page_ctm + # from pdf_page_transform(). + # + # We want to recreate the original raw rects, so we need to + # multiply by inverse of page_ctm. This fixes #4958. + ctm = mupdf.FzMatrix() + page_src_pdf_document = _as_pdf_page(page_src) + mupdf.pdf_page_transform(page_src_pdf_document, mupdf.FzRect(0), ctm) + ictm = Matrix(mupdf.fz_invert_matrix(ctm)) + link_tab = [] # store all link definitions here for l in links: if l["kind"] == LINK_GOTO and (l["page"] not in pno_src): continue # GOTO link target not in copied pages - annot_text = cre_annot(l, xref_dst, pno_src, ctm) + annot_text = cre_annot(l, xref_dst, pno_src, ictm) if annot_text: link_tab.append(annot_text) if link_tab != []: diff --git a/tests/resources/test_4965.pdf b/tests/resources/test_4965.pdf new file mode 100644 index 000000000..77556e80e Binary files /dev/null and b/tests/resources/test_4965.pdf differ diff --git a/tests/test_insertpdf.py b/tests/test_insertpdf.py index 9689beb8c..99b2aa0b1 100644 --- a/tests/test_insertpdf.py +++ b/tests/test_insertpdf.py @@ -332,4 +332,35 @@ def test_4571(): else: # Incorrect. 
assert b'<>' in content
-
+
+
+def test_4958():
+    """Check that insert_pdf() keeps link rects intact for a rotated source page."""
+    print()
+
+    # A URI link for a page rotated by 90 degrees; pymupdf.LINK_URI == 2.
+    link_spec = {
+        'kind': pymupdf.LINK_URI,
+        'from': pymupdf.Rect(10, 20, 40, 60),
+        'uri': 'https://example.org',
+    }
+    with pymupdf.Document() as document_orig, pymupdf.Document() as document_copy:
+        document_orig.new_page()
+        document_orig[0].set_rotation(90)
+        document_orig[0].insert_link(link_spec)
+        document_copy.insert_pdf(document_orig, links=True)
+
+        # Both documents are saved next to the tests (presumably for inspection).
+        path_orig = os.path.normpath(f'{__file__}/../../tests/test_4958_out_orig.pdf')
+        path_copy = os.path.normpath(f'{__file__}/../../tests/test_4958_out_copy.pdf')
+        document_orig.save(path_orig)
+        document_copy.save(path_copy)
+        print(f'Have created {path_orig=}')
+        print(f'Have created {path_copy=}')
+
+        # The copied link must have the same 'from' rect as the original link.
+        from_rects_orig = [link['from'] for link in document_orig[0].get_links()]
+        from_rects_copy = [link['from'] for link in document_copy[0].get_links()]
+        print(f'test_4958(): orig: {from_rects_orig}')
+        print(f'test_4958(): copy: {from_rects_copy}')
+        assert from_rects_orig == from_rects_copy
diff --git a/tests/test_widgets.py b/tests/test_widgets.py
index 9eafd0246..2ba7a0afb 100644
--- a/tests/test_widgets.py
+++ b/tests/test_widgets.py
@@ -430,3 +430,20 @@ def test_4055():
     # Round 5: final check: setting to "Yes" also does work
     for w in page.widgets(types=[2]):
         assert w.field_value == w.on_state()
+
+
+def test_4965():
+    """Read name, value and type of every widget in test_4965.pdf; has no asserts."""
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4965.pdf')
+    with pymupdf.open(path) as document:
+        for page in document:
+            print(f'test_4965(): {page.number=}')
+            # Visit every form field (widget) on the page and read its properties.
+            for widget_i, field in enumerate(page.widgets()):
+                name = field.field_name  # The internal name of the field
+                value = field.field_value  # The data currently in the field
+                f_type = field.field_type  # Integer representing the field type
+                print(f' {widget_i=}')
+                print(f' {name=}')
+                print(f' {value=}')
+                print(f' {f_type=}')