Skip to content

Commit

Permalink
src/__init__.py tests/: address #4004 - avoid segv when trying to get…
Browse files Browse the repository at this point in the history
… page from annot.

The fix requires MuPDF >= 1.25, specifically this MuPDF commit:

    When annotation is deleted from page, remove link from annotation to page.
  • Loading branch information
julian-smith-artifex-com committed Oct 31, 2024
1 parent f6a853e commit b04e8b2
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 13 deletions.
42 changes: 29 additions & 13 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,21 @@ def _as_pdf_page(page, required=True):
assert 0, f'Unrecognised {type(page)=}'


def _pdf_annot_page(annot):
    '''
    Return the page that <annot> belongs to, via mupdf.pdf_annot_page().

    The raw MuPDF call returns a mupdf.PdfPage with `.m_internal=None` when
    <annot> is not bound to a page; this wrapper raises RuntimeError in that
    case instead of handing back such a page.

    [Some other MuPDF functions, such as `pdf_update_annot()`, already raise
    a similar exception if a pdf_annot's .page field is null.]
    '''
    bound_page = mupdf.pdf_annot_page(annot)
    if bound_page.m_internal:
        return bound_page
    # No underlying page: fail loudly rather than let callers use it.
    raise RuntimeError('Annot is not bound to a page')


# Fixme: we don't support JM_MEMORY=1, so this flag is fixed at 0.
JM_MEMORY = 0

Expand Down Expand Up @@ -599,7 +614,7 @@ def _setAP(self, buffer_, rect=0):
try:
annot = self.this
annot_obj = mupdf.pdf_annot_obj( annot)
page = mupdf.pdf_annot_page( annot)
page = _pdf_annot_page(annot)
apobj = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
if not apobj.m_internal:
raise RuntimeError( MSG_BAD_APN)
Expand All @@ -619,7 +634,7 @@ def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotat
annot = self.this
assert annot.m_internal
annot_obj = mupdf.pdf_annot_obj( annot)
page = mupdf.pdf_annot_page( annot)
page = _pdf_annot_page(annot)
pdf = page.doc()
type_ = mupdf.pdf_annot_type( annot)
nfcol, fcol = JM_color_FromSequence(fill_color)
Expand Down Expand Up @@ -830,7 +845,7 @@ def delete_responses(self):
CheckParent(self)
annot = self.this
annot_obj = mupdf.pdf_annot_obj(annot)
page = mupdf.pdf_annot_page(annot)
page = _pdf_annot_page(annot)
while 1:
irt_annot = JM_find_annot_irt(annot)
if not irt_annot.m_internal:
Expand Down Expand Up @@ -942,7 +957,7 @@ def get_parent(self):
try:
ret = getattr( self, 'parent')
except AttributeError:
page = mupdf.pdf_annot_page(self.this)
page = _pdf_annot_page(self.this)
assert isinstance( page, mupdf.PdfPage)
document = Document( page.doc()) if page.m_internal else None
ret = Page(page, document)
Expand Down Expand Up @@ -1374,7 +1389,7 @@ def set_irt_xref(self, xref):
'''
annot = self.this
annot_obj = mupdf.pdf_annot_obj( annot)
page = mupdf.pdf_annot_page( annot)
page = _pdf_annot_page(annot)
if xref < 1 or xref >= mupdf.pdf_xref_len( page.doc()):
raise ValueError( MSG_BAD_XREF)
irt = mupdf.pdf_new_indirect( page.doc(), xref, 0)
Expand Down Expand Up @@ -1429,7 +1444,7 @@ def set_opacity(self, opacity):
return
mupdf.pdf_set_annot_opacity(annot, opacity)
if opacity < 1.0:
page = mupdf.pdf_annot_page(annot)
page = _pdf_annot_page(annot)
page.transparency = 1

def set_open(self, is_open):
Expand All @@ -1444,7 +1459,7 @@ def set_popup(self, rect):
'''
CheckParent(self)
annot = self.this
pdfpage = mupdf.pdf_annot_page( annot)
pdfpage = _pdf_annot_page(annot)
rot = JM_rotate_page_matrix(pdfpage)
r = mupdf.fz_transform_rect(JM_rect_from_py(rect), rot)
mupdf.pdf_set_annot_popup(annot, r)
Expand All @@ -1454,7 +1469,7 @@ def set_rect(self, rect):
CheckParent(self)
annot = self.this

pdfpage = mupdf.pdf_annot_page(annot)
pdfpage = _pdf_annot_page(annot)
rot = JM_rotate_page_matrix(pdfpage)
r = mupdf.fz_transform_rect(JM_rect_from_py(rect), rot)
if mupdf.fz_is_empty_rect(r) or mupdf.fz_is_infinite_rect(r):
Expand Down Expand Up @@ -1850,7 +1865,7 @@ def vertices(self):
annot = self.this
assert isinstance(annot, mupdf.PdfAnnot)
annot_obj = mupdf.pdf_annot_obj(annot)
page = mupdf.pdf_annot_page(annot)
page = _pdf_annot_page(annot)
page_ctm = mupdf.FzMatrix() # page transformation matrix
dummy = mupdf.FzRect() # Out-param for mupdf.pdf_page_transform().
mupdf.pdf_page_transform(page, dummy, page_ctm)
Expand Down Expand Up @@ -14470,7 +14485,7 @@ def JM_add_annot_id(annot, stem):
Append a number to 'stem' such that the result is a unique name.
'''
assert isinstance(annot, mupdf.PdfAnnot)
page = mupdf.pdf_annot_page( annot)
page = _pdf_annot_page(annot)
annot_obj = mupdf.pdf_annot_obj( annot)
names = JM_get_annot_id_list(page)
i = 0
Expand Down Expand Up @@ -15315,7 +15330,7 @@ def JM_find_annot_irt(annot):
annot_obj = mupdf.pdf_annot_obj(annot)
found = 0
# loop thru MuPDF's internal annots array
page = mupdf.pdf_annot_page(annot)
page = _pdf_annot_page(annot)
irt_annot = mupdf.pdf_first_annot(page)
while 1:
assert isinstance(irt_annot, mupdf.PdfAnnot)
Expand Down Expand Up @@ -15781,7 +15796,7 @@ def JM_get_widget_properties(annot, Widget):
#log( '{type(annot)=}')
annot_obj = mupdf.pdf_annot_obj(annot.this)
#log( 'Have called mupdf.pdf_annot_obj()')
page = mupdf.pdf_annot_page(annot.this)
page = _pdf_annot_page(annot.this)
pdf = page.doc()
tw = annot

Expand Down Expand Up @@ -17596,7 +17611,8 @@ def JM_set_widget_properties(annot, Widget):
if isinstance( annot, Annot):
annot = annot.this
assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}'
page = mupdf.pdf_annot_page(annot)
page = _pdf_annot_page(annot)
assert page.m_internal, 'Annot is not bound to a page'
annot_obj = mupdf.pdf_annot_obj(annot)
pdf = page.doc()
def GETATTR(name):
Expand Down
Binary file added tests/resources/test_4004.pdf
Binary file not shown.
19 changes: 19 additions & 0 deletions tests/test_annots.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,3 +439,22 @@ def test_3758():
page.apply_redactions()
wt = pymupdf.TOOLS.mupdf_warnings()
assert wt


def test_parent():
    """Check that an annotation's parent is invalidated on page re-assignment."""
    document = pymupdf.open()
    page = document.new_page()
    annot = page.add_highlight_annot(page.rect)  # annotation lives on page 0
    # Deliberately rebind the *same* name to a fresh page: this should orphan
    # the annotation (presumably by dropping the last reference to page 0).
    page = document.new_page()
    try:
        print(annot)  # expected to raise
    except Exception as exc:
        # The exception type and message depend on the MuPDF version in use.
        if pymupdf.mupdf_version_tuple < (1, 25):
            assert isinstance(exc, ReferenceError)
            assert str(exc) == 'weakly-referenced object no longer exists'
        else:
            assert isinstance(exc, pymupdf.mupdf.FzErrorArgument)
            assert str(exc) == 'code=4: annotation not bound to any page'
    else:
        assert 0, 'Failed to get expected exception.'
46 changes: 46 additions & 0 deletions tests/test_widgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,3 +333,49 @@ def test_3950():
'{{ policy_period_end_date }}',
'{{ insurance_line }}',
]


def test_4004():
    """Updating a widget after its page was released must not crash (#4004)."""
    if pymupdf.mupdf_version_tuple < (1, 25):
        print('test_4004(): not running because requires MuPDF >= 1.25.')
        return

    import collections

    def get_widgets_by_name(doc):
        """
        Collect every widget of <doc>, grouped by field name.

        Returns a defaultdict mapping each field name to a list of dicts with
        keys "page_num" and "widget", in page order.
        """
        grouped = collections.defaultdict(list)
        for page_num in range(len(doc)):
            # Keep the page bound to a local name while its widgets are read.
            page = doc.load_page(page_num)
            for field in page.widgets():
                record = {"page_num": page_num, "widget": field}
                grouped[field.field_name].append(record)
        return grouped

    # Open document and get widgets
    pdf_path = os.path.normpath(f'{__file__}/../../tests/resources/test_4004.pdf')
    doc = pymupdf.open(pdf_path)
    widgets_by_name = get_widgets_by_name(doc)

    # Print widget information
    for name, entries in widgets_by_name.items():
        print(f"Widget Name: {name}")
        for entry in entries:
            widget = entry["widget"]
            page_num = entry["page_num"]
            print(f" Page: {page_num + 1}, Type: {widget.field_type}, Value: {widget.field_value}, Rect: {widget.rect}")

    # Attempt to update field value
    field = widgets_by_name["Text1"][0]['widget']
    field.value = "1234567890"
    try:
        field.update()
    except Exception as e:
        # If update() fails, it must be the clean "not bound" error.
        assert str(e) == 'Annot is not bound to a page'

    doc.close()

0 comments on commit b04e8b2

Please sign in to comment.