pymupdf · JorjMcKie · Feb 14, 2026
diff --git a/src/__init__.py b/src/__init__.py
@@ -13700,13 +13700,15 @@ def n2(self):
             return self.n
         return mupdf.fz_pixmap_components(self.this)
 
-    def pdfocr_save(self, filename, compress=1, language=None, tessdata=None):
+    def pdfocr_save(self, filename, compress=1, language=None, tessdata=None, options=""):
         '''
         Save pixmap as an OCR-ed PDF page.
         '''
         tessdata = get_tessdata(tessdata)
         opts = mupdf.FzPdfocrOptions()
         opts.compress = compress
+        if options:
+            opts.options = options
         if language:
             opts.language_set2( language)
         if tessdata:
@@ -13721,7 +13723,7 @@ def pdfocr_save(self, filename, compress=1, language=None, tessdata=None):
             finally:
                 out.fz_close_output()   # Avoid MuPDF warning.
 
-    def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None):
+    def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None, options=""):
         """Save pixmap as an OCR-ed PDF page.
 
         Args:
@@ -13731,6 +13733,8 @@ def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None):
             tessdata: (str) folder name of Tesseract's language support. If None
                     we use environment variable TESSDATA_PREFIX or search for
                     Tesseract installation.
+            options: (str) comma-separated Tesseract options, as can be given
+                     via Tesseract's "-c" CLI parameter.
         Notes:
             On failure, make sure Tesseract is installed and you have set
             <tessdata> or environment variable "TESSDATA_PREFIX" to the folder
@@ -13739,7 +13743,7 @@ def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None):
         tessdata = get_tessdata(tessdata)
         from io import BytesIO
         bio = BytesIO()
-        self.pdfocr_save(bio, compress=compress, language=language, tessdata=tessdata)
+        self.pdfocr_save(bio, compress=compress, language=language, tessdata=tessdata, options=options)
         return bio.getvalue()
 
     def pil_image(self):

diff --git a/src/utils.py b/src/utils.py
@@ -321,6 +321,7 @@ def get_textpage_ocr(
     dpi: int = 72,
     full: bool = False,
     tessdata: str = None,
+    options="",
 ) -> pymupdf.TextPage:
     """Create a Textpage from combined results of normal and OCR text parsing.
 
@@ -329,6 +330,8 @@ def get_textpage_ocr(
         language: (str) specify expected language(s). Default is "eng" (English).
         dpi: (int) resolution in dpi, default 72.
         full: (bool) whether to OCR the full page image, or only its images (default)
+        options: (str) comma-separated Tesseract options, as can be given via
+                 Tesseract's "-c" CLI parameter.
     """
     pymupdf.CheckParent(page)
     tessdata = pymupdf.get_tessdata(tessdata)
@@ -343,6 +346,7 @@ def full_ocr(page, dpi, language, flags):
                     compress=False,
                     language=language,
                     tessdata=tessdata,
+                    options=options,
                     ),
                 )
         ocr_page = ocr_pdf.load_page(0)
@@ -376,7 +380,10 @@ def full_ocr(page, dpi, language, flags):
                 pix = pymupdf.Pixmap(pix, 0)
             imgdoc = pymupdf.Document(
                     "pdf",
-                    pix.pdfocr_tobytes(language=language, tessdata=tessdata),
+                    pix.pdfocr_tobytes(
+                        language=language,
+                        tessdata=tessdata,
+                        options=options),
                     )  # pdf with OCRed page
             imgpage = imgdoc.load_page(0)  # read image as a page
             pix = None