🔥 refactor eynollah ocr

.
2026-03-13 02:31:56 +01:00 · 2025-11-28 14:54:43 +01:00 · 2025-11-28 14:54:43 +01:00 · b161e33854
commit b161e33854
parent 30f9c695dc
5 changed files with 769 additions and 865 deletions
--- a/src/eynollah/cli/cli_ocr.py
+++ b/src/eynollah/cli/cli_ocr.py
@ -88,7 +88,6 @@ def ocr_cli(
    tr_ocr,
    do_not_mask_with_textline_contour,
    batch_size,
-    dataset_abbrevation,
    min_conf_value_of_textline_text,
 ):
    """
@ -101,7 +100,6 @@ def ocr_cli(
        tr_ocr=tr_ocr,
        do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
        batch_size=batch_size,
-        pref_of_dataset=dataset_abbrevation,
        min_conf_value_of_textline_text=min_conf_value_of_textline_text)
    eynollah_ocr.run(overwrite=overwrite,
                     dir_in=dir_in,
--- a/src/eynollah/eynollah_ocr.py
+++ b/src/eynollah/eynollah_ocr.py
@ -1,24 +1,22 @@
 # FIXME: fix all of those...
-# pyright: reportPossiblyUnboundVariable=false
-# pyright: reportOptionalMemberAccess=false
-# pyright: reportArgumentType=false
-# pyright: reportCallIssue=false
 # pyright: reportOptionalSubscript=false

 from logging import Logger, getLogger
-from typing import Optional
+from typing import List, Optional
 from pathlib import Path
 import os
 import gc
-import sys
 import math
-import time
+from dataclasses import dataclass

 import cv2
-import xml.etree.ElementTree as ET
-from PIL import Image, ImageDraw, ImageFont
+from cv2.typing import MatLike
+from xml.etree import ElementTree as ET
+from PIL import Image, ImageDraw
 import numpy as np
 from eynollah.model_zoo import EynollahModelZoo
+from eynollah.utils.font import get_font
+from eynollah.utils.xml import etree_namespace_for_element_tag
 try:
    import torch
 except ImportError:
@ -38,11 +36,13 @@ from .utils.utils_ocr import (
    rotate_image_with_padding,
 )

-# cannot use importlib.resources until we move to 3.9+ forimportlib.resources.files
-if sys.version_info < (3, 10):
-    import importlib_resources
-else:
-    import importlib.resources as importlib_resources
+# TODO: refine typing
+@dataclass
+class EynollahOcrResult:
+    """Per-page OCR output handed from ``run_trocr`` / ``run_cnn`` to ``write_ocr``."""
+    # Recognized text per text line (``None`` placeholders already filtered out);
+    # ``write_ocr`` indexes it with a running text-line counter.
+    extracted_texts_merged: List
+    # Confidence value per text line, parallel to ``extracted_texts_merged``;
+    # ``None`` when the backend provides no confidences (``run_trocr`` returns ``None``).
+    extracted_conf_value_merged: Optional[List]
+    # Region index per cropped line; ``write_ocr`` takes its unique values to
+    # assemble the text of each text region.
+    cropped_lines_region_indexer: List
+    # One ``[x, y, w, h]`` entry per text line, taken from ``cv2.boundingRect``.
+    total_bb_coordinates:List

 class Eynollah_ocr:
    def __init__(
@ -76,6 +76,7 @@ class Eynollah_ocr:

    @property
    def device(self):
+        assert torch
        if torch.cuda.is_available():
            self.logger.info("Using GPU acceleration")
            return torch.device("cuda:0")
@ -83,59 +84,17 @@ class Eynollah_ocr:
            self.logger.info("Using CPU processing")
            return torch.device("cpu")

-    def run(self, overwrite: bool = False,
-            dir_in: Optional[str] = None,
-            # Prediction with RGB and binarized images for selected pages, should not be the default
-            dir_in_bin: Optional[str] = None,
-            image_filename: Optional[str] = None,
-            dir_xmls: Optional[str] = None,
-            dir_out_image_text: Optional[str] = None,
-            dir_out: Optional[str] = None,
-    ):
-        if dir_in:
-            ls_imgs = [os.path.join(dir_in, image_filename)
-                       for image_filename in filter(is_image_filename,
-                                                    os.listdir(dir_in))]
-        else:
-            assert image_filename
-            ls_imgs = [image_filename]
+    def run_trocr(
+        self,
+        *,
+        img: MatLike,
+        page_tree: ET.ElementTree,
+        page_ns,
+        tr_ocr_input_height_and_width,
+    ) -> EynollahOcrResult:
        
-        if self.tr_ocr:
-            tr_ocr_input_height_and_width = 384
-            for dir_img in ls_imgs:
-                file_name = Path(dir_img).stem
-                assert dir_xmls  # FIXME: check the logic
-                dir_xml = os.path.join(dir_xmls, file_name+'.xml')
-                assert dir_out # FIXME: check the logic
-                out_file_ocr = os.path.join(dir_out, file_name+'.xml')
-                
-                if os.path.exists(out_file_ocr):
-                    if overwrite:
-                        self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
-                    else:
-                        self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
-                        continue
-                    
-                img = cv2.imread(dir_img)
-                
-                if dir_out_image_text:
-                    out_image_with_text = os.path.join(dir_out_image_text, file_name+'.png')
-                    image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
-                    draw = ImageDraw.Draw(image_text)
        total_bb_coordinates = []

-                ##file_name = Path(dir_xmls).stem
-                tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding="utf-8"))
-                root1=tree1.getroot()
-                alltags=[elem.tag for elem in root1.iter()]
-                link=alltags[0].split('}')[0]+'}'
-
-                name_space = alltags[0].split('}')[0]
-                name_space = name_space.split('{')[1]
-
-                region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')]) 
-                        
-                    
            
        cropped_lines = []
        cropped_lines_region_indexer = []
@ -146,7 +105,7 @@ class Eynollah_ocr:
        indexer_text_region = 0
        indexer_b_s = 0
        
-                for nn in root1.iter(region_tags):
+        for nn in page_tree.getroot().iter(f'{{{page_ns}}}TextRegion'):
            for child_textregion in nn:
                if child_textregion.tag.endswith("TextLine"):
                    
@ -159,7 +118,6 @@ class Eynollah_ocr:
                                                            for x in p_h] )
                            x,y,w,h = cv2.boundingRect(textline_coords)
                            
-                                    if dir_out_image_text:
                            total_bb_coordinates.append([x,y,w,h])
                            
                            h2w_ratio = h/float(w)
@ -301,185 +259,37 @@ class Eynollah_ocr:
        extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
        #print(extracted_texts_merged, len(extracted_texts_merged))

-                unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
+        return EynollahOcrResult(
+            extracted_texts_merged=extracted_texts_merged,
+            extracted_conf_value_merged=None,
+            cropped_lines_region_indexer=cropped_lines_region_indexer,
+            total_bb_coordinates=total_bb_coordinates,
+        )
        
-                if dir_out_image_text:
+    def run_cnn(
+        self,
+        *,
+        img: MatLike,
+        img_bin: Optional[MatLike],
+        page_tree: ET.ElementTree,
+        page_ns,
+        image_width,
+        image_height,
+    ) -> EynollahOcrResult:
        
-                    #font_path = "Charis-7.000/Charis-Regular.ttf"  # Make sure this file exists!
-                    font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
-                    with importlib_resources.as_file(font) as font:
-                        font = ImageFont.truetype(font=font, size=40)
-                    
-                    for indexer_text, bb_ind in enumerate(total_bb_coordinates):
-                        
-                        
-                        x_bb = bb_ind[0]
-                        y_bb = bb_ind[1]
-                        w_bb = bb_ind[2]
-                        h_bb = bb_ind[3]
-                        
-                        font = fit_text_single_line(draw, extracted_texts_merged[indexer_text],
-                                                    font.path, w_bb, int(h_bb*0.4) )
-                        
-                        ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
-                        
-                        text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
-                        text_width = text_bbox[2] - text_bbox[0]
-                        text_height = text_bbox[3] - text_bbox[1]
-
-                        text_x = x_bb + (w_bb - text_width) // 2  # Center horizontally
-                        text_y = y_bb + (h_bb - text_height) // 2  # Center vertically
-
-                        # Draw the text
-                        draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)
-                    image_text.save(out_image_with_text)
-
-                #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer')
-                #######text_by_textregion = []
-                #######for ind in unique_cropped_lines_region_indexer:
-                    #######ind = np.array(cropped_lines_region_indexer)==ind
-                    #######extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
-                    #######text_by_textregion.append(" ".join(extracted_texts_merged_un))
-                    
-                text_by_textregion = []
-                for ind in unique_cropped_lines_region_indexer:
-                    ind = np.array(cropped_lines_region_indexer) == ind
-                    extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
-                    if len(extracted_texts_merged_un)>1:
-                        text_by_textregion_ind = ""
-                        next_glue = ""
-                        for indt in range(len(extracted_texts_merged_un)):
-                            if (extracted_texts_merged_un[indt].endswith('⸗') or
-                                extracted_texts_merged_un[indt].endswith('-') or
-                                extracted_texts_merged_un[indt].endswith('¬')):
-                                text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1]
-                                next_glue = ""
-                            else:
-                                text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt]
-                                next_glue = " "
-                        text_by_textregion.append(text_by_textregion_ind)
-                    else:
-                        text_by_textregion.append(" ".join(extracted_texts_merged_un))
-                        
-                        
-                indexer = 0
-                indexer_textregion = 0
-                for nn in root1.iter(region_tags):
-                    #id_textregion = nn.attrib['id']
-                    #id_textregions.append(id_textregion)
-                    #textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
-                    
-                    is_textregion_text = False
-                    for childtest in nn:
-                        if childtest.tag.endswith("TextEquiv"):
-                            is_textregion_text = True
-                    
-                    if not is_textregion_text:
-                        text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
-                        unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')
-
-                    
-                    has_textline = False
-                    for child_textregion in nn:
-                        if child_textregion.tag.endswith("TextLine"):
-                            
-                            is_textline_text = False
-                            for childtest2 in child_textregion:
-                                if childtest2.tag.endswith("TextEquiv"):
-                                    is_textline_text = True
-                            
-                            
-                            if not is_textline_text:
-                                text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
-                                ##text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
-                                unicode_textline = ET.SubElement(text_subelement, 'Unicode')
-                                unicode_textline.text = extracted_texts_merged[indexer]
-                            else:
-                                for childtest3 in child_textregion:
-                                    if childtest3.tag.endswith("TextEquiv"):
-                                        for child_uc in childtest3:
-                                            if child_uc.tag.endswith("Unicode"):
-                                                ##childtest3.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
-                                                child_uc.text = extracted_texts_merged[indexer]
-                                    
-                            indexer = indexer + 1
-                            has_textline = True
-                    if has_textline:
-                        if is_textregion_text:
-                            for child4 in nn:
-                                if child4.tag.endswith("TextEquiv"):
-                                    for childtr_uc in child4:
-                                        if childtr_uc.tag.endswith("Unicode"):
-                                            childtr_uc.text = text_by_textregion[indexer_textregion]
-                        else:
-                            unicode_textregion.text = text_by_textregion[indexer_textregion]
-                        indexer_textregion = indexer_textregion + 1
-                        
-                ###sample_order  = [(id_to_order[tid], text)
-                ###                 for tid, text in zip(id_textregions, textregions_by_existing_ids)
-                ###                 if tid in id_to_order]
-                
-                ##ordered_texts_sample = [text for _, text in sorted(sample_order)]
-                ##tot_page_text = ' '.join(ordered_texts_sample)
-                
-                ##for page_element in root1.iter(link+'Page'):
-                    ##text_page = ET.SubElement(page_element, 'TextEquiv')
-                    ##unicode_textpage = ET.SubElement(text_page, 'Unicode')
-                    ##unicode_textpage.text = tot_page_text
-                
-                ET.register_namespace("",name_space)
-                tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None)
-        else:
-            ###max_len = 280#512#280#512
-            ###padding_token = 1500#299#1500#299
-            image_width = 512#max_len * 4
-            image_height = 32
-
-
-            img_size=(image_width, image_height)
-            
-            for dir_img in ls_imgs:
-                file_name = Path(dir_img).stem
-                dir_xml = os.path.join(dir_xmls, file_name+'.xml')
-                out_file_ocr = os.path.join(dir_out, file_name+'.xml')
-                
-                if os.path.exists(out_file_ocr):
-                    if overwrite:
-                        self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
-                    else:
-                        self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
-                        continue
-                
-                img = cv2.imread(dir_img)
-                if dir_in_bin is not None:
-                    cropped_lines_bin = []
-                    img_bin = cv2.imread(os.path.join(dir_in_bin, file_name+'.png'))
-                
-                if dir_out_image_text:
-                    out_image_with_text = os.path.join(dir_out_image_text, file_name+'.png')
-                    image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
-                    draw = ImageDraw.Draw(image_text)
        total_bb_coordinates = []

-                tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding="utf-8"))
-                root1=tree1.getroot()
-                alltags=[elem.tag for elem in root1.iter()]
-                link=alltags[0].split('}')[0]+'}'
-
-                name_space = alltags[0].split('}')[0]
-                name_space = name_space.split('{')[1]
-
-                region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')]) 
-                    
        cropped_lines = []
+        img_crop_bin = None
+        imgs_bin = None
+        imgs_bin_ver_flipped = None
+        cropped_lines_bin = []
        cropped_lines_ver_index = []
        cropped_lines_region_indexer = []
        cropped_lines_meging_indexing = []
        
-                tinl = time.time()
        indexer_text_region = 0
-                indexer_textlines = 0
-                for nn in root1.iter(region_tags):
+        for nn in page_tree.getroot().iter(f'{{{page_ns}}}TextRegion'):
            try:
                type_textregion = nn.attrib['type']
            except:
@ -502,13 +312,12 @@ class Eynollah_ocr:
                            if type_textregion=='drop-capital':
                                angle_degrees = 0
                                
-                                    if dir_out_image_text:
                            total_bb_coordinates.append([x,y,w,h])
                            
                            w_scaled = w *  image_height/float(h)
                            
                            img_poly_on_img = np.copy(img)
-                                    if dir_in_bin is not None:
+                            if img_bin:
                                img_poly_on_img_bin = np.copy(img_bin)
                                img_crop_bin = img_poly_on_img_bin[y:y+h, x:x+w, :]
                            
@ -528,7 +337,7 @@ class Eynollah_ocr:
                                better_des_slope = get_orientation_moments(textline_coords)
                                
                                img_crop = rotate_image_with_padding(img_crop, better_des_slope)
-                                        if dir_in_bin is not None:
+                                if img_bin:
                                    img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope)
                                    
                                mask_poly = rotate_image_with_padding(mask_poly, better_des_slope)
@ -542,13 +351,13 @@ class Eynollah_ocr:
                                    
                                if not self.do_not_mask_with_textline_contour:
                                    img_crop[mask_poly==0] = 255
-                                        if dir_in_bin is not None:
+                                if img_bin:
                                    img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
                                    if not self.do_not_mask_with_textline_contour:
                                        img_crop_bin[mask_poly==0] = 255
                                
                                if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90:
-                                            if dir_in_bin is not None:
+                                    if img_bin:
                                        img_crop, img_crop_bin = \
                                            break_curved_line_into_small_pieces_and_then_merge(
                                                img_crop, mask_poly, img_crop_bin)
@ -561,14 +370,14 @@ class Eynollah_ocr:
                                better_des_slope = 0
                                if not self.do_not_mask_with_textline_contour:
                                    img_crop[mask_poly==0] = 255
-                                        if dir_in_bin is not None:
+                                if img_bin:
                                    if not self.do_not_mask_with_textline_contour:
                                        img_crop_bin[mask_poly==0] = 255
                                if type_textregion=='drop-capital':
                                    pass
                                else:
                                    if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90:
-                                                if dir_in_bin is not None:
+                                        if img_bin:
                                            img_crop, img_crop_bin = \
                                                break_curved_line_into_small_pieces_and_then_merge(
                                                    img_crop, mask_poly, img_crop_bin)
@ -587,13 +396,13 @@ class Eynollah_ocr:
                                    cropped_lines_ver_index.append(0)
                                    
                                cropped_lines_meging_indexing.append(0)
-                                        if dir_in_bin is not None:
+                                if img_bin:
                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(
                                        img_crop_bin, image_height, image_width)
                                    cropped_lines_bin.append(img_fin)
                            else:
                                splited_images, splited_images_bin = return_textlines_split_if_needed(
-                                            img_crop, img_crop_bin if dir_in_bin is not None else None)
+                                    img_crop, img_crop_bin if img_bin else None)
                                if splited_images:
                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(
                                        splited_images[0], image_height, image_width)
@ -616,7 +425,7 @@ class Eynollah_ocr:
                                    else:
                                        cropped_lines_ver_index.append(0)
                                    
-                                            if dir_in_bin is not None:
+                                    if img_bin:
                                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(
                                            splited_images_bin[0], image_height, image_width)
                                        cropped_lines_bin.append(img_fin)
@ -635,7 +444,7 @@ class Eynollah_ocr:
                                    else:
                                        cropped_lines_ver_index.append(0)
                                    
-                                            if dir_in_bin is not None:
+                                    if img_bin:
                                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(
                                            img_crop_bin, image_height, image_width)
                                        cropped_lines_bin.append(img_fin)
@ -648,6 +457,7 @@ class Eynollah_ocr:

        n_iterations  = math.ceil(len(cropped_lines) / self.b_s) 

+        # FIXME: copy pasta
        for i in range(n_iterations):
            if i==(n_iterations-1):
                n_start = i*self.b_s
@ -667,7 +477,7 @@ class Eynollah_ocr:
                else:
                    imgs_ver_flipped = None
                
-                        if dir_in_bin is not None:
+                if img_bin:
                    imgs_bin = cropped_lines_bin[n_start:]
                    imgs_bin = np.array(imgs_bin)
                    imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3)
@ -697,7 +507,7 @@ class Eynollah_ocr:
                    imgs_ver_flipped = None

                
-                        if dir_in_bin is not None:
+                if img_bin:
                    imgs_bin = cropped_lines_bin[n_start:n_end]
                    imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)
                    
@ -743,7 +553,8 @@ class Eynollah_ocr:
                    indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
                    preds[indices_to_be_replaced,:,:] = \
                        preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
-                    if dir_in_bin is not None:
+
+            if img_bin:
                preds_bin = self.model_zoo.get('ocr').predict(imgs_bin, verbose=0)
                
                if len(indices_ver)>0:
@ -797,7 +608,6 @@ class Eynollah_ocr:
                    extracted_texts.append("")
                    extracted_conf_value.append(0)
        del cropped_lines
-                if dir_in_bin is not None:
        del cropped_lines_bin
        gc.collect()
        
@ -808,24 +618,46 @@ class Eynollah_ocr:
                                    else None
                                    for ind in range(len(cropped_lines_meging_indexing))]
        
-                extracted_conf_value_merged = [extracted_conf_value[ind]
+        extracted_conf_value_merged = [extracted_conf_value[ind]  # type: ignore
                                        if cropped_lines_meging_indexing[ind]==0
                                        else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2.
                                        if cropped_lines_meging_indexing[ind]==1
                                        else None
                                        for ind in range(len(cropped_lines_meging_indexing))]

-                extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm]
+        extracted_conf_value_merged: List[float] = [extracted_conf_value_merged[ind_cfm]
                                        for ind_cfm in range(len(extracted_texts_merged))
                                        if extracted_texts_merged[ind_cfm] is not None]
-                extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
-                unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)

-                if dir_out_image_text:
-                    #font_path = "Charis-7.000/Charis-Regular.ttf"  # Make sure this file exists!
-                    font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
-                    with importlib_resources.as_file(font) as font:
-                        font = ImageFont.truetype(font=font, size=40)
+        extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
+
+        return EynollahOcrResult(
+            extracted_texts_merged=extracted_texts_merged,
+            extracted_conf_value_merged=extracted_conf_value_merged,
+            cropped_lines_region_indexer=cropped_lines_region_indexer,
+            total_bb_coordinates=total_bb_coordinates,
+        )
+        
+    def write_ocr(
+        self,
+        *,
+        result: EynollahOcrResult,
+        page_tree: ET.ElementTree,
+        out_file_ocr,
+        page_ns,
+        img,
+        out_image_with_text,
+    ):
+        cropped_lines_region_indexer = result.cropped_lines_region_indexer
+        total_bb_coordinates = result.total_bb_coordinates
+        extracted_texts_merged = result.extracted_texts_merged
+        extracted_conf_value_merged = result.extracted_conf_value_merged
+
+        unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
+        if out_image_with_text:
+            image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
+            draw = ImageDraw.Draw(image_text)
+            font = get_font()
            
            for indexer_text, bb_ind in enumerate(total_bb_coordinates):
                x_bb = bb_ind[0]
@ -868,25 +700,10 @@ class Eynollah_ocr:
                text_by_textregion.append(text_by_textregion_ind)
            else:
                text_by_textregion.append(" ".join(extracted_texts_merged_un))
-                    #print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion')

-                ###index_tot_regions = []
-                ###tot_region_ref = []
-
-                ###for jj in root1.iter(link+'RegionRefIndexed'):
-                    ###index_tot_regions.append(jj.attrib['index'])
-                    ###tot_region_ref.append(jj.attrib['regionRef'])
-                    
-                ###id_to_order = {tid: ro for tid, ro in zip(tot_region_ref, index_tot_regions)}
-    
-                #id_textregions = []
-                #textregions_by_existing_ids = []
        indexer = 0
        indexer_textregion = 0
-                for nn in root1.iter(region_tags):
-                    #id_textregion = nn.attrib['id']
-                    #id_textregions.append(id_textregion)
-                    #textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
+        for nn in page_tree.getroot().iter(f'{{{page_ns}}}TextRegion'):
            
            is_textregion_text = False
            for childtest in nn:
@ -910,6 +727,7 @@ class Eynollah_ocr:
                    
                    if not is_textline_text:
                        text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
+                        if extracted_conf_value_merged:
                            text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
                        unicode_textline = ET.SubElement(text_subelement, 'Unicode')
                        unicode_textline.text = extracted_texts_merged[indexer]
@ -918,8 +736,8 @@ class Eynollah_ocr:
                            if childtest3.tag.endswith("TextEquiv"):
                                for child_uc in childtest3:
                                    if child_uc.tag.endswith("Unicode"):
-                                                childtest3.set('conf',
-                                                                f"{extracted_conf_value_merged[indexer]:.2f}")
+                                        if extracted_conf_value_merged:
+                                            childtest3.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
                                        child_uc.text = extracted_texts_merged[indexer]
                            
                    indexer = indexer + 1
@ -935,18 +753,85 @@ class Eynollah_ocr:
                    unicode_textregion.text = text_by_textregion[indexer_textregion]
                indexer_textregion = indexer_textregion + 1
                
-                ###sample_order  = [(id_to_order[tid], text)
-                ###                 for tid, text in zip(id_textregions, textregions_by_existing_ids)
-                ###                 if tid in id_to_order]
+        ET.register_namespace("",page_ns)
+        page_tree.write(out_file_ocr, xml_declaration=True, method='xml', encoding="utf-8", default_namespace=None)

-                ##ordered_texts_sample = [text for _, text in sorted(sample_order)]
-                ##tot_page_text = ' '.join(ordered_texts_sample)
+    def run(
+        self,
+        *,
+        overwrite: bool = False,
+        dir_in: Optional[str] = None,
+        dir_in_bin: Optional[str] = None,
+        image_filename: Optional[str] = None,
+        dir_xmls: str,
+        dir_out_image_text: Optional[str] = None,
+        dir_out: str,
+    ):
+        """
+        Run OCR on a single image or on every image file of a directory.

+        For each image, the XML file with the same stem is parsed from
+        ``dir_xmls``, handed together with the image to the OCR backend
+        (``run_trocr`` if ``self.tr_ocr`` is set, ``run_cnn`` otherwise), and
+        the result is passed to ``write_ocr`` with ``<dir_out>/<stem>.xml``
+        as the output file.

+        Args:
+            overwrite (bool): If the output file already exists, process the
+                image anyway when true; otherwise skip that image.
+            dir_in (str): Directory whose entries accepted by
+                ``is_image_filename`` are processed. Takes precedence over
+                ``image_filename``.
+            dir_in_bin (str): Prediction with RGB and binarized images for
+                selected pages, should not be the default. ``<stem>.png`` is
+                read from this directory and passed to ``run_cnn`` only.
+            image_filename (str): Path of the single image to process; must
+                be given when ``dir_in`` is not.
+            dir_xmls (str): Directory holding the input ``<stem>.xml`` files.
+            dir_out_image_text (str): If given, ``<stem>.png`` inside this
+                directory is passed to ``write_ocr`` as ``out_image_with_text``.
+            dir_out (str): Directory receiving the output ``<stem>.xml`` files.
+        """
+        if dir_in:
+            ls_imgs = [os.path.join(dir_in, entry)
+                       for entry in filter(is_image_filename, os.listdir(dir_in))]
+        else:
+            assert image_filename
+            ls_imgs = [image_filename]

+        for img_filename in ls_imgs:
+            file_stem = Path(img_filename).stem
+            page_file_in = os.path.join(dir_xmls, file_stem+'.xml')
+            out_file_ocr = os.path.join(dir_out, file_stem+'.xml')
+            
+            if os.path.exists(out_file_ocr):
+                if overwrite:
+                    self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
+                else:
+                    self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
+                    # Skip only this image; returning here would abandon
+                    # every remaining image of the batch.
+                    continue
+                
+            img = cv2.imread(img_filename)

+            page_tree = ET.parse(page_file_in, parser = ET.XMLParser(encoding="utf-8"))
+            page_ns = etree_namespace_for_element_tag(page_tree.getroot().tag)

+            out_image_with_text = None
+            if dir_out_image_text:
+                out_image_with_text = os.path.join(dir_out_image_text, file_stem + '.png')

+            img_bin = None
+            if dir_in_bin:
+                img_bin = cv2.imread(os.path.join(dir_in_bin, file_stem+'.png'))

+            if self.tr_ocr:
+                result = self.run_trocr(
+                    img=img,
+                    page_tree=page_tree,
+                    page_ns=page_ns,
+                    tr_ocr_input_height_and_width=384,
+                )
+            else:
+                result = self.run_cnn(
+                    img=img,
+                    page_tree=page_tree,
+                    page_ns=page_ns,
+                    img_bin=img_bin,
+                    image_width=512,
+                    image_height=32,
+                )

+            self.write_ocr(
+                result=result,
+                img=img,
+                page_tree=page_tree,
+                page_ns=page_ns,
+                out_file_ocr=out_file_ocr,
+                out_image_with_text=out_image_with_text,
+            )
--- a/src/eynollah/utils/font.py
+++ b/src/eynollah/utils/font.py
@ -0,0 +1,16 @@
+
+# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
+import sys
+from PIL import ImageFont
+
+if sys.version_info < (3, 10):
+    import importlib_resources
+else:
+    import importlib.resources as importlib_resources
+
+
+def get_font():
+    """
+    Load the packaged ``Charis-Regular.ttf`` font at size 40.

+    Returns:
+        The font object created by ``ImageFont.truetype``.
+    """
+    # The font file is a resource of the parent package of this module's
+    # package. Address that package by name instead of joining a "../"
+    # segment, which resource readers other than plain directories need
+    # not support.
+    parent_package = __package__.rpartition(".")[0]
+    font_resource = importlib_resources.files(parent_package) / "Charis-Regular.ttf"
+    with importlib_resources.as_file(font_resource) as font_path:
+        return ImageFont.truetype(font=font_path, size=40)
--- a/src/eynollah/utils/utils_ocr.py
+++ b/src/eynollah/utils/utils_ocr.py
@ -128,6 +128,7 @@ def return_textlines_split_if_needed(textline_image, textline_image_bin=None):
            return [image1, image2], None
    else:
        return None, None
+
 def preprocess_and_resize_image_for_ocrcnn_model(img, image_height, image_width):
    if img.shape[0]==0 or img.shape[1]==0:
        img_fin = np.ones((image_height, image_width, 3))
--- a/src/eynollah/utils/xml.py
+++ b/src/eynollah/utils/xml.py
@ -88,3 +88,7 @@ def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region
        order_of_texts.append(interest)

    return order_of_texts, id_of_texts
+
+def etree_namespace_for_element_tag(tag: str) -> str:
+    """
+    Return the namespace URI of an ElementTree tag written as ``{uri}local``.

+    Args:
+        tag: Element tag as reported by ElementTree.

+    Returns:
+        The text between the leading ``{`` and the first ``}``, or an empty
+        string when the tag does not carry a ``{uri}`` prefix.
+    """
+    # Without this guard str.find() yields -1 for un-namespaced tags and the
+    # slice would return the tag minus its first and last character.
+    if not tag.startswith('{'):
+        return ''
+    namespace, closing_brace, _ = tag[1:].partition('}')
+    return namespace if closing_brace else ''