🔥 refactor eynollah ocr

.
2026-03-13 02:31:56 +01:00 · 2025-11-28 14:54:43 +01:00 · 2025-11-28 14:54:43 +01:00 · b161e33854
commit b161e33854
parent 30f9c695dc
5 changed files with 769 additions and 865 deletions
--- a/src/eynollah/cli/cli_ocr.py
+++ b/src/eynollah/cli/cli_ocr.py
@ -88,7 +88,6 @@ def ocr_cli(
    tr_ocr,
    do_not_mask_with_textline_contour,
    batch_size,
    dataset_abbrevation,
    min_conf_value_of_textline_text,
 ):
    """
@ -101,7 +100,6 @@ def ocr_cli(
        tr_ocr=tr_ocr,
        do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
        batch_size=batch_size,
        pref_of_dataset=dataset_abbrevation,
        min_conf_value_of_textline_text=min_conf_value_of_textline_text)
    eynollah_ocr.run(overwrite=overwrite,
                     dir_in=dir_in,
--- a/src/eynollah/eynollah_ocr.py
+++ b/src/eynollah/eynollah_ocr.py
--- a/src/eynollah/utils/font.py
+++ b/src/eynollah/utils/font.py
@ -0,0 +1,16 @@
 # cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
 import sys
 from PIL import ImageFont
 if sys.version_info < (3, 10):
    import importlib_resources
 else:
    import importlib.resources as importlib_resources
 def get_font(size: int = 40):
    """
    Load the bundled ``Charis-Regular.ttf`` font as a PIL font object.

    The font file is looked up relative to this module's package
    (``__package__``), one directory level above it.

    Args:
        size: font size handed to ``ImageFont.truetype``. Defaults to 40,
            the value that was previously hard-coded.

    Returns:
        The object returned by ``ImageFont.truetype`` for the bundled font.
    """
    font_resource = importlib_resources.files(__package__) / "../Charis-Regular.ttf"
    # as_file() provides a concrete filesystem path for the resource for the
    # duration of the with-block; the font is opened while that path is valid.
    with importlib_resources.as_file(font_resource) as font_path:
        return ImageFont.truetype(font=font_path, size=size)
--- a/src/eynollah/utils/utils_ocr.py
+++ b/src/eynollah/utils/utils_ocr.py
@ -128,6 +128,7 @@ def return_textlines_split_if_needed(textline_image, textline_image_bin=None):
            return [image1, image2], None
    else:
        return None, None
 def preprocess_and_resize_image_for_ocrcnn_model(img, image_height, image_width):
    if img.shape[0]==0 or img.shape[1]==0:
        img_fin = np.ones((image_height, image_width, 3))
--- a/src/eynollah/utils/xml.py
+++ b/src/eynollah/utils/xml.py
@ -88,3 +88,7 @@ def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region
        order_of_texts.append(interest)
    return order_of_texts, id_of_texts
 def etree_namespace_for_element_tag(tag: str):
    """
    Return the namespace part of an ``{namespace}localname`` element tag.

    Args:
        tag: element tag, expected in the ``{namespace}localname`` form.

    Returns:
        The text between the leading ``{`` and the first ``}``. An empty
        string is returned when the tag carries no namespace (it does not
        start with ``{`` or has no closing ``}``).
    """
    if not tag.startswith('{'):
        return ''
    right = tag.find('}')
    # str.find() yields -1 when there is no closing brace; slicing with that
    # value would silently return a truncated tag instead of a namespace.
    if right == -1:
        return ''
    return tag[1:right]