🔥 refactor eynollah ocr

.
This commit is contained in:
kba 2025-11-28 14:54:43 +01:00
parent 30f9c695dc
commit b161e33854
5 changed files with 769 additions and 865 deletions

View file

@ -88,7 +88,6 @@ def ocr_cli(
tr_ocr, tr_ocr,
do_not_mask_with_textline_contour, do_not_mask_with_textline_contour,
batch_size, batch_size,
dataset_abbrevation,
min_conf_value_of_textline_text, min_conf_value_of_textline_text,
): ):
""" """
@ -101,7 +100,6 @@ def ocr_cli(
tr_ocr=tr_ocr, tr_ocr=tr_ocr,
do_not_mask_with_textline_contour=do_not_mask_with_textline_contour, do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
batch_size=batch_size, batch_size=batch_size,
pref_of_dataset=dataset_abbrevation,
min_conf_value_of_textline_text=min_conf_value_of_textline_text) min_conf_value_of_textline_text=min_conf_value_of_textline_text)
eynollah_ocr.run(overwrite=overwrite, eynollah_ocr.run(overwrite=overwrite,
dir_in=dir_in, dir_in=dir_in,

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,16 @@
# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
import sys
from PIL import ImageFont
if sys.version_info < (3, 10):
import importlib_resources
else:
import importlib.resources as importlib_resources
def get_font(font_size=40):
    """Load the Charis Regular TrueType font that ships next to this package.

    Args:
        font_size: Requested font size handed to ``ImageFont.truetype``.
            Defaults to 40, the value that used to be hard-coded here.

    Returns:
        The font object created by ``ImageFont.truetype``.
    """
    from io import BytesIO

    # The font file is looked up one directory above this package.
    font_resource = importlib_resources.files(__package__) / "../Charis-Regular.ttf"
    # Pass the font as in-memory bytes instead of a filesystem path: a path
    # obtained through ``as_file`` is only guaranteed to exist inside its
    # ``with`` block, but the returned font object is used long after that.
    font_data = BytesIO(font_resource.read_bytes())
    return ImageFont.truetype(font=font_data, size=font_size)

View file

@ -128,6 +128,7 @@ def return_textlines_split_if_needed(textline_image, textline_image_bin=None):
return [image1, image2], None return [image1, image2], None
else: else:
return None, None return None, None
def preprocess_and_resize_image_for_ocrcnn_model(img, image_height, image_width): def preprocess_and_resize_image_for_ocrcnn_model(img, image_height, image_width):
if img.shape[0]==0 or img.shape[1]==0: if img.shape[0]==0 or img.shape[1]==0:
img_fin = np.ones((image_height, image_width, 3)) img_fin = np.ones((image_height, image_width, 3))

View file

@ -88,3 +88,7 @@ def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region
order_of_texts.append(interest) order_of_texts.append(interest)
return order_of_texts, id_of_texts return order_of_texts, id_of_texts
def etree_namespace_for_element_tag(tag: str) -> str:
    """Extract the namespace URI from an ElementTree-style qualified tag.

    Qualified tags have the form ``{namespace}localname``.

    Args:
        tag: The element tag, with or without a ``{namespace}`` prefix.

    Returns:
        The text between the leading ``{`` and the first ``}``, or an empty
        string when the tag carries no well-formed namespace prefix.
    """
    # Without this guard an unqualified tag would yield a meaningless slice
    # of the local name (str.find returns -1 when '}' is absent).
    if not tag.startswith('{'):
        return ''
    namespace, closing_brace, _ = tag[1:].partition('}')
    # A missing closing brace means the prefix is malformed; report no namespace.
    return namespace if closing_brace else ''