mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-12-01 08:44:13 +01:00
🔥 refactor eynollah ocr
.
This commit is contained in:
parent
30f9c695dc
commit
b161e33854
5 changed files with 769 additions and 865 deletions
|
|
@ -88,7 +88,6 @@ def ocr_cli(
|
||||||
tr_ocr,
|
tr_ocr,
|
||||||
do_not_mask_with_textline_contour,
|
do_not_mask_with_textline_contour,
|
||||||
batch_size,
|
batch_size,
|
||||||
dataset_abbrevation,
|
|
||||||
min_conf_value_of_textline_text,
|
min_conf_value_of_textline_text,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
|
@ -101,7 +100,6 @@ def ocr_cli(
|
||||||
tr_ocr=tr_ocr,
|
tr_ocr=tr_ocr,
|
||||||
do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
|
do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
pref_of_dataset=dataset_abbrevation,
|
|
||||||
min_conf_value_of_textline_text=min_conf_value_of_textline_text)
|
min_conf_value_of_textline_text=min_conf_value_of_textline_text)
|
||||||
eynollah_ocr.run(overwrite=overwrite,
|
eynollah_ocr.run(overwrite=overwrite,
|
||||||
dir_in=dir_in,
|
dir_in=dir_in,
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
16
src/eynollah/utils/font.py
Normal file
16
src/eynollah/utils/font.py
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
|
||||||
|
# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
|
||||||
|
import sys
|
||||||
|
from PIL import ImageFont
|
||||||
|
|
||||||
|
if sys.version_info < (3, 10):
|
||||||
|
import importlib_resources
|
||||||
|
else:
|
||||||
|
import importlib.resources as importlib_resources
|
||||||
|
|
||||||
|
|
||||||
|
def get_font():
    """Load the bundled ``Charis-Regular.ttf`` as a PIL font of size 40.

    The font file is resolved relative to this module's package (one level
    up from it) through ``importlib_resources``; ``as_file`` provides a
    filesystem path for the resource while the font is being opened.

    Returns:
        The object produced by ``ImageFont.truetype`` for that file.
    """
    font_resource = importlib_resources.files(__package__) / "../Charis-Regular.ttf"
    with importlib_resources.as_file(font_resource) as font_file:
        return ImageFont.truetype(font=font_file, size=40)
|
||||||
|
|
@ -128,6 +128,7 @@ def return_textlines_split_if_needed(textline_image, textline_image_bin=None):
|
||||||
return [image1, image2], None
|
return [image1, image2], None
|
||||||
else:
|
else:
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
def preprocess_and_resize_image_for_ocrcnn_model(img, image_height, image_width):
|
def preprocess_and_resize_image_for_ocrcnn_model(img, image_height, image_width):
|
||||||
if img.shape[0]==0 or img.shape[1]==0:
|
if img.shape[0]==0 or img.shape[1]==0:
|
||||||
img_fin = np.ones((image_height, image_width, 3))
|
img_fin = np.ones((image_height, image_width, 3))
|
||||||
|
|
|
||||||
|
|
@ -88,3 +88,7 @@ def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region
|
||||||
order_of_texts.append(interest)
|
order_of_texts.append(interest)
|
||||||
|
|
||||||
return order_of_texts, id_of_texts
|
return order_of_texts, id_of_texts
|
||||||
|
|
||||||
|
def etree_namespace_for_element_tag(tag: str) -> str:
    """Return the namespace URI of an ElementTree-style qualified tag.

    Qualified tags have the form ``"{namespace}localname"``; the text
    between the leading ``{`` and the first ``}`` is returned.

    Args:
        tag: Element tag string, with or without a ``{namespace}`` prefix.

    Returns:
        The namespace URI, or an empty string when the tag has no
        well-formed ``{namespace}`` prefix.
    """
    # Without this guard, str.find() returning -1 would make the slice
    # below silently return a chunk of the local name (e.g. "cGt" for
    # "PcGts") instead of signalling "no namespace".
    if not tag.startswith('{'):
        return ''
    right = tag.find('}')
    if right == -1:
        return ''
    return tag[1:right]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue