@ -22,7 +22,7 @@ from ocrd_utils import getLogger
import cv2
import cv2
import numpy as np
import numpy as np
from transformers import TrOCRProcessor
from transformers import TrOCRProcessor
from PIL import Image
from PIL import Image , ImageDraw , ImageFont
import torch
import torch
from difflib import SequenceMatcher as sq
from difflib import SequenceMatcher as sq
from transformers import VisionEncoderDecoderModel
from transformers import VisionEncoderDecoderModel
@ -4409,7 +4409,6 @@ class Eynollah:
text_regions_p_1 , erosion_hurts , polygons_lines_xml , textline_mask_tot_ea , img_bin_light = \
text_regions_p_1 , erosion_hurts , polygons_lines_xml , textline_mask_tot_ea , img_bin_light = \
self . get_regions_light_v ( img_res , is_image_enhanced , num_col_classifier )
self . get_regions_light_v ( img_res , is_image_enhanced , num_col_classifier )
#print("text region early -2 in %.1fs", time.time() - t0)
#print("text region early -2 in %.1fs", time.time() - t0)
if num_col_classifier == 1 or num_col_classifier == 2 :
if num_col_classifier == 1 or num_col_classifier == 2 :
if num_col_classifier == 1 :
if num_col_classifier == 1 :
img_w_new = 1000
img_w_new = 1000
@ -4954,9 +4953,11 @@ class Eynollah_ocr:
dir_xmls = None ,
dir_xmls = None ,
dir_in = None ,
dir_in = None ,
dir_out = None ,
dir_out = None ,
dir_out_image_text = None ,
tr_ocr = False ,
tr_ocr = False ,
export_textline_images_and_text = False ,
export_textline_images_and_text = False ,
do_not_mask_with_textline_contour = False ,
do_not_mask_with_textline_contour = False ,
draw_texts_on_image = False ,
logger = None ,
logger = None ,
) :
) :
self . dir_in = dir_in
self . dir_in = dir_in
@ -4966,6 +4967,8 @@ class Eynollah_ocr:
self . tr_ocr = tr_ocr
self . tr_ocr = tr_ocr
self . export_textline_images_and_text = export_textline_images_and_text
self . export_textline_images_and_text = export_textline_images_and_text
self . do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
self . do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
self . draw_texts_on_image = draw_texts_on_image
self . dir_out_image_text = dir_out_image_text
if tr_ocr :
if tr_ocr :
self . processor = TrOCRProcessor . from_pretrained ( " microsoft/trocr-base-printed " )
self . processor = TrOCRProcessor . from_pretrained ( " microsoft/trocr-base-printed " )
self . device = torch . device ( " cuda:0 " if torch . cuda . is_available ( ) else " cpu " )
self . device = torch . device ( " cuda:0 " if torch . cuda . is_available ( ) else " cpu " )
@ -5083,6 +5086,23 @@ class Eynollah_ocr:
return peaks_final
return peaks_final
else :
else :
return None
return None
# Function to fit text inside the given area
def fit_text_single_line(self, draw, text, font_path, max_width, max_height,
                         *, initial_font_size=50, min_font_size=10, step=2):
    """Return the largest tried font whose rendering of ``text`` fits a box.

    Font sizes are tried from ``initial_font_size`` downwards in decrements
    of ``step`` while the size stays strictly above ``min_font_size``. The
    first size whose rendered bounding box is no wider than ``max_width``
    and no taller than ``max_height`` is returned.

    Args:
        draw: Drawing context used to measure the text via ``textbbox``.
        text: The string to be measured.
        font_path: Path of the TrueType font file to load.
        max_width: Maximum allowed width of the rendered text.
        max_height: Maximum allowed height of the rendered text.
        initial_font_size: First (largest) font size to try.
        min_font_size: Size of the fallback font; sizes at or below it are
            never measured.
        step: Amount by which the size is reduced after a failed attempt.

    Returns:
        The font loaded at the first fitting size, or the font loaded at
        ``min_font_size`` when no larger size fits. The fallback is returned
        without checking that it fits.

    Raises:
        ValueError: If ``step`` is not positive (the search would never end).
        Errors raised by ``ImageFont.truetype`` (e.g. for an unreadable font
        file) are propagated unchanged.
    """
    if step <= 0:
        raise ValueError("step must be a positive number")

    font_size = initial_font_size
    while font_size > min_font_size:
        font = ImageFont.truetype(font_path, font_size)
        # textbbox yields (left, top, right, bottom) for text anchored at (0, 0).
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        if right - left <= max_width and bottom - top <= max_height:
            return font
        font_size -= step

    # Nothing above the minimum fitted: fall back to the smallest font.
    return ImageFont.truetype(font_path, min_font_size)
def return_textlines_split_if_needed ( self , textline_image ) :
def return_textlines_split_if_needed ( self , textline_image ) :
@ -5254,6 +5274,12 @@ class Eynollah_ocr:
dir_xml = os . path . join ( self . dir_xmls , file_name + ' .xml ' )
dir_xml = os . path . join ( self . dir_xmls , file_name + ' .xml ' )
out_file_ocr = os . path . join ( self . dir_out , file_name + ' .xml ' )
out_file_ocr = os . path . join ( self . dir_out , file_name + ' .xml ' )
img = cv2 . imread ( dir_img )
img = cv2 . imread ( dir_img )
if self . draw_texts_on_image :
out_image_with_text = os . path . join ( self . dir_out_image_text , file_name + ' .png ' )
image_text = Image . new ( " RGB " , ( img . shape [ 1 ] , img . shape [ 0 ] ) , " white " )
draw = ImageDraw . Draw ( image_text )
total_bb_coordinates = [ ]
tree1 = ET . parse ( dir_xml , parser = ET . XMLParser ( encoding = " utf-8 " ) )
tree1 = ET . parse ( dir_xml , parser = ET . XMLParser ( encoding = " utf-8 " ) )
root1 = tree1 . getroot ( )
root1 = tree1 . getroot ( )
@ -5283,6 +5309,9 @@ class Eynollah_ocr:
x , y , w , h = cv2 . boundingRect ( textline_coords )
x , y , w , h = cv2 . boundingRect ( textline_coords )
if self . draw_texts_on_image :
total_bb_coordinates . append ( [ x , y , w , h ] )
h2w_ratio = h / float ( w )
h2w_ratio = h / float ( w )
img_poly_on_img = np . copy ( img )
img_poly_on_img = np . copy ( img )
@ -5359,6 +5388,35 @@ class Eynollah_ocr:
extracted_texts_merged = [ ind for ind in extracted_texts_merged if ind is not None ]
extracted_texts_merged = [ ind for ind in extracted_texts_merged if ind is not None ]
unique_cropped_lines_region_indexer = np . unique ( cropped_lines_region_indexer )
unique_cropped_lines_region_indexer = np . unique ( cropped_lines_region_indexer )
if self . draw_texts_on_image :
font_path = " NotoSans-Regular.ttf " # Make sure this file exists!
font = ImageFont . truetype ( font_path , 40 )
for indexer_text , bb_ind in enumerate ( total_bb_coordinates ) :
x_bb = bb_ind [ 0 ]
y_bb = bb_ind [ 1 ]
w_bb = bb_ind [ 2 ]
h_bb = bb_ind [ 3 ]
font = self . fit_text_single_line ( draw , extracted_texts_merged [ indexer_text ] , font_path , w_bb , int ( h_bb * 0.4 ) )
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
text_bbox = draw . textbbox ( ( 0 , 0 ) , extracted_texts_merged [ indexer_text ] , font = font )
text_width = text_bbox [ 2 ] - text_bbox [ 0 ]
text_height = text_bbox [ 3 ] - text_bbox [ 1 ]
text_x = x_bb + ( w_bb - text_width ) / / 2 # Center horizontally
text_y = y_bb + ( h_bb - text_height ) / / 2 # Center vertically
# Draw the text
draw . text ( ( text_x , text_y ) , extracted_texts_merged [ indexer_text ] , fill = " black " , font = font )
image_text . save ( out_image_with_text )
text_by_textregion = [ ]
text_by_textregion = [ ]
for ind in unique_cropped_lines_region_indexer :
for ind in unique_cropped_lines_region_indexer :