Mirror of https://github.com/qurator-spk/eynollah.git (synced 2025-06-14 14:49:54 +02:00)
In OCR, the predicted text is now drawn on the image and the results are saved in a specified directory, making it easier to review the predicted output.
This commit is contained in:
parent 9b04688ebc
commit b1da0a3327

2 changed files with 75 additions and 3 deletions
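For context, the two new CLI options wire through to `Eynollah_ocr`. Below is a minimal sketch of driving the new feature from Python; the import path and all directory values are assumptions (only the keyword names visible in this diff are taken from the commit):

```python
# Hedged sketch: enable the new draw-texts-on-image feature from Python.
# The import path is an assumption; all paths below are placeholders.
from eynollah.eynollah import Eynollah_ocr

eynollah_ocr = Eynollah_ocr(
    dir_xmls="page_xmls/",             # PAGE-XML layout files
    dir_in="images/",                  # input document images
    dir_out="ocr_results/",            # OCR output directory
    dir_models="models/",              # directory with OCR models
    dir_out_image_text="image_text/",  # new: where rendered-text images are saved
    draw_texts_on_image=True,          # new: render predicted text onto a page image
)
eynollah_ocr.run()
```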
@@ -334,6 +334,12 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="directory of xmls",
     type=click.Path(exists=True, file_okay=False),
 )
+@click.option(
+    "--dir_out_image_text",
+    "-doit",
+    help="directory of images with predicted text",
+    type=click.Path(exists=True, file_okay=False),
+)
 @click.option(
     "--model",
     "-m",
@@ -359,6 +365,12 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     is_flag=True,
     help="if this parameter set to true, cropped textline images will not be masked with textline contour.",
 )
+@click.option(
+    "--draw_texts_on_image",
+    "-dtoi/-ndtoi",
+    is_flag=True,
+    help="if this parameter set to true, the predicted texts will be displayed on an image.",
+)
 @click.option(
     "--log_level",
     "-l",
@@ -366,18 +378,20 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="Override log level globally to this",
 )
 
-def ocr(dir_in, out, dir_xmls, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, log_level):
+def ocr(dir_in, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, log_level):
     if log_level:
         setOverrideLogLevel(log_level)
     initLogging()
     eynollah_ocr = Eynollah_ocr(
         dir_xmls=dir_xmls,
+        dir_out_image_text=dir_out_image_text,
         dir_in=dir_in,
         dir_out=out,
         dir_models=model,
         tr_ocr=tr_ocr,
         export_textline_images_and_text=export_textline_images_and_text,
         do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
+        draw_texts_on_image=draw_texts_on_image,
     )
     eynollah_ocr.run()
 
@@ -22,7 +22,7 @@ from ocrd_utils import getLogger
 import cv2
 import numpy as np
 from transformers import TrOCRProcessor
-from PIL import Image
+from PIL import Image, ImageDraw, ImageFont
 import torch
 from difflib import SequenceMatcher as sq
 from transformers import VisionEncoderDecoderModel
@@ -4409,7 +4409,6 @@ class Eynollah:
         text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = \
             self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
         #print("text region early -2 in %.1fs", time.time() - t0)
-
         if num_col_classifier == 1 or num_col_classifier ==2:
             if num_col_classifier == 1:
                 img_w_new = 1000
@@ -4954,9 +4953,11 @@ class Eynollah_ocr:
         dir_xmls=None,
         dir_in=None,
         dir_out=None,
+        dir_out_image_text=None,
         tr_ocr=False,
         export_textline_images_and_text=False,
         do_not_mask_with_textline_contour=False,
+        draw_texts_on_image=False,
         logger=None,
     ):
         self.dir_in = dir_in
@@ -4966,6 +4967,8 @@ class Eynollah_ocr:
         self.tr_ocr = tr_ocr
         self.export_textline_images_and_text = export_textline_images_and_text
         self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
+        self.draw_texts_on_image = draw_texts_on_image
+        self.dir_out_image_text = dir_out_image_text
         if tr_ocr:
             self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
             self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -5083,6 +5086,23 @@ class Eynollah_ocr:
             return peaks_final
         else:
             return None
 
+    # Function to fit text inside the given area
+    def fit_text_single_line(self, draw, text, font_path, max_width, max_height):
+        initial_font_size = 50
+        font_size = initial_font_size
+        while font_size > 10:  # Minimum font size
+            font = ImageFont.truetype(font_path, font_size)
+            text_bbox = draw.textbbox((0, 0), text, font=font)  # Get text bounding box
+            text_width = text_bbox[2] - text_bbox[0]
+            text_height = text_bbox[3] - text_bbox[1]
+
+            if text_width <= max_width and text_height <= max_height:
+                return font  # Return the best-fitting font
+
+            font_size -= 2  # Reduce font size and retry
+
+        return ImageFont.truetype(font_path, 10)  # Smallest font fallback
+
     def return_textlines_split_if_needed(self, textline_image):
 
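The fitting helper added above works by measuring candidate font sizes with Pillow's `ImageDraw.textbbox` (available since Pillow 8.0) and shrinking until the text fits the target box. A standalone sketch of that measurement step, with a placeholder font path:

```python
# Standalone sketch of the measurement the fitter relies on.
# "NotoSans-Regular.ttf" is a placeholder; any TrueType font file works.
from PIL import Image, ImageDraw, ImageFont

canvas = Image.new("RGB", (400, 60), "white")
draw = ImageDraw.Draw(canvas)
font = ImageFont.truetype("NotoSans-Regular.ttf", 50)
left, top, right, bottom = draw.textbbox((0, 0), "example textline", font=font)
print(right - left, bottom - top)  # width/height compared against the target box
```

Stepping the size down by 2 trades a little precision for fewer measurements; a binary search over font sizes would reach the same answer in fewer iterations.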
@@ -5254,6 +5274,12 @@ class Eynollah_ocr:
             dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
             out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
             img = cv2.imread(dir_img)
 
+            if self.draw_texts_on_image:
+                out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png')
+                image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
+                draw = ImageDraw.Draw(image_text)
+                total_bb_coordinates = []
+
             tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding="utf-8"))
             root1=tree1.getroot()
@@ -5283,6 +5309,9 @@ class Eynollah_ocr:
 
                 x,y,w,h = cv2.boundingRect(textline_coords)
+
+                if self.draw_texts_on_image:
+                    total_bb_coordinates.append([x,y,w,h])
 
                 h2w_ratio = h/float(w)
 
                 img_poly_on_img = np.copy(img)
@@ -5359,6 +5388,35 @@ class Eynollah_ocr:
 
             extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
             unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
 
+
+            if self.draw_texts_on_image:
+
+                font_path = "NotoSans-Regular.ttf"  # Make sure this file exists!
+                font = ImageFont.truetype(font_path, 40)
+
+                for indexer_text, bb_ind in enumerate(total_bb_coordinates):
+
+
+                    x_bb = bb_ind[0]
+                    y_bb = bb_ind[1]
+                    w_bb = bb_ind[2]
+                    h_bb = bb_ind[3]
+
+                    font = self.fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) )
+
+                    ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
+
+                    text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
+                    text_width = text_bbox[2] - text_bbox[0]
+                    text_height = text_bbox[3] - text_bbox[1]
+
+                    text_x = x_bb + (w_bb - text_width) // 2  # Center horizontally
+                    text_y = y_bb + (h_bb - text_height) // 2  # Center vertically
+
+                    # Draw the text
+                    draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)
+                image_text.save(out_image_with_text)
+
             text_by_textregion = []
             for ind in unique_cropped_lines_region_indexer:
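The drawing loop centers each predicted line inside its layout box; with hypothetical numbers, the arithmetic looks like this:

```python
# Worked example of the centering above; all values are hypothetical.
x_bb, w_bb = 100, 400   # box origin and width from cv2.boundingRect
text_width = 200        # measured with draw.textbbox for the fitted font
text_x = x_bb + (w_bb - text_width) // 2
print(text_x)           # 200 -> the text starts centered within the box
```

Capping the fitted height at `int(h_bb*0.4)` presumably leaves vertical padding so ascenders and descenders do not collide with neighbouring boxes.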