mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-09-01 13:29:58 +02:00
eynollah ocr: support using either a specific model name or a models directory (default model)
This commit is contained in:
parent
7dd281267d
commit
fdcae8dd6e
2 changed files with 26 additions and 20 deletions
|
@ -456,6 +456,11 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
||||||
help="directory of models",
|
help="directory of models",
|
||||||
type=click.Path(exists=True, file_okay=False),
|
type=click.Path(exists=True, file_okay=False),
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
"--model_name",
|
||||||
|
help="Specific model file path to use for OCR",
|
||||||
|
type=click.Path(exists=True, file_okay=False),
|
||||||
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"--tr_ocr",
|
"--tr_ocr",
|
||||||
"-trocr/-notrocr",
|
"-trocr/-notrocr",
|
||||||
|
@ -474,12 +479,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
||||||
is_flag=True,
|
is_flag=True,
|
||||||
help="if this parameter set to true, cropped textline images will not be masked with textline contour.",
|
help="if this parameter set to true, cropped textline images will not be masked with textline contour.",
|
||||||
)
|
)
|
||||||
@click.option(
|
|
||||||
"--draw_texts_on_image",
|
|
||||||
"-dtoi/-ndtoi",
|
|
||||||
is_flag=True,
|
|
||||||
help="if this parameter set to true, the predicted texts will be displayed on an image.",
|
|
||||||
)
|
|
||||||
@click.option(
|
@click.option(
|
||||||
"--prediction_with_both_of_rgb_and_bin",
|
"--prediction_with_both_of_rgb_and_bin",
|
||||||
"-brb/-nbrb",
|
"-brb/-nbrb",
|
||||||
|
@ -508,16 +507,17 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
||||||
help="Override log level globally to this",
|
help="Override log level globally to this",
|
||||||
)
|
)
|
||||||
|
|
||||||
def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level):
|
def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level):
|
||||||
initLogging()
|
initLogging()
|
||||||
if log_level:
|
if log_level:
|
||||||
getLogger('eynollah').setLevel(getLevelName(log_level))
|
getLogger('eynollah').setLevel(getLevelName(log_level))
|
||||||
|
|
||||||
|
assert not model or not model_name, "model directory -m can not be set alongside specific model name --model_name"
|
||||||
assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr"
|
assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr"
|
||||||
assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m"
|
assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m"
|
||||||
assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs"
|
assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs"
|
||||||
assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib"
|
assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib"
|
||||||
assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit"
|
assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit"
|
||||||
assert not export_textline_images_and_text or not draw_texts_on_image, "Exporting textline and text -etit can not be set alongside draw text on image -dtoi"
|
|
||||||
assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text -etit can not be set alongside prediction with both rgb and bin -brb"
|
assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text -etit can not be set alongside prediction with both rgb and bin -brb"
|
||||||
assert (bool(image) ^ bool(dir_in)), "Either -i (single image) or -di (directory) must be provided, but not both."
|
assert (bool(image) ^ bool(dir_in)), "Either -i (single image) or -di (directory) must be provided, but not both."
|
||||||
eynollah_ocr = Eynollah_ocr(
|
eynollah_ocr = Eynollah_ocr(
|
||||||
|
@ -528,10 +528,10 @@ def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text,
|
||||||
dir_in_bin=dir_in_bin,
|
dir_in_bin=dir_in_bin,
|
||||||
dir_out=out,
|
dir_out=out,
|
||||||
dir_models=model,
|
dir_models=model,
|
||||||
|
model_name=model_name,
|
||||||
tr_ocr=tr_ocr,
|
tr_ocr=tr_ocr,
|
||||||
export_textline_images_and_text=export_textline_images_and_text,
|
export_textline_images_and_text=export_textline_images_and_text,
|
||||||
do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
|
do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
|
||||||
draw_texts_on_image=draw_texts_on_image,
|
|
||||||
prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin,
|
prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin,
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
pref_of_dataset=dataset_abbrevation,
|
pref_of_dataset=dataset_abbrevation,
|
||||||
|
|
|
@ -5171,6 +5171,7 @@ class Eynollah_ocr:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
dir_models,
|
dir_models,
|
||||||
|
model_name=None,
|
||||||
dir_xmls=None,
|
dir_xmls=None,
|
||||||
dir_in=None,
|
dir_in=None,
|
||||||
image_filename=None,
|
image_filename=None,
|
||||||
|
@ -5181,7 +5182,6 @@ class Eynollah_ocr:
|
||||||
batch_size=None,
|
batch_size=None,
|
||||||
export_textline_images_and_text=False,
|
export_textline_images_and_text=False,
|
||||||
do_not_mask_with_textline_contour=False,
|
do_not_mask_with_textline_contour=False,
|
||||||
draw_texts_on_image=False,
|
|
||||||
prediction_with_both_of_rgb_and_bin=False,
|
prediction_with_both_of_rgb_and_bin=False,
|
||||||
pref_of_dataset=None,
|
pref_of_dataset=None,
|
||||||
min_conf_value_of_textline_text : Optional[float]=None,
|
min_conf_value_of_textline_text : Optional[float]=None,
|
||||||
|
@ -5193,10 +5193,10 @@ class Eynollah_ocr:
|
||||||
self.dir_out = dir_out
|
self.dir_out = dir_out
|
||||||
self.dir_xmls = dir_xmls
|
self.dir_xmls = dir_xmls
|
||||||
self.dir_models = dir_models
|
self.dir_models = dir_models
|
||||||
|
self.model_name = model_name
|
||||||
self.tr_ocr = tr_ocr
|
self.tr_ocr = tr_ocr
|
||||||
self.export_textline_images_and_text = export_textline_images_and_text
|
self.export_textline_images_and_text = export_textline_images_and_text
|
||||||
self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
|
self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
|
||||||
self.draw_texts_on_image = draw_texts_on_image
|
|
||||||
self.dir_out_image_text = dir_out_image_text
|
self.dir_out_image_text = dir_out_image_text
|
||||||
self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
|
self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
|
||||||
self.pref_of_dataset = pref_of_dataset
|
self.pref_of_dataset = pref_of_dataset
|
||||||
|
@ -5210,7 +5210,10 @@ class Eynollah_ocr:
|
||||||
if tr_ocr:
|
if tr_ocr:
|
||||||
self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
|
self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
|
||||||
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||||
self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
|
if self.model_name:
|
||||||
|
self.model_ocr_dir = self.model_name
|
||||||
|
else:
|
||||||
|
self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
|
||||||
self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
|
self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
|
||||||
self.model_ocr.to(self.device)
|
self.model_ocr.to(self.device)
|
||||||
if not batch_size:
|
if not batch_size:
|
||||||
|
@ -5219,7 +5222,10 @@ class Eynollah_ocr:
|
||||||
self.b_s = int(batch_size)
|
self.b_s = int(batch_size)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.model_ocr_dir = dir_models + "/model_step_45000_ocr"#"/model_eynollah_ocr_cnnrnn_20250805"#
|
if self.model_name:
|
||||||
|
self.model_ocr_dir = self.model_name
|
||||||
|
else:
|
||||||
|
self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250805"
|
||||||
model_ocr = load_model(self.model_ocr_dir , compile=False)
|
model_ocr = load_model(self.model_ocr_dir , compile=False)
|
||||||
|
|
||||||
self.prediction_model = tf.keras.models.Model(
|
self.prediction_model = tf.keras.models.Model(
|
||||||
|
@ -5230,7 +5236,7 @@ class Eynollah_ocr:
|
||||||
else:
|
else:
|
||||||
self.b_s = int(batch_size)
|
self.b_s = int(batch_size)
|
||||||
|
|
||||||
with open(os.path.join(self.model_ocr_dir, "characters_20250707_all_lang.txt"),"r") as config_file:
|
with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
|
||||||
characters = json.load(config_file)
|
characters = json.load(config_file)
|
||||||
|
|
||||||
AUTOTUNE = tf.data.AUTOTUNE
|
AUTOTUNE = tf.data.AUTOTUNE
|
||||||
|
@ -5271,7 +5277,7 @@ class Eynollah_ocr:
|
||||||
|
|
||||||
img = cv2.imread(dir_img)
|
img = cv2.imread(dir_img)
|
||||||
|
|
||||||
if self.draw_texts_on_image:
|
if self.dir_out_image_text:
|
||||||
out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png')
|
out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png')
|
||||||
image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
|
image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
|
||||||
draw = ImageDraw.Draw(image_text)
|
draw = ImageDraw.Draw(image_text)
|
||||||
|
@ -5306,7 +5312,7 @@ class Eynollah_ocr:
|
||||||
textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] )
|
textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] )
|
||||||
x,y,w,h = cv2.boundingRect(textline_coords)
|
x,y,w,h = cv2.boundingRect(textline_coords)
|
||||||
|
|
||||||
if self.draw_texts_on_image:
|
if self.dir_out_image_text:
|
||||||
total_bb_coordinates.append([x,y,w,h])
|
total_bb_coordinates.append([x,y,w,h])
|
||||||
|
|
||||||
h2w_ratio = h/float(w)
|
h2w_ratio = h/float(w)
|
||||||
|
@ -5363,7 +5369,7 @@ class Eynollah_ocr:
|
||||||
|
|
||||||
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
|
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
|
||||||
|
|
||||||
if self.draw_texts_on_image:
|
if self.dir_out_image_text:
|
||||||
|
|
||||||
font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
|
font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
|
||||||
font = ImageFont.truetype(font_path, 40)
|
font = ImageFont.truetype(font_path, 40)
|
||||||
|
@ -5463,7 +5469,7 @@ class Eynollah_ocr:
|
||||||
dir_img_bin = os.path.join(self.dir_in_bin, file_name+'.png')
|
dir_img_bin = os.path.join(self.dir_in_bin, file_name+'.png')
|
||||||
img_bin = cv2.imread(dir_img_bin)
|
img_bin = cv2.imread(dir_img_bin)
|
||||||
|
|
||||||
if self.draw_texts_on_image:
|
if self.dir_out_image_text:
|
||||||
out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png')
|
out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png')
|
||||||
image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
|
image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
|
||||||
draw = ImageDraw.Draw(image_text)
|
draw = ImageDraw.Draw(image_text)
|
||||||
|
@ -5508,7 +5514,7 @@ class Eynollah_ocr:
|
||||||
if type_textregion=='drop-capital':
|
if type_textregion=='drop-capital':
|
||||||
angle_degrees = 0
|
angle_degrees = 0
|
||||||
|
|
||||||
if self.draw_texts_on_image:
|
if self.dir_out_image_text:
|
||||||
total_bb_coordinates.append([x,y,w,h])
|
total_bb_coordinates.append([x,y,w,h])
|
||||||
|
|
||||||
w_scaled = w * image_height/float(h)
|
w_scaled = w * image_height/float(h)
|
||||||
|
@ -5829,7 +5835,7 @@ class Eynollah_ocr:
|
||||||
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
|
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
|
||||||
|
|
||||||
|
|
||||||
if self.draw_texts_on_image:
|
if self.dir_out_image_text:
|
||||||
|
|
||||||
font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
|
font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
|
||||||
font = ImageFont.truetype(font_path, 40)
|
font = ImageFont.truetype(font_path, 40)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue