mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-12-01 08:44:13 +01:00
move line-gt extraction out of ocr to eynollah-training
This commit is contained in:
parent
951bd2fce6
commit
30f9c695dc
4 changed files with 500 additions and 429 deletions
|
|
@ -59,12 +59,6 @@ import click
|
|||
is_flag=True,
|
||||
help="if this parameter set to true, transformer ocr will be applied, otherwise cnn_rnn model.",
|
||||
)
|
||||
@click.option(
|
||||
"--export_textline_images_and_text",
|
||||
"-etit/-noetit",
|
||||
is_flag=True,
|
||||
help="if this parameter set to true, images and text in xml will be exported into output dir. This files can be used for training a OCR engine.",
|
||||
)
|
||||
@click.option(
|
||||
"--do_not_mask_with_textline_contour",
|
||||
"-nmtc/-mtc",
|
||||
|
|
@ -76,11 +70,6 @@ import click
|
|||
"-bs",
|
||||
help="number of inference batch size. Default b_s for trocr and cnn_rnn models are 2 and 8 respectively",
|
||||
)
|
||||
@click.option(
|
||||
"--dataset_abbrevation",
|
||||
"-ds_pref",
|
||||
help="in the case of extracting textline and text from a xml GT file user can add an abbrevation of dataset name to generated dataset",
|
||||
)
|
||||
@click.option(
|
||||
"--min_conf_value_of_textline_text",
|
||||
"-min_conf",
|
||||
|
|
@ -97,7 +86,6 @@ def ocr_cli(
|
|||
dir_out_image_text,
|
||||
overwrite,
|
||||
tr_ocr,
|
||||
export_textline_images_and_text,
|
||||
do_not_mask_with_textline_contour,
|
||||
batch_size,
|
||||
dataset_abbrevation,
|
||||
|
|
@ -106,18 +94,11 @@ def ocr_cli(
|
|||
"""
|
||||
Recognize text with a CNN/RNN or transformer ML model.
|
||||
"""
|
||||
assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr"
|
||||
# FIXME: refactor: move export_textline_images_and_text out of eynollah.py
|
||||
# assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m"
|
||||
assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs"
|
||||
assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib"
|
||||
assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit"
|
||||
assert bool(image) != bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both."
|
||||
assert bool(image) ^ bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both."
|
||||
from ..eynollah_ocr import Eynollah_ocr
|
||||
eynollah_ocr = Eynollah_ocr(
|
||||
model_zoo=ctx.obj.model_zoo,
|
||||
tr_ocr=tr_ocr,
|
||||
export_textline_images_and_text=export_textline_images_and_text,
|
||||
do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
|
||||
batch_size=batch_size,
|
||||
pref_of_dataset=dataset_abbrevation,
|
||||
|
|
|
|||
|
|
@ -9,17 +9,13 @@ from logging import Logger, getLogger
|
|||
from typing import Optional
|
||||
from pathlib import Path
|
||||
import os
|
||||
import json
|
||||
import gc
|
||||
import sys
|
||||
import math
|
||||
import time
|
||||
|
||||
from keras.layers import StringLookup
|
||||
import cv2
|
||||
import xml.etree.ElementTree as ET
|
||||
import tensorflow as tf
|
||||
from keras.models import load_model
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
import numpy as np
|
||||
from eynollah.model_zoo import EynollahModelZoo
|
||||
|
|
@ -48,11 +44,6 @@ if sys.version_info < (3, 10):
|
|||
else:
|
||||
import importlib.resources as importlib_resources
|
||||
|
||||
try:
|
||||
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
||||
except ImportError:
|
||||
TrOCRProcessor = VisionEncoderDecoderModel = None
|
||||
|
||||
class Eynollah_ocr:
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -60,27 +51,16 @@ class Eynollah_ocr:
|
|||
model_zoo: EynollahModelZoo,
|
||||
tr_ocr=False,
|
||||
batch_size: Optional[int]=None,
|
||||
export_textline_images_and_text: bool=False,
|
||||
do_not_mask_with_textline_contour: bool=False,
|
||||
pref_of_dataset=None,
|
||||
min_conf_value_of_textline_text : Optional[float]=None,
|
||||
logger: Optional[Logger]=None,
|
||||
):
|
||||
self.tr_ocr = tr_ocr
|
||||
# For generating textline-image pairs for traning, move to generate_gt_for_training
|
||||
self.export_textline_images_and_text = export_textline_images_and_text
|
||||
# masking for OCR and GT generation, relevant for skewed lines and bounding boxes
|
||||
self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
|
||||
# prefix or dataset
|
||||
self.pref_of_dataset = pref_of_dataset
|
||||
self.logger = logger if logger else getLogger('eynollah.ocr')
|
||||
self.model_zoo = model_zoo
|
||||
|
||||
# TODO: Properly document what 'export_textline_images_and_text' is about
|
||||
if export_textline_images_and_text:
|
||||
self.logger.info("export_textline_images_and_text was set, so no actual models are loaded")
|
||||
return
|
||||
|
||||
self.min_conf_value_of_textline_text = min_conf_value_of_textline_text if min_conf_value_of_textline_text else 0.3
|
||||
self.b_s = 2 if batch_size is None and tr_ocr else 8 if batch_size is None else batch_size
|
||||
|
||||
|
|
@ -539,40 +519,55 @@ class Eynollah_ocr:
|
|||
mask_poly = mask_poly[y:y+h, x:x+w, :]
|
||||
img_crop = img_poly_on_img[y:y+h, x:x+w, :]
|
||||
|
||||
if self.export_textline_images_and_text:
|
||||
# print(file_name, angle_degrees, w*h,
|
||||
# mask_poly[:,:,0].sum(),
|
||||
# mask_poly[:,:,0].sum() /float(w*h) ,
|
||||
# 'didi')
|
||||
|
||||
if angle_degrees > 3:
|
||||
better_des_slope = get_orientation_moments(textline_coords)
|
||||
|
||||
img_crop = rotate_image_with_padding(img_crop, better_des_slope)
|
||||
if dir_in_bin is not None:
|
||||
img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope)
|
||||
|
||||
mask_poly = rotate_image_with_padding(mask_poly, better_des_slope)
|
||||
mask_poly = mask_poly.astype('uint8')
|
||||
|
||||
#new bounding box
|
||||
x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0])
|
||||
|
||||
mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
||||
img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
||||
|
||||
if not self.do_not_mask_with_textline_contour:
|
||||
img_crop[mask_poly==0] = 255
|
||||
|
||||
else:
|
||||
# print(file_name, angle_degrees, w*h,
|
||||
# mask_poly[:,:,0].sum(),
|
||||
# mask_poly[:,:,0].sum() /float(w*h) ,
|
||||
# 'didi')
|
||||
|
||||
if angle_degrees > 3:
|
||||
better_des_slope = get_orientation_moments(textline_coords)
|
||||
|
||||
img_crop = rotate_image_with_padding(img_crop, better_des_slope)
|
||||
if dir_in_bin is not None:
|
||||
img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope)
|
||||
|
||||
mask_poly = rotate_image_with_padding(mask_poly, better_des_slope)
|
||||
mask_poly = mask_poly.astype('uint8')
|
||||
|
||||
#new bounding box
|
||||
x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0])
|
||||
|
||||
mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
||||
img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
||||
|
||||
if dir_in_bin is not None:
|
||||
img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
||||
if not self.do_not_mask_with_textline_contour:
|
||||
img_crop[mask_poly==0] = 255
|
||||
img_crop_bin[mask_poly==0] = 255
|
||||
|
||||
if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90:
|
||||
if dir_in_bin is not None:
|
||||
img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
||||
if not self.do_not_mask_with_textline_contour:
|
||||
img_crop_bin[mask_poly==0] = 255
|
||||
|
||||
if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90:
|
||||
img_crop, img_crop_bin = \
|
||||
break_curved_line_into_small_pieces_and_then_merge(
|
||||
img_crop, mask_poly, img_crop_bin)
|
||||
else:
|
||||
img_crop, _ = \
|
||||
break_curved_line_into_small_pieces_and_then_merge(
|
||||
img_crop, mask_poly)
|
||||
|
||||
else:
|
||||
better_des_slope = 0
|
||||
if not self.do_not_mask_with_textline_contour:
|
||||
img_crop[mask_poly==0] = 255
|
||||
if dir_in_bin is not None:
|
||||
if not self.do_not_mask_with_textline_contour:
|
||||
img_crop_bin[mask_poly==0] = 255
|
||||
if type_textregion=='drop-capital':
|
||||
pass
|
||||
else:
|
||||
if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90:
|
||||
if dir_in_bin is not None:
|
||||
img_crop, img_crop_bin = \
|
||||
break_curved_line_into_small_pieces_and_then_merge(
|
||||
|
|
@ -581,188 +576,178 @@ class Eynollah_ocr:
|
|||
img_crop, _ = \
|
||||
break_curved_line_into_small_pieces_and_then_merge(
|
||||
img_crop, mask_poly)
|
||||
|
||||
else:
|
||||
better_des_slope = 0
|
||||
if not self.do_not_mask_with_textline_contour:
|
||||
img_crop[mask_poly==0] = 255
|
||||
if dir_in_bin is not None:
|
||||
if not self.do_not_mask_with_textline_contour:
|
||||
img_crop_bin[mask_poly==0] = 255
|
||||
if type_textregion=='drop-capital':
|
||||
pass
|
||||
else:
|
||||
if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90:
|
||||
if dir_in_bin is not None:
|
||||
img_crop, img_crop_bin = \
|
||||
break_curved_line_into_small_pieces_and_then_merge(
|
||||
img_crop, mask_poly, img_crop_bin)
|
||||
else:
|
||||
img_crop, _ = \
|
||||
break_curved_line_into_small_pieces_and_then_merge(
|
||||
img_crop, mask_poly)
|
||||
|
||||
if not self.export_textline_images_and_text:
|
||||
if w_scaled < 750:#1.5*image_width:
|
||||
if w_scaled < 750:#1.5*image_width:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
img_crop, image_height, image_width)
|
||||
cropped_lines.append(img_fin)
|
||||
if abs(better_des_slope) > 45:
|
||||
cropped_lines_ver_index.append(1)
|
||||
else:
|
||||
cropped_lines_ver_index.append(0)
|
||||
|
||||
cropped_lines_meging_indexing.append(0)
|
||||
if dir_in_bin is not None:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
img_crop, image_height, image_width)
|
||||
img_crop_bin, image_height, image_width)
|
||||
cropped_lines_bin.append(img_fin)
|
||||
else:
|
||||
splited_images, splited_images_bin = return_textlines_split_if_needed(
|
||||
img_crop, img_crop_bin if dir_in_bin is not None else None)
|
||||
if splited_images:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
splited_images[0], image_height, image_width)
|
||||
cropped_lines.append(img_fin)
|
||||
cropped_lines_meging_indexing.append(1)
|
||||
|
||||
if abs(better_des_slope) > 45:
|
||||
cropped_lines_ver_index.append(1)
|
||||
else:
|
||||
cropped_lines_ver_index.append(0)
|
||||
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
splited_images[1], image_height, image_width)
|
||||
|
||||
cropped_lines.append(img_fin)
|
||||
cropped_lines_meging_indexing.append(-1)
|
||||
|
||||
if abs(better_des_slope) > 45:
|
||||
cropped_lines_ver_index.append(1)
|
||||
else:
|
||||
cropped_lines_ver_index.append(0)
|
||||
|
||||
if dir_in_bin is not None:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
splited_images_bin[0], image_height, image_width)
|
||||
cropped_lines_bin.append(img_fin)
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
splited_images_bin[1], image_height, image_width)
|
||||
cropped_lines_bin.append(img_fin)
|
||||
|
||||
else:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
img_crop, image_height, image_width)
|
||||
cropped_lines.append(img_fin)
|
||||
cropped_lines_meging_indexing.append(0)
|
||||
|
||||
if abs(better_des_slope) > 45:
|
||||
cropped_lines_ver_index.append(1)
|
||||
else:
|
||||
cropped_lines_ver_index.append(0)
|
||||
|
||||
if dir_in_bin is not None:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
img_crop_bin, image_height, image_width)
|
||||
cropped_lines_bin.append(img_fin)
|
||||
else:
|
||||
splited_images, splited_images_bin = return_textlines_split_if_needed(
|
||||
img_crop, img_crop_bin if dir_in_bin is not None else None)
|
||||
if splited_images:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
splited_images[0], image_height, image_width)
|
||||
cropped_lines.append(img_fin)
|
||||
cropped_lines_meging_indexing.append(1)
|
||||
|
||||
if abs(better_des_slope) > 45:
|
||||
cropped_lines_ver_index.append(1)
|
||||
else:
|
||||
cropped_lines_ver_index.append(0)
|
||||
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
splited_images[1], image_height, image_width)
|
||||
|
||||
cropped_lines.append(img_fin)
|
||||
cropped_lines_meging_indexing.append(-1)
|
||||
|
||||
if abs(better_des_slope) > 45:
|
||||
cropped_lines_ver_index.append(1)
|
||||
else:
|
||||
cropped_lines_ver_index.append(0)
|
||||
|
||||
if dir_in_bin is not None:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
splited_images_bin[0], image_height, image_width)
|
||||
cropped_lines_bin.append(img_fin)
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
splited_images_bin[1], image_height, image_width)
|
||||
cropped_lines_bin.append(img_fin)
|
||||
|
||||
else:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
img_crop, image_height, image_width)
|
||||
cropped_lines.append(img_fin)
|
||||
cropped_lines_meging_indexing.append(0)
|
||||
|
||||
if abs(better_des_slope) > 45:
|
||||
cropped_lines_ver_index.append(1)
|
||||
else:
|
||||
cropped_lines_ver_index.append(0)
|
||||
|
||||
if dir_in_bin is not None:
|
||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
|
||||
img_crop_bin, image_height, image_width)
|
||||
cropped_lines_bin.append(img_fin)
|
||||
|
||||
if self.export_textline_images_and_text:
|
||||
if img_crop.shape[0]==0 or img_crop.shape[1]==0:
|
||||
pass
|
||||
else:
|
||||
if child_textlines.tag.endswith("TextEquiv"):
|
||||
for cheild_text in child_textlines:
|
||||
if cheild_text.tag.endswith("Unicode"):
|
||||
textline_text = cheild_text.text
|
||||
if textline_text:
|
||||
base_name = os.path.join(
|
||||
dir_out, file_name + '_line_' + str(indexer_textlines))
|
||||
if self.pref_of_dataset:
|
||||
base_name += '_' + self.pref_of_dataset
|
||||
if not self.do_not_mask_with_textline_contour:
|
||||
base_name += '_masked'
|
||||
|
||||
with open(base_name + '.txt', 'w') as text_file:
|
||||
text_file.write(textline_text)
|
||||
cv2.imwrite(base_name + '.png', img_crop)
|
||||
indexer_textlines+=1
|
||||
|
||||
if not self.export_textline_images_and_text:
|
||||
indexer_text_region = indexer_text_region +1
|
||||
|
||||
if not self.export_textline_images_and_text:
|
||||
extracted_texts = []
|
||||
extracted_conf_value = []
|
||||
|
||||
n_iterations = math.ceil(len(cropped_lines) / self.b_s)
|
||||
|
||||
for i in range(n_iterations):
|
||||
if i==(n_iterations-1):
|
||||
n_start = i*self.b_s
|
||||
imgs = cropped_lines[n_start:]
|
||||
imgs = np.array(imgs)
|
||||
imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3)
|
||||
|
||||
ver_imgs = np.array( cropped_lines_ver_index[n_start:] )
|
||||
indices_ver = np.where(ver_imgs == 1)[0]
|
||||
|
||||
#print(indices_ver, 'indices_ver')
|
||||
if len(indices_ver)>0:
|
||||
imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
|
||||
imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
|
||||
#print(imgs_ver_flipped, 'imgs_ver_flipped')
|
||||
|
||||
else:
|
||||
imgs_ver_flipped = None
|
||||
|
||||
if dir_in_bin is not None:
|
||||
imgs_bin = cropped_lines_bin[n_start:]
|
||||
imgs_bin = np.array(imgs_bin)
|
||||
imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3)
|
||||
|
||||
if len(indices_ver)>0:
|
||||
imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
|
||||
imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
|
||||
#print(imgs_ver_flipped, 'imgs_ver_flipped')
|
||||
|
||||
else:
|
||||
imgs_bin_ver_flipped = None
|
||||
else:
|
||||
n_start = i*self.b_s
|
||||
n_end = (i+1)*self.b_s
|
||||
imgs = cropped_lines[n_start:n_end]
|
||||
imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3)
|
||||
|
||||
indexer_text_region = indexer_text_region +1
|
||||
|
||||
extracted_texts = []
|
||||
extracted_conf_value = []
|
||||
|
||||
n_iterations = math.ceil(len(cropped_lines) / self.b_s)
|
||||
|
||||
for i in range(n_iterations):
|
||||
if i==(n_iterations-1):
|
||||
n_start = i*self.b_s
|
||||
imgs = cropped_lines[n_start:]
|
||||
imgs = np.array(imgs)
|
||||
imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3)
|
||||
|
||||
ver_imgs = np.array( cropped_lines_ver_index[n_start:] )
|
||||
indices_ver = np.where(ver_imgs == 1)[0]
|
||||
|
||||
#print(indices_ver, 'indices_ver')
|
||||
if len(indices_ver)>0:
|
||||
imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
|
||||
imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
|
||||
#print(imgs_ver_flipped, 'imgs_ver_flipped')
|
||||
|
||||
ver_imgs = np.array( cropped_lines_ver_index[n_start:n_end] )
|
||||
indices_ver = np.where(ver_imgs == 1)[0]
|
||||
#print(indices_ver, 'indices_ver')
|
||||
else:
|
||||
imgs_ver_flipped = None
|
||||
|
||||
if dir_in_bin is not None:
|
||||
imgs_bin = cropped_lines_bin[n_start:]
|
||||
imgs_bin = np.array(imgs_bin)
|
||||
imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3)
|
||||
|
||||
if len(indices_ver)>0:
|
||||
imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
|
||||
imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
|
||||
imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
|
||||
imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
|
||||
#print(imgs_ver_flipped, 'imgs_ver_flipped')
|
||||
|
||||
else:
|
||||
imgs_ver_flipped = None
|
||||
|
||||
|
||||
if dir_in_bin is not None:
|
||||
imgs_bin = cropped_lines_bin[n_start:n_end]
|
||||
imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)
|
||||
|
||||
|
||||
if len(indices_ver)>0:
|
||||
imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
|
||||
imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
|
||||
#print(imgs_ver_flipped, 'imgs_ver_flipped')
|
||||
else:
|
||||
imgs_bin_ver_flipped = None
|
||||
|
||||
|
||||
self.logger.debug("processing next %d lines", len(imgs))
|
||||
preds = self.model_zoo.get('ocr').predict(imgs, verbose=0)
|
||||
imgs_bin_ver_flipped = None
|
||||
else:
|
||||
n_start = i*self.b_s
|
||||
n_end = (i+1)*self.b_s
|
||||
imgs = cropped_lines[n_start:n_end]
|
||||
imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3)
|
||||
|
||||
ver_imgs = np.array( cropped_lines_ver_index[n_start:n_end] )
|
||||
indices_ver = np.where(ver_imgs == 1)[0]
|
||||
#print(indices_ver, 'indices_ver')
|
||||
|
||||
if len(indices_ver)>0:
|
||||
preds_flipped = self.model_zoo.get('ocr').predict(imgs_ver_flipped, verbose=0)
|
||||
imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
|
||||
imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
|
||||
#print(imgs_ver_flipped, 'imgs_ver_flipped')
|
||||
else:
|
||||
imgs_ver_flipped = None
|
||||
|
||||
|
||||
if dir_in_bin is not None:
|
||||
imgs_bin = cropped_lines_bin[n_start:n_end]
|
||||
imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)
|
||||
|
||||
|
||||
if len(indices_ver)>0:
|
||||
imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
|
||||
imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
|
||||
#print(imgs_ver_flipped, 'imgs_ver_flipped')
|
||||
else:
|
||||
imgs_bin_ver_flipped = None
|
||||
|
||||
|
||||
self.logger.debug("processing next %d lines", len(imgs))
|
||||
preds = self.model_zoo.get('ocr').predict(imgs, verbose=0)
|
||||
|
||||
if len(indices_ver)>0:
|
||||
preds_flipped = self.model_zoo.get('ocr').predict(imgs_ver_flipped, verbose=0)
|
||||
preds_max_fliped = np.max(preds_flipped, axis=2 )
|
||||
preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
|
||||
pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character
|
||||
masked_means_flipped = \
|
||||
np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \
|
||||
np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
|
||||
masked_means_flipped[np.isnan(masked_means_flipped)] = 0
|
||||
|
||||
preds_max = np.max(preds, axis=2 )
|
||||
preds_max_args = np.argmax(preds, axis=2 )
|
||||
pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
|
||||
|
||||
masked_means = \
|
||||
np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
|
||||
np.sum(pred_max_not_unk_mask_bool, axis=1)
|
||||
masked_means[np.isnan(masked_means)] = 0
|
||||
|
||||
masked_means_ver = masked_means[indices_ver]
|
||||
#print(masked_means_ver, 'pred_max_not_unk')
|
||||
|
||||
indices_where_flipped_conf_value_is_higher = \
|
||||
np.where(masked_means_flipped > masked_means_ver)[0]
|
||||
|
||||
#print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
|
||||
if len(indices_where_flipped_conf_value_is_higher)>0:
|
||||
indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
|
||||
preds[indices_to_be_replaced,:,:] = \
|
||||
preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
|
||||
if dir_in_bin is not None:
|
||||
preds_bin = self.model_zoo.get('ocr').predict(imgs_bin, verbose=0)
|
||||
|
||||
if len(indices_ver)>0:
|
||||
preds_flipped = self.model_zoo.get('ocr').predict(imgs_bin_ver_flipped, verbose=0)
|
||||
preds_max_fliped = np.max(preds_flipped, axis=2 )
|
||||
preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
|
||||
pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character
|
||||
|
|
@ -789,212 +774,179 @@ class Eynollah_ocr:
|
|||
#print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
|
||||
if len(indices_where_flipped_conf_value_is_higher)>0:
|
||||
indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
|
||||
preds[indices_to_be_replaced,:,:] = \
|
||||
preds_bin[indices_to_be_replaced,:,:] = \
|
||||
preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
|
||||
if dir_in_bin is not None:
|
||||
preds_bin = self.model_zoo.get('ocr').predict(imgs_bin, verbose=0)
|
||||
|
||||
if len(indices_ver)>0:
|
||||
preds_flipped = self.model_zoo.get('ocr').predict(imgs_bin_ver_flipped, verbose=0)
|
||||
preds_max_fliped = np.max(preds_flipped, axis=2 )
|
||||
preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
|
||||
pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character
|
||||
masked_means_flipped = \
|
||||
np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \
|
||||
np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
|
||||
masked_means_flipped[np.isnan(masked_means_flipped)] = 0
|
||||
|
||||
preds_max = np.max(preds, axis=2 )
|
||||
preds_max_args = np.argmax(preds, axis=2 )
|
||||
pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
|
||||
|
||||
masked_means = \
|
||||
np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
|
||||
np.sum(pred_max_not_unk_mask_bool, axis=1)
|
||||
masked_means[np.isnan(masked_means)] = 0
|
||||
|
||||
masked_means_ver = masked_means[indices_ver]
|
||||
#print(masked_means_ver, 'pred_max_not_unk')
|
||||
|
||||
indices_where_flipped_conf_value_is_higher = \
|
||||
np.where(masked_means_flipped > masked_means_ver)[0]
|
||||
|
||||
#print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
|
||||
if len(indices_where_flipped_conf_value_is_higher)>0:
|
||||
indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
|
||||
preds_bin[indices_to_be_replaced,:,:] = \
|
||||
preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
|
||||
|
||||
preds = (preds + preds_bin) / 2.
|
||||
|
||||
pred_texts = decode_batch_predictions(preds, self.model_zoo.get('num_to_char'))
|
||||
|
||||
preds_max = np.max(preds, axis=2 )
|
||||
preds_max_args = np.argmax(preds, axis=2 )
|
||||
pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
|
||||
masked_means = \
|
||||
np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
|
||||
np.sum(pred_max_not_unk_mask_bool, axis=1)
|
||||
preds = (preds + preds_bin) / 2.
|
||||
|
||||
for ib in range(imgs.shape[0]):
|
||||
pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
|
||||
if masked_means[ib] >= self.min_conf_value_of_textline_text:
|
||||
extracted_texts.append(pred_texts_ib)
|
||||
extracted_conf_value.append(masked_means[ib])
|
||||
else:
|
||||
extracted_texts.append("")
|
||||
extracted_conf_value.append(0)
|
||||
del cropped_lines
|
||||
if dir_in_bin is not None:
|
||||
del cropped_lines_bin
|
||||
gc.collect()
|
||||
pred_texts = decode_batch_predictions(preds, self.model_zoo.get('num_to_char'))
|
||||
|
||||
extracted_texts_merged = [extracted_texts[ind]
|
||||
if cropped_lines_meging_indexing[ind]==0
|
||||
else extracted_texts[ind]+" "+extracted_texts[ind+1]
|
||||
if cropped_lines_meging_indexing[ind]==1
|
||||
else None
|
||||
for ind in range(len(cropped_lines_meging_indexing))]
|
||||
|
||||
extracted_conf_value_merged = [extracted_conf_value[ind]
|
||||
if cropped_lines_meging_indexing[ind]==0
|
||||
else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2.
|
||||
if cropped_lines_meging_indexing[ind]==1
|
||||
else None
|
||||
for ind in range(len(cropped_lines_meging_indexing))]
|
||||
preds_max = np.max(preds, axis=2 )
|
||||
preds_max_args = np.argmax(preds, axis=2 )
|
||||
pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
|
||||
masked_means = \
|
||||
np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
|
||||
np.sum(pred_max_not_unk_mask_bool, axis=1)
|
||||
|
||||
extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm]
|
||||
for ind_cfm in range(len(extracted_texts_merged))
|
||||
if extracted_texts_merged[ind_cfm] is not None]
|
||||
extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
|
||||
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
|
||||
|
||||
if dir_out_image_text:
|
||||
#font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
|
||||
font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
|
||||
with importlib_resources.as_file(font) as font:
|
||||
font = ImageFont.truetype(font=font, size=40)
|
||||
|
||||
for indexer_text, bb_ind in enumerate(total_bb_coordinates):
|
||||
x_bb = bb_ind[0]
|
||||
y_bb = bb_ind[1]
|
||||
w_bb = bb_ind[2]
|
||||
h_bb = bb_ind[3]
|
||||
|
||||
font = fit_text_single_line(draw, extracted_texts_merged[indexer_text],
|
||||
font.path, w_bb, int(h_bb*0.4) )
|
||||
|
||||
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
|
||||
|
||||
text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
|
||||
text_width = text_bbox[2] - text_bbox[0]
|
||||
text_height = text_bbox[3] - text_bbox[1]
|
||||
|
||||
text_x = x_bb + (w_bb - text_width) // 2 # Center horizontally
|
||||
text_y = y_bb + (h_bb - text_height) // 2 # Center vertically
|
||||
|
||||
# Draw the text
|
||||
draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)
|
||||
image_text.save(out_image_with_text)
|
||||
|
||||
text_by_textregion = []
|
||||
for ind in unique_cropped_lines_region_indexer:
|
||||
ind = np.array(cropped_lines_region_indexer)==ind
|
||||
extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
|
||||
if len(extracted_texts_merged_un)>1:
|
||||
text_by_textregion_ind = ""
|
||||
next_glue = ""
|
||||
for indt in range(len(extracted_texts_merged_un)):
|
||||
if (extracted_texts_merged_un[indt].endswith('⸗') or
|
||||
extracted_texts_merged_un[indt].endswith('-') or
|
||||
extracted_texts_merged_un[indt].endswith('¬')):
|
||||
text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1]
|
||||
next_glue = ""
|
||||
else:
|
||||
text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt]
|
||||
next_glue = " "
|
||||
text_by_textregion.append(text_by_textregion_ind)
|
||||
for ib in range(imgs.shape[0]):
|
||||
pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
|
||||
if masked_means[ib] >= self.min_conf_value_of_textline_text:
|
||||
extracted_texts.append(pred_texts_ib)
|
||||
extracted_conf_value.append(masked_means[ib])
|
||||
else:
|
||||
text_by_textregion.append(" ".join(extracted_texts_merged_un))
|
||||
#print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion')
|
||||
extracted_texts.append("")
|
||||
extracted_conf_value.append(0)
|
||||
del cropped_lines
|
||||
if dir_in_bin is not None:
|
||||
del cropped_lines_bin
|
||||
gc.collect()
|
||||
|
||||
extracted_texts_merged = [extracted_texts[ind]
|
||||
if cropped_lines_meging_indexing[ind]==0
|
||||
else extracted_texts[ind]+" "+extracted_texts[ind+1]
|
||||
if cropped_lines_meging_indexing[ind]==1
|
||||
else None
|
||||
for ind in range(len(cropped_lines_meging_indexing))]
|
||||
|
||||
extracted_conf_value_merged = [extracted_conf_value[ind]
|
||||
if cropped_lines_meging_indexing[ind]==0
|
||||
else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2.
|
||||
if cropped_lines_meging_indexing[ind]==1
|
||||
else None
|
||||
for ind in range(len(cropped_lines_meging_indexing))]
|
||||
|
||||
###index_tot_regions = []
|
||||
###tot_region_ref = []
|
||||
extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm]
|
||||
for ind_cfm in range(len(extracted_texts_merged))
|
||||
if extracted_texts_merged[ind_cfm] is not None]
|
||||
extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
|
||||
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
|
||||
|
||||
if dir_out_image_text:
|
||||
#font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
|
||||
font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
|
||||
with importlib_resources.as_file(font) as font:
|
||||
font = ImageFont.truetype(font=font, size=40)
|
||||
|
||||
for indexer_text, bb_ind in enumerate(total_bb_coordinates):
|
||||
x_bb = bb_ind[0]
|
||||
y_bb = bb_ind[1]
|
||||
w_bb = bb_ind[2]
|
||||
h_bb = bb_ind[3]
|
||||
|
||||
font = fit_text_single_line(draw, extracted_texts_merged[indexer_text],
|
||||
font.path, w_bb, int(h_bb*0.4) )
|
||||
|
||||
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
|
||||
|
||||
text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
|
||||
text_width = text_bbox[2] - text_bbox[0]
|
||||
text_height = text_bbox[3] - text_bbox[1]
|
||||
|
||||
###for jj in root1.iter(link+'RegionRefIndexed'):
|
||||
###index_tot_regions.append(jj.attrib['index'])
|
||||
###tot_region_ref.append(jj.attrib['regionRef'])
|
||||
|
||||
###id_to_order = {tid: ro for tid, ro in zip(tot_region_ref, index_tot_regions)}
|
||||
|
||||
#id_textregions = []
|
||||
#textregions_by_existing_ids = []
|
||||
indexer = 0
|
||||
indexer_textregion = 0
|
||||
for nn in root1.iter(region_tags):
|
||||
#id_textregion = nn.attrib['id']
|
||||
#id_textregions.append(id_textregion)
|
||||
#textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
|
||||
|
||||
is_textregion_text = False
|
||||
for childtest in nn:
|
||||
if childtest.tag.endswith("TextEquiv"):
|
||||
is_textregion_text = True
|
||||
|
||||
if not is_textregion_text:
|
||||
text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
|
||||
unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')
|
||||
text_x = x_bb + (w_bb - text_width) // 2 # Center horizontally
|
||||
text_y = y_bb + (h_bb - text_height) // 2 # Center vertically
|
||||
|
||||
|
||||
has_textline = False
|
||||
for child_textregion in nn:
|
||||
if child_textregion.tag.endswith("TextLine"):
|
||||
|
||||
is_textline_text = False
|
||||
for childtest2 in child_textregion:
|
||||
if childtest2.tag.endswith("TextEquiv"):
|
||||
is_textline_text = True
|
||||
|
||||
|
||||
if not is_textline_text:
|
||||
text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
|
||||
text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
|
||||
unicode_textline = ET.SubElement(text_subelement, 'Unicode')
|
||||
unicode_textline.text = extracted_texts_merged[indexer]
|
||||
else:
|
||||
for childtest3 in child_textregion:
|
||||
if childtest3.tag.endswith("TextEquiv"):
|
||||
for child_uc in childtest3:
|
||||
if child_uc.tag.endswith("Unicode"):
|
||||
childtest3.set('conf',
|
||||
f"{extracted_conf_value_merged[indexer]:.2f}")
|
||||
child_uc.text = extracted_texts_merged[indexer]
|
||||
|
||||
indexer = indexer + 1
|
||||
has_textline = True
|
||||
if has_textline:
|
||||
if is_textregion_text:
|
||||
for child4 in nn:
|
||||
if child4.tag.endswith("TextEquiv"):
|
||||
for childtr_uc in child4:
|
||||
if childtr_uc.tag.endswith("Unicode"):
|
||||
childtr_uc.text = text_by_textregion[indexer_textregion]
|
||||
# Draw the text
|
||||
draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)
|
||||
image_text.save(out_image_with_text)
|
||||
|
||||
text_by_textregion = []
|
||||
for ind in unique_cropped_lines_region_indexer:
|
||||
ind = np.array(cropped_lines_region_indexer)==ind
|
||||
extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
|
||||
if len(extracted_texts_merged_un)>1:
|
||||
text_by_textregion_ind = ""
|
||||
next_glue = ""
|
||||
for indt in range(len(extracted_texts_merged_un)):
|
||||
if (extracted_texts_merged_un[indt].endswith('⸗') or
|
||||
extracted_texts_merged_un[indt].endswith('-') or
|
||||
extracted_texts_merged_un[indt].endswith('¬')):
|
||||
text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1]
|
||||
next_glue = ""
|
||||
else:
|
||||
unicode_textregion.text = text_by_textregion[indexer_textregion]
|
||||
indexer_textregion = indexer_textregion + 1
|
||||
text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt]
|
||||
next_glue = " "
|
||||
text_by_textregion.append(text_by_textregion_ind)
|
||||
else:
|
||||
text_by_textregion.append(" ".join(extracted_texts_merged_un))
|
||||
#print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion')
|
||||
|
||||
###index_tot_regions = []
|
||||
###tot_region_ref = []
|
||||
|
||||
###for jj in root1.iter(link+'RegionRefIndexed'):
|
||||
###index_tot_regions.append(jj.attrib['index'])
|
||||
###tot_region_ref.append(jj.attrib['regionRef'])
|
||||
|
||||
###id_to_order = {tid: ro for tid, ro in zip(tot_region_ref, index_tot_regions)}
|
||||
|
||||
#id_textregions = []
|
||||
#textregions_by_existing_ids = []
|
||||
indexer = 0
|
||||
indexer_textregion = 0
|
||||
for nn in root1.iter(region_tags):
|
||||
#id_textregion = nn.attrib['id']
|
||||
#id_textregions.append(id_textregion)
|
||||
#textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
|
||||
|
||||
is_textregion_text = False
|
||||
for childtest in nn:
|
||||
if childtest.tag.endswith("TextEquiv"):
|
||||
is_textregion_text = True
|
||||
|
||||
if not is_textregion_text:
|
||||
text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
|
||||
unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')
|
||||
|
||||
|
||||
has_textline = False
|
||||
for child_textregion in nn:
|
||||
if child_textregion.tag.endswith("TextLine"):
|
||||
|
||||
###sample_order = [(id_to_order[tid], text)
|
||||
### for tid, text in zip(id_textregions, textregions_by_existing_ids)
|
||||
### if tid in id_to_order]
|
||||
|
||||
##ordered_texts_sample = [text for _, text in sorted(sample_order)]
|
||||
##tot_page_text = ' '.join(ordered_texts_sample)
|
||||
|
||||
##for page_element in root1.iter(link+'Page'):
|
||||
##text_page = ET.SubElement(page_element, 'TextEquiv')
|
||||
##unicode_textpage = ET.SubElement(text_page, 'Unicode')
|
||||
##unicode_textpage.text = tot_page_text
|
||||
|
||||
ET.register_namespace("",name_space)
|
||||
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None)
|
||||
#print("Job done in %.1fs", time.time() - t0)
|
||||
is_textline_text = False
|
||||
for childtest2 in child_textregion:
|
||||
if childtest2.tag.endswith("TextEquiv"):
|
||||
is_textline_text = True
|
||||
|
||||
|
||||
if not is_textline_text:
|
||||
text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
|
||||
text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
|
||||
unicode_textline = ET.SubElement(text_subelement, 'Unicode')
|
||||
unicode_textline.text = extracted_texts_merged[indexer]
|
||||
else:
|
||||
for childtest3 in child_textregion:
|
||||
if childtest3.tag.endswith("TextEquiv"):
|
||||
for child_uc in childtest3:
|
||||
if child_uc.tag.endswith("Unicode"):
|
||||
childtest3.set('conf',
|
||||
f"{extracted_conf_value_merged[indexer]:.2f}")
|
||||
child_uc.text = extracted_texts_merged[indexer]
|
||||
|
||||
indexer = indexer + 1
|
||||
has_textline = True
|
||||
if has_textline:
|
||||
if is_textregion_text:
|
||||
for child4 in nn:
|
||||
if child4.tag.endswith("TextEquiv"):
|
||||
for childtr_uc in child4:
|
||||
if childtr_uc.tag.endswith("Unicode"):
|
||||
childtr_uc.text = text_by_textregion[indexer_textregion]
|
||||
else:
|
||||
unicode_textregion.text = text_by_textregion[indexer_textregion]
|
||||
indexer_textregion = indexer_textregion + 1
|
||||
|
||||
###sample_order = [(id_to_order[tid], text)
|
||||
### for tid, text in zip(id_textregions, textregions_by_existing_ids)
|
||||
### if tid in id_to_order]
|
||||
|
||||
##ordered_texts_sample = [text for _, text in sorted(sample_order)]
|
||||
##tot_page_text = ' '.join(ordered_texts_sample)
|
||||
|
||||
##for page_element in root1.iter(link+'Page'):
|
||||
##text_page = ET.SubElement(page_element, 'TextEquiv')
|
||||
##unicode_textpage = ET.SubElement(text_page, 'Unicode')
|
||||
##unicode_textpage.text = tot_page_text
|
||||
|
||||
ET.register_namespace("",name_space)
|
||||
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None)
|
||||
#print("Job done in %.1fs", time.time() - t0)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from .build_model_load_pretrained_weights_and_save import build_model_load_pretr
|
|||
from .generate_gt_for_training import main as generate_gt_cli
|
||||
from .inference import main as inference_cli
|
||||
from .train import ex
|
||||
from .extract_line_gt import linegt_cli
|
||||
|
||||
@click.command(context_settings=dict(
|
||||
ignore_unknown_options=True,
|
||||
|
|
@ -24,3 +25,4 @@ main.add_command(build_model_load_pretrained_weights_and_save)
|
|||
main.add_command(generate_gt_cli, 'generate-gt')
|
||||
main.add_command(inference_cli, 'inference')
|
||||
main.add_command(train_cli, 'train')
|
||||
main.add_command(linegt_cli, 'export_textline_images_and_text')
|
||||
|
|
|
|||
136
src/eynollah/training/extract_line_gt.py
Normal file
136
src/eynollah/training/extract_line_gt.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
from logging import Logger, getLogger
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
import os
|
||||
|
||||
import click
|
||||
import cv2
|
||||
import xml.etree.ElementTree as ET
|
||||
import numpy as np
|
||||
|
||||
from ..utils import is_image_filename
|
||||
|
||||
@click.command()
|
||||
@click.option(
|
||||
"--image",
|
||||
"-i",
|
||||
help="input image filename",
|
||||
type=click.Path(exists=True, dir_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--dir_in",
|
||||
"-di",
|
||||
'image_filename',
|
||||
help="directory of input images (instead of --image)",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--dir_xmls",
|
||||
"-dx",
|
||||
help="directory of input PAGE-XML files (in addition to --dir_in; filename stems must match the image files, with '.xml' suffix).",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--out",
|
||||
"-o",
|
||||
'dir_out',
|
||||
help="directory for output PAGE-XML files",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--dataset_abbrevation",
|
||||
"-ds_pref",
|
||||
'pref_of_dataset',
|
||||
help="in the case of extracting textline and text from a xml GT file user can add an abbrevation of dataset name to generated dataset",
|
||||
)
|
||||
@click.option(
|
||||
"--do_not_mask_with_textline_contour",
|
||||
"-nmtc/-mtc",
|
||||
is_flag=True,
|
||||
help="if this parameter set to true, cropped textline images will not be masked with textline contour.",
|
||||
)
|
||||
def linegt_cli(
|
||||
image_filename,
|
||||
dir_in,
|
||||
dir_xmls,
|
||||
dir_out,
|
||||
pref_of_dataset,
|
||||
do_not_mask_with_textline_contour,
|
||||
):
|
||||
assert bool(dir_in) ^ bool(image_filename), "Set --dir-in or --image-filename, not both"
|
||||
if dir_in:
|
||||
ls_imgs = [
|
||||
os.path.join(dir_in, image_filename) for image_filename in filter(is_image_filename, os.listdir(dir_in))
|
||||
]
|
||||
else:
|
||||
assert image_filename
|
||||
ls_imgs = [image_filename]
|
||||
|
||||
for dir_img in ls_imgs:
|
||||
file_name = Path(dir_img).stem
|
||||
dir_xml = os.path.join(dir_xmls, file_name + '.xml')
|
||||
|
||||
img = cv2.imread(dir_img)
|
||||
|
||||
total_bb_coordinates = []
|
||||
|
||||
tree1 = ET.parse(dir_xml, parser=ET.XMLParser(encoding="utf-8"))
|
||||
root1 = tree1.getroot()
|
||||
alltags = [elem.tag for elem in root1.iter()]
|
||||
|
||||
name_space = alltags[0].split('}')[0]
|
||||
name_space = name_space.split('{')[1]
|
||||
|
||||
region_tags = [x for x in alltags if x.endswith('TextRegion')][0]
|
||||
|
||||
cropped_lines_region_indexer = []
|
||||
|
||||
indexer_text_region = 0
|
||||
indexer_textlines = 0
|
||||
# FIXME: non recursive, use OCR-D PAGE generateDS API. Or use an existing tool for this purpose altogether
|
||||
for nn in root1.iter(region_tags):
|
||||
for child_textregion in nn:
|
||||
if child_textregion.tag.endswith("TextLine"):
|
||||
for child_textlines in child_textregion:
|
||||
if child_textlines.tag.endswith("Coords"):
|
||||
cropped_lines_region_indexer.append(indexer_text_region)
|
||||
p_h = child_textlines.attrib['points'].split(' ')
|
||||
textline_coords = np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])
|
||||
|
||||
x, y, w, h = cv2.boundingRect(textline_coords)
|
||||
|
||||
total_bb_coordinates.append([x, y, w, h])
|
||||
|
||||
img_poly_on_img = np.copy(img)
|
||||
|
||||
mask_poly = np.zeros(img.shape)
|
||||
mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1))
|
||||
|
||||
mask_poly = mask_poly[y : y + h, x : x + w, :]
|
||||
img_crop = img_poly_on_img[y : y + h, x : x + w, :]
|
||||
|
||||
if not do_not_mask_with_textline_contour:
|
||||
img_crop[mask_poly == 0] = 255
|
||||
|
||||
if img_crop.shape[0] == 0 or img_crop.shape[1] == 0:
|
||||
continue
|
||||
|
||||
if child_textlines.tag.endswith("TextEquiv"):
|
||||
for cheild_text in child_textlines:
|
||||
if cheild_text.tag.endswith("Unicode"):
|
||||
textline_text = cheild_text.text
|
||||
if textline_text:
|
||||
base_name = os.path.join(
|
||||
dir_out, file_name + '_line_' + str(indexer_textlines)
|
||||
)
|
||||
if pref_of_dataset:
|
||||
base_name += '_' + pref_of_dataset
|
||||
if not do_not_mask_with_textline_contour:
|
||||
base_name += '_masked'
|
||||
|
||||
with open(base_name + '.txt', 'w') as text_file:
|
||||
text_file.write(textline_text)
|
||||
cv2.imwrite(base_name + '.png', img_crop)
|
||||
indexer_textlines += 1
|
||||
Loading…
Add table
Add a link
Reference in a new issue