mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-08-29 11:59:55 +02:00
Allow adding a dataset abbreviation to extracted textline images and text
This commit is contained in:
parent
8c8fa461bb
commit
5d447abcc4
2 changed files with 71 additions and 37 deletions
|
@ -342,7 +342,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
||||||
"-m",
|
"-m",
|
||||||
help="directory of models",
|
help="directory of models",
|
||||||
type=click.Path(exists=True, file_okay=False),
|
type=click.Path(exists=True, file_okay=False),
|
||||||
required=True,
|
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"--tr_ocr",
|
"--tr_ocr",
|
||||||
|
@ -379,6 +378,11 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
||||||
"-bs",
|
"-bs",
|
||||||
help="number of inference batch size. Default b_s for trocr and cnn_rnn models are 2 and 8 respectively",
|
help="number of inference batch size. Default b_s for trocr and cnn_rnn models are 2 and 8 respectively",
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
"--dataset_abbrevation",
|
||||||
|
"-ds_pref",
|
||||||
|
help="in the case of extracting textline and text from a xml GT file user can add an abbrevation of dataset name to generated dataset",
|
||||||
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"--log_level",
|
"--log_level",
|
||||||
"-l",
|
"-l",
|
||||||
|
@ -386,10 +390,18 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
||||||
help="Override log level globally to this",
|
help="Override log level globally to this",
|
||||||
)
|
)
|
||||||
|
|
||||||
def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, log_level):
|
def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
|
||||||
initLogging()
|
initLogging()
|
||||||
if log_level:
|
if log_level:
|
||||||
getLogger('eynollah').setLevel(getLevelName(log_level))
|
getLogger('eynollah').setLevel(getLevelName(log_level))
|
||||||
|
assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr"
|
||||||
|
assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m"
|
||||||
|
assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs"
|
||||||
|
assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib"
|
||||||
|
assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit"
|
||||||
|
assert not export_textline_images_and_text or not draw_texts_on_image, "Exporting textline and text -etit can not be set alongside draw text on image -dtoi"
|
||||||
|
assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text -etit can not be set alongside prediction with both rgb and bin -brb"
|
||||||
|
|
||||||
eynollah_ocr = Eynollah_ocr(
|
eynollah_ocr = Eynollah_ocr(
|
||||||
dir_xmls=dir_xmls,
|
dir_xmls=dir_xmls,
|
||||||
dir_out_image_text=dir_out_image_text,
|
dir_out_image_text=dir_out_image_text,
|
||||||
|
@ -403,6 +415,7 @@ def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, ex
|
||||||
draw_texts_on_image=draw_texts_on_image,
|
draw_texts_on_image=draw_texts_on_image,
|
||||||
prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin,
|
prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin,
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
|
pref_of_dataset=dataset_abbrevation,
|
||||||
)
|
)
|
||||||
eynollah_ocr.run()
|
eynollah_ocr.run()
|
||||||
|
|
||||||
|
|
|
@ -4877,6 +4877,7 @@ class Eynollah_ocr:
|
||||||
do_not_mask_with_textline_contour=False,
|
do_not_mask_with_textline_contour=False,
|
||||||
draw_texts_on_image=False,
|
draw_texts_on_image=False,
|
||||||
prediction_with_both_of_rgb_and_bin=False,
|
prediction_with_both_of_rgb_and_bin=False,
|
||||||
|
pref_of_dataset = None,
|
||||||
logger=None,
|
logger=None,
|
||||||
):
|
):
|
||||||
self.dir_in = dir_in
|
self.dir_in = dir_in
|
||||||
|
@ -4890,43 +4891,45 @@ class Eynollah_ocr:
|
||||||
self.draw_texts_on_image = draw_texts_on_image
|
self.draw_texts_on_image = draw_texts_on_image
|
||||||
self.dir_out_image_text = dir_out_image_text
|
self.dir_out_image_text = dir_out_image_text
|
||||||
self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
|
self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
|
||||||
if tr_ocr:
|
self.pref_of_dataset = pref_of_dataset
|
||||||
self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
|
if not export_textline_images_and_text:
|
||||||
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
if tr_ocr:
|
||||||
self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
|
self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
|
||||||
self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
|
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||||
self.model_ocr.to(self.device)
|
self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
|
||||||
if not batch_size:
|
self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
|
||||||
self.b_s = 2
|
self.model_ocr.to(self.device)
|
||||||
|
if not batch_size:
|
||||||
|
self.b_s = 2
|
||||||
|
else:
|
||||||
|
self.b_s = int(batch_size)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.b_s = int(batch_size)
|
self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
|
||||||
|
model_ocr = load_model(self.model_ocr_dir , compile=False)
|
||||||
|
|
||||||
else:
|
self.prediction_model = tf.keras.models.Model(
|
||||||
self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
|
model_ocr.get_layer(name = "image").input,
|
||||||
model_ocr = load_model(self.model_ocr_dir , compile=False)
|
model_ocr.get_layer(name = "dense2").output)
|
||||||
|
if not batch_size:
|
||||||
self.prediction_model = tf.keras.models.Model(
|
self.b_s = 8
|
||||||
model_ocr.get_layer(name = "image").input,
|
else:
|
||||||
model_ocr.get_layer(name = "dense2").output)
|
self.b_s = int(batch_size)
|
||||||
if not batch_size:
|
|
||||||
self.b_s = 8
|
|
||||||
else:
|
|
||||||
self.b_s = int(batch_size)
|
|
||||||
|
|
||||||
|
|
||||||
with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
|
with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
|
||||||
characters = json.load(config_file)
|
characters = json.load(config_file)
|
||||||
|
|
||||||
|
|
||||||
AUTOTUNE = tf.data.AUTOTUNE
|
AUTOTUNE = tf.data.AUTOTUNE
|
||||||
|
|
||||||
# Mapping characters to integers.
|
# Mapping characters to integers.
|
||||||
char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)
|
char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)
|
||||||
|
|
||||||
# Mapping integers back to original characters.
|
# Mapping integers back to original characters.
|
||||||
self.num_to_char = StringLookup(
|
self.num_to_char = StringLookup(
|
||||||
vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
|
vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def decode_batch_predictions(self, pred, max_len = 128):
|
def decode_batch_predictions(self, pred, max_len = 128):
|
||||||
|
@ -5365,10 +5368,28 @@ class Eynollah_ocr:
|
||||||
if cheild_text.tag.endswith("Unicode"):
|
if cheild_text.tag.endswith("Unicode"):
|
||||||
textline_text = cheild_text.text
|
textline_text = cheild_text.text
|
||||||
if textline_text:
|
if textline_text:
|
||||||
with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file:
|
if self.do_not_mask_with_textline_contour:
|
||||||
text_file.write(textline_text)
|
if self.pref_of_dataset:
|
||||||
|
with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.txt'), 'w') as text_file:
|
||||||
|
text_file.write(textline_text)
|
||||||
|
|
||||||
cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop )
|
cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.png'), img_crop )
|
||||||
|
else:
|
||||||
|
with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file:
|
||||||
|
text_file.write(textline_text)
|
||||||
|
|
||||||
|
cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop )
|
||||||
|
else:
|
||||||
|
if self.pref_of_dataset:
|
||||||
|
with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.txt'), 'w') as text_file:
|
||||||
|
text_file.write(textline_text)
|
||||||
|
|
||||||
|
cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.png'), img_crop )
|
||||||
|
else:
|
||||||
|
with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.txt'), 'w') as text_file:
|
||||||
|
text_file.write(textline_text)
|
||||||
|
|
||||||
|
cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.png'), img_crop )
|
||||||
|
|
||||||
indexer_textlines+=1
|
indexer_textlines+=1
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue