mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-27 15:54:13 +01:00
Merge branch 'main' into loky-with-shm-for-175-rebuilt
This commit is contained in:
commit
13f85b0d5c
26 changed files with 8138 additions and 694 deletions
BIN
src/eynollah/Charis-Regular.ttf
Normal file
BIN
src/eynollah/Charis-Regular.ttf
Normal file
Binary file not shown.
|
|
@ -1,8 +1,11 @@
|
|||
import sys
|
||||
import click
|
||||
import logging
|
||||
from ocrd_utils import initLogging, getLevelName, getLogger
|
||||
from eynollah.eynollah import Eynollah, Eynollah_ocr
|
||||
from eynollah.sbb_binarize import SbbBinarizer
|
||||
from eynollah.image_enhancer import Enhancer
|
||||
from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout
|
||||
|
||||
@click.group()
|
||||
def main():
|
||||
|
|
@ -10,79 +13,98 @@ def main():
|
|||
|
||||
@main.command()
|
||||
@click.option(
|
||||
"--dir_xml",
|
||||
"-dx",
|
||||
help="directory of GT page-xml files",
|
||||
"--input",
|
||||
"-i",
|
||||
help="PAGE-XML input filename",
|
||||
type=click.Path(exists=True, dir_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--dir_in",
|
||||
"-di",
|
||||
help="directory of PAGE-XML input files (instead of --input)",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--dir_out_modal_image",
|
||||
"-domi",
|
||||
help="directory where ground truth images would be written",
|
||||
"--out",
|
||||
"-o",
|
||||
help="directory for output images",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--dir_out_classes",
|
||||
"-docl",
|
||||
help="directory where ground truth classes would be written",
|
||||
"--model",
|
||||
"-m",
|
||||
help="directory of models",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--input_height",
|
||||
"-ih",
|
||||
help="input height",
|
||||
"--log_level",
|
||||
"-l",
|
||||
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
|
||||
help="Override log level globally to this",
|
||||
)
|
||||
@click.option(
|
||||
"--input_width",
|
||||
"-iw",
|
||||
help="input width",
|
||||
)
|
||||
@click.option(
|
||||
"--min_area_size",
|
||||
"-min",
|
||||
help="min area size of regions considered for reading order training.",
|
||||
)
|
||||
def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size):
|
||||
xml_files_ind = os.listdir(dir_xml)
|
||||
|
||||
def machine_based_reading_order(input, dir_in, out, model, log_level):
|
||||
assert bool(input) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||
orderer = machine_based_reading_order_on_layout(model)
|
||||
if log_level:
|
||||
orderer.logger.setLevel(getLevelName(log_level))
|
||||
|
||||
orderer.run(xml_filename=input,
|
||||
dir_in=dir_in,
|
||||
dir_out=out,
|
||||
)
|
||||
|
||||
|
||||
@main.command()
|
||||
@click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.')
|
||||
@click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction')
|
||||
@click.argument('input_image', required=False)
|
||||
@click.argument('output_image', required=False)
|
||||
@click.option(
|
||||
"--input-image", "--image",
|
||||
"-i",
|
||||
help="input image filename",
|
||||
type=click.Path(exists=True, dir_okay=False)
|
||||
)
|
||||
@click.option(
|
||||
"--dir_in",
|
||||
"-di",
|
||||
help="directory of input images",
|
||||
help="directory of input images (instead of --image)",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--dir_out",
|
||||
"-do",
|
||||
help="directory for output images",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
"--output",
|
||||
"-o",
|
||||
help="output image (if using -i) or output image directory (if using -di)",
|
||||
type=click.Path(file_okay=True, dir_okay=True),
|
||||
required=True,
|
||||
)
|
||||
def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out):
|
||||
assert (dir_out is None) == (dir_in is None), "Options -di and -do are mutually dependent"
|
||||
assert (input_image is None) == (output_image is None), "INPUT_IMAGE and OUTPUT_IMAGE are mutually dependent"
|
||||
assert (dir_in is None) != (input_image is None), "Specify either -di and -do options, or INPUT_IMAGE and OUTPUT_IMAGE"
|
||||
SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, dir_out=dir_out)
|
||||
|
||||
|
||||
@click.option(
|
||||
"--log_level",
|
||||
"-l",
|
||||
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
|
||||
help="Override log level globally to this",
|
||||
)
|
||||
def binarization(patches, model_dir, input_image, dir_in, output, log_level):
|
||||
assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||
binarizer = SbbBinarizer(model_dir)
|
||||
if log_level:
|
||||
binarizer.log.setLevel(getLevelName(log_level))
|
||||
binarizer.run(image_path=input_image, use_patches=patches, output=output, dir_in=dir_in)
|
||||
|
||||
|
||||
@main.command()
|
||||
@click.option(
|
||||
"--image",
|
||||
"-i",
|
||||
help="image filename",
|
||||
help="input image filename",
|
||||
type=click.Path(exists=True, dir_okay=False),
|
||||
)
|
||||
|
||||
@click.option(
|
||||
"--out",
|
||||
"-o",
|
||||
help="directory to write output xml data",
|
||||
help="directory for output PAGE-XML files",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
|
|
@ -95,7 +117,82 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
|
|||
@click.option(
|
||||
"--dir_in",
|
||||
"-di",
|
||||
help="directory of images",
|
||||
help="directory of input images (instead of --image)",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--model",
|
||||
"-m",
|
||||
help="directory of models",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
|
||||
@click.option(
|
||||
"--num_col_upper",
|
||||
"-ncu",
|
||||
help="lower limit of columns in document image",
|
||||
)
|
||||
@click.option(
|
||||
"--num_col_lower",
|
||||
"-ncl",
|
||||
help="upper limit of columns in document image",
|
||||
)
|
||||
@click.option(
|
||||
"--save_org_scale/--no_save_org_scale",
|
||||
"-sos/-nosos",
|
||||
is_flag=True,
|
||||
help="if this parameter set to true, this tool will save the enhanced image in org scale.",
|
||||
)
|
||||
@click.option(
|
||||
"--log_level",
|
||||
"-l",
|
||||
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
|
||||
help="Override log level globally to this",
|
||||
)
|
||||
|
||||
def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, save_org_scale, log_level):
|
||||
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||
initLogging()
|
||||
enhancer = Enhancer(
|
||||
model,
|
||||
num_col_upper=num_col_upper,
|
||||
num_col_lower=num_col_lower,
|
||||
save_org_scale=save_org_scale,
|
||||
)
|
||||
if log_level:
|
||||
enhancer.logger.setLevel(getLevelName(log_level))
|
||||
enhancer.run(overwrite=overwrite,
|
||||
dir_in=dir_in,
|
||||
image_filename=image,
|
||||
dir_out=out,
|
||||
)
|
||||
|
||||
@main.command()
|
||||
@click.option(
|
||||
"--image",
|
||||
"-i",
|
||||
help="input image filename",
|
||||
type=click.Path(exists=True, dir_okay=False),
|
||||
)
|
||||
|
||||
@click.option(
|
||||
"--out",
|
||||
"-o",
|
||||
help="directory for output PAGE-XML files",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--overwrite",
|
||||
"-O",
|
||||
help="overwrite (instead of skipping) if output xml exists",
|
||||
is_flag=True,
|
||||
)
|
||||
@click.option(
|
||||
"--dir_in",
|
||||
"-di",
|
||||
help="directory of input images (instead of --image)",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
|
|
@ -225,6 +322,17 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
|
|||
is_flag=True,
|
||||
help="if this parameter set to true, this tool will try to do ocr",
|
||||
)
|
||||
@click.option(
|
||||
"--transformer_ocr",
|
||||
"-tr/-notr",
|
||||
is_flag=True,
|
||||
help="if this parameter set to true, this tool will apply transformer ocr",
|
||||
)
|
||||
@click.option(
|
||||
"--batch_size_ocr",
|
||||
"-bs_ocr",
|
||||
help="number of inference batch size of ocr model. Default b_s for trocr and cnn_rnn models are 2 and 8 respectively",
|
||||
)
|
||||
@click.option(
|
||||
"--num_col_upper",
|
||||
"-ncu",
|
||||
|
|
@ -235,23 +343,46 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
|
|||
"-ncl",
|
||||
help="upper limit of columns in document image",
|
||||
)
|
||||
@click.option(
|
||||
"--threshold_art_class_layout",
|
||||
"-tharl",
|
||||
help="threshold of artifical class in the case of layout detection. The default value is 0.1",
|
||||
)
|
||||
@click.option(
|
||||
"--threshold_art_class_textline",
|
||||
"-thart",
|
||||
help="threshold of artifical class in the case of textline detection. The default value is 0.1",
|
||||
)
|
||||
@click.option(
|
||||
"--skip_layout_and_reading_order",
|
||||
"-slro/-noslro",
|
||||
is_flag=True,
|
||||
help="if this parameter set to true, this tool will ignore layout detection and reading order. It means that textline detection will be done within printspace and contours of textline will be written in xml output file.",
|
||||
)
|
||||
# TODO move to top-level CLI context
|
||||
@click.option(
|
||||
"--log_level",
|
||||
"-l",
|
||||
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
|
||||
help="Override log level globally to this",
|
||||
help="Override 'eynollah' log level globally to this",
|
||||
)
|
||||
#
|
||||
@click.option(
|
||||
"--setup-logging",
|
||||
is_flag=True,
|
||||
help="Setup a basic console logger",
|
||||
)
|
||||
|
||||
def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level):
|
||||
initLogging()
|
||||
if log_level:
|
||||
getLogger('eynollah').setLevel(getLevelName(log_level))
|
||||
def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level, setup_logging):
|
||||
if setup_logging:
|
||||
console_handler = logging.StreamHandler(sys.stdout)
|
||||
console_handler.setLevel(logging.INFO)
|
||||
formatter = logging.Formatter('%(message)s')
|
||||
console_handler.setFormatter(formatter)
|
||||
getLogger('eynollah').addHandler(console_handler)
|
||||
getLogger('eynollah').setLevel(logging.INFO)
|
||||
else:
|
||||
initLogging()
|
||||
assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep"
|
||||
assert enable_plotting or not save_deskewed, "Plotting with -sd also requires -ep"
|
||||
assert enable_plotting or not save_all, "Plotting with -sa also requires -ep"
|
||||
|
|
@ -270,17 +401,10 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
|||
assert not extract_only_images or not tables, "Image extraction -eoi can not be set alongside tables -tab"
|
||||
assert not extract_only_images or not right2left, "Image extraction -eoi can not be set alongside right2left -r2l"
|
||||
assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
|
||||
assert image or dir_in, "Either a single image -i or a dir_in -di is required"
|
||||
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||
eynollah = Eynollah(
|
||||
model,
|
||||
logger=getLogger('eynollah'),
|
||||
dir_out=out,
|
||||
dir_of_cropped_images=save_images,
|
||||
extract_only_images=extract_only_images,
|
||||
dir_of_layout=save_layout,
|
||||
dir_of_deskewed=save_deskewed,
|
||||
dir_of_all=save_all,
|
||||
dir_save_page=save_page,
|
||||
enable_plotting=enable_plotting,
|
||||
allow_enhancement=allow_enhancement,
|
||||
curved_line=curved_line,
|
||||
|
|
@ -295,54 +419,82 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
|||
ignore_page_extraction=ignore_page_extraction,
|
||||
reading_order_machine_based=reading_order_machine_based,
|
||||
do_ocr=do_ocr,
|
||||
transformer_ocr=transformer_ocr,
|
||||
batch_size_ocr=batch_size_ocr,
|
||||
num_col_upper=num_col_upper,
|
||||
num_col_lower=num_col_lower,
|
||||
skip_layout_and_reading_order=skip_layout_and_reading_order,
|
||||
threshold_art_class_textline=threshold_art_class_textline,
|
||||
threshold_art_class_layout=threshold_art_class_layout,
|
||||
)
|
||||
if log_level:
|
||||
eynollah.logger.setLevel(getLevelName(log_level))
|
||||
eynollah.run(overwrite=overwrite,
|
||||
image_filename=image,
|
||||
dir_in=dir_in,
|
||||
dir_out=out,
|
||||
dir_of_cropped_images=save_images,
|
||||
dir_of_layout=save_layout,
|
||||
dir_of_deskewed=save_deskewed,
|
||||
dir_of_all=save_all,
|
||||
dir_save_page=save_page,
|
||||
)
|
||||
if dir_in:
|
||||
eynollah.run(dir_in=dir_in, overwrite=overwrite)
|
||||
else:
|
||||
eynollah.run(image_filename=image, overwrite=overwrite)
|
||||
|
||||
|
||||
@main.command()
|
||||
@click.option(
|
||||
"--image",
|
||||
"-i",
|
||||
help="input image filename",
|
||||
type=click.Path(exists=True, dir_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--dir_in",
|
||||
"-di",
|
||||
help="directory of images",
|
||||
help="directory of input images (instead of --image)",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--dir_in_bin",
|
||||
"-dib",
|
||||
help="directory of binarized images. This should be given if you want to do prediction based on both rgb and bin images. And all bin images are png files",
|
||||
help="directory of binarized images (in addition to --dir_in for RGB images; filename stems must match the RGB image files, with '.png' suffix).\nPerform prediction using both RGB and binary images. (This does not necessarily improve results, however it may be beneficial for certain document images.)",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--out",
|
||||
"-o",
|
||||
help="directory to write output xml data",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--dir_xmls",
|
||||
"-dx",
|
||||
help="directory of xmls",
|
||||
help="directory of input PAGE-XML files (in addition to --dir_in; filename stems must match the image files, with '.xml' suffix).",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--out",
|
||||
"-o",
|
||||
help="directory for output PAGE-XML files",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--dir_out_image_text",
|
||||
"-doit",
|
||||
help="directory of images with predicted text",
|
||||
help="directory for output images, newly rendered with predicted text",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--overwrite",
|
||||
"-O",
|
||||
help="overwrite (instead of skipping) if output xml exists",
|
||||
is_flag=True,
|
||||
)
|
||||
@click.option(
|
||||
"--model",
|
||||
"-m",
|
||||
help="directory of models",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--model_name",
|
||||
help="Specific model file path to use for OCR",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
@click.option(
|
||||
"--tr_ocr",
|
||||
|
|
@ -363,16 +515,19 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
|||
help="if this parameter set to true, cropped textline images will not be masked with textline contour.",
|
||||
)
|
||||
@click.option(
|
||||
"--draw_texts_on_image",
|
||||
"-dtoi/-ndtoi",
|
||||
is_flag=True,
|
||||
help="if this parameter set to true, the predicted texts will be displayed on an image.",
|
||||
"--batch_size",
|
||||
"-bs",
|
||||
help="number of inference batch size. Default b_s for trocr and cnn_rnn models are 2 and 8 respectively",
|
||||
)
|
||||
@click.option(
|
||||
"--prediction_with_both_of_rgb_and_bin",
|
||||
"-brb/-nbrb",
|
||||
is_flag=True,
|
||||
help="If this parameter is set to True, the prediction will be performed using both RGB and binary images. However, this does not necessarily improve results; it may be beneficial for certain document images.",
|
||||
"--dataset_abbrevation",
|
||||
"-ds_pref",
|
||||
help="in the case of extracting textline and text from a xml GT file user can add an abbrevation of dataset name to generated dataset",
|
||||
)
|
||||
@click.option(
|
||||
"--min_conf_value_of_textline_text",
|
||||
"-min_conf",
|
||||
help="minimum OCR confidence value. Text lines with a confidence value lower than this threshold will not be included in the output XML file.",
|
||||
)
|
||||
@click.option(
|
||||
"--log_level",
|
||||
|
|
@ -381,24 +536,36 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
|||
help="Override log level globally to this",
|
||||
)
|
||||
|
||||
def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, log_level):
|
||||
def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level):
|
||||
initLogging()
|
||||
if log_level:
|
||||
getLogger('eynollah').setLevel(getLevelName(log_level))
|
||||
|
||||
assert bool(model) != bool(model_name), "Either -m (model directory) or --model_name (specific model name) must be provided."
|
||||
assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr"
|
||||
assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m"
|
||||
assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs"
|
||||
assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib"
|
||||
assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit"
|
||||
assert bool(image) != bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both."
|
||||
eynollah_ocr = Eynollah_ocr(
|
||||
dir_xmls=dir_xmls,
|
||||
dir_out_image_text=dir_out_image_text,
|
||||
dir_in=dir_in,
|
||||
dir_in_bin=dir_in_bin,
|
||||
dir_out=out,
|
||||
dir_models=model,
|
||||
model_name=model_name,
|
||||
tr_ocr=tr_ocr,
|
||||
export_textline_images_and_text=export_textline_images_and_text,
|
||||
do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
|
||||
draw_texts_on_image=draw_texts_on_image,
|
||||
prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin,
|
||||
batch_size=batch_size,
|
||||
pref_of_dataset=dataset_abbrevation,
|
||||
min_conf_value_of_textline_text=min_conf_value_of_textline_text,
|
||||
)
|
||||
if log_level:
|
||||
eynollah_ocr.logger.setLevel(getLevelName(log_level))
|
||||
eynollah_ocr.run(overwrite=overwrite,
|
||||
dir_in=dir_in,
|
||||
dir_in_bin=dir_in_bin,
|
||||
image_filename=image,
|
||||
dir_xmls=dir_xmls,
|
||||
dir_out_image_text=dir_out_image_text,
|
||||
dir_out=out,
|
||||
)
|
||||
eynollah_ocr.run()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
731
src/eynollah/image_enhancer.py
Normal file
731
src/eynollah/image_enhancer.py
Normal file
|
|
@ -0,0 +1,731 @@
|
|||
"""
|
||||
Image enhancer. The output can be written at the same scale as the input or at the newly predicted scale.
|
||||
"""
|
||||
|
||||
from logging import Logger
|
||||
import os
|
||||
import time
|
||||
from typing import Optional
|
||||
import atexit
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from multiprocessing import cpu_count
|
||||
import gc
|
||||
import cv2
|
||||
import numpy as np
|
||||
from ocrd_utils import getLogger, tf_disable_interactive_logs
|
||||
import tensorflow as tf
|
||||
from skimage.morphology import skeletonize
|
||||
from tensorflow.keras.models import load_model
|
||||
from .utils.resize import resize_image
|
||||
from .utils.pil_cv2 import pil2cv
|
||||
from .utils import (
|
||||
is_image_filename,
|
||||
crop_image_inside_box
|
||||
)
|
||||
|
||||
DPI_THRESHOLD = 298
|
||||
KERNEL = np.ones((5, 5), np.uint8)
|
||||
|
||||
|
||||
class Enhancer:
|
||||
def __init__(
    self,
    dir_models : str,
    num_col_upper : Optional[int] = None,
    num_col_lower : Optional[int] = None,
    save_org_scale : bool = False,
    logger : Optional[Logger] = None,
):
    """Set up the enhancer and load all models it needs.

    Args:
        dir_models: directory containing the model sub-directories
            (binarization, enhancement, column classifier, page extraction).
        num_col_upper: optional column-count limit; truthy values are
            converted with ``int()``, falsy values are stored unchanged.
        num_col_lower: optional column-count limit, handled like
            ``num_col_upper``.
        save_org_scale: stored as ``self.save_org_scale``.
        logger: logger to use; defaults to ``getLogger('enhancement')``.
    """
    self.input_binary = False
    self.light_version = False
    self.save_org_scale = save_org_scale
    # The CLI passes these options through as strings, hence the int()
    # conversion; None (option not given) is kept as-is.
    self.num_col_upper = int(num_col_upper) if num_col_upper else num_col_upper
    self.num_col_lower = int(num_col_lower) if num_col_lower else num_col_lower

    self.logger = logger if logger else getLogger('enhancement')
    self.dir_models = dir_models
    self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425"
    self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
    self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425"
    self.model_page_dir = dir_models + "/model_eynollah_page_extraction_20250915"

    try:
        for device in tf.config.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(device, True)
    except Exception:
        # Best effort only: GPU configuration problems must not prevent CPU
        # operation. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        self.logger.warning("no GPU device available")

    self.model_page = self.our_load_model(self.model_page_dir)
    self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
    self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement)
    self.model_bin = self.our_load_model(self.model_dir_of_binarization)
|
||||
|
||||
def cache_images(self, image_filename=None, image_pil=None, dpi=None):
    """Load the input image and cache its colour/grayscale variants.

    The image is read with ``cv2.imread`` when ``image_filename`` is given,
    otherwise ``image_pil`` is converted with ``pil2cv``. Four arrays are
    stored in ``self._imgs`` under the keys ``img``, ``img_grayscale``,
    ``img_uint8`` and ``img_grayscale_uint8``.

    ``self.dpi`` is set to 100 in light mode and 0 otherwise, unless an
    explicit ``dpi`` is passed, which takes precedence.
    """
    cached = {}
    cached['img'] = cv2.imread(image_filename) if image_filename else pil2cv(image_pil)
    # DPI detection is disabled here, so only the fixed fallbacks are used.
    self.dpi = 100 if self.light_version else 0
    cached['img_grayscale'] = cv2.cvtColor(cached['img'], cv2.COLOR_BGR2GRAY)
    for key in ('img', 'img_grayscale'):
        cached[key + '_uint8'] = cached[key].astype(np.uint8)
    self._imgs = cached
    if dpi is not None:
        self.dpi = dpi
|
||||
|
||||
def reset_file_name_dir(self, image_filename, dir_out):
|
||||
self.cache_images(image_filename=image_filename)
|
||||
self.output_filename = os.path.join(dir_out, Path(image_filename).stem +'.png')
|
||||
|
||||
def imread(self, grayscale=False, uint8=True):
|
||||
key = 'img'
|
||||
if grayscale:
|
||||
key += '_grayscale'
|
||||
if uint8:
|
||||
key += '_uint8'
|
||||
return self._imgs[key].copy()
|
||||
|
||||
def isNaN(self, num):
    """Return the result of ``num != num``.

    For floats this is True only for NaN, the one value that compares
    unequal to itself.
    """
    differs_from_itself = num != num
    return differs_from_itself
|
||||
|
||||
@staticmethod
def our_load_model(model_file):
    """Load a Keras model from ``model_file`` without compiling it.

    If ``model_file`` ends in ``.h5`` and a path with that suffix stripped
    exists, that path is loaded instead. When the plain load fails, loading
    is retried with the custom layers ``PatchEncoder`` and ``Patches``
    registered.

    Returns:
        The loaded model.
    """
    if model_file.endswith('.h5') and Path(model_file[:-3]).exists():
        # prefer SavedModel over HDF5 format if it exists
        model_file = model_file[:-3]
    try:
        model = load_model(model_file, compile=False)
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not turned into a retry.
        # NOTE(review): PatchEncoder and Patches are not among the imports
        # visible at the top of this file - confirm they are in scope,
        # otherwise this fallback raises NameError.
        model = load_model(model_file, compile=False, custom_objects={
            "PatchEncoder": PatchEncoder, "Patches": Patches})
    return model
|
||||
|
||||
def predict_enhancement(self, img):
|
||||
self.logger.debug("enter predict_enhancement")
|
||||
|
||||
img_height_model = self.model_enhancement.layers[-1].output_shape[1]
|
||||
img_width_model = self.model_enhancement.layers[-1].output_shape[2]
|
||||
if img.shape[0] < img_height_model:
|
||||
img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST)
|
||||
if img.shape[1] < img_width_model:
|
||||
img = cv2.resize(img, (img_height_model, img.shape[0]), interpolation=cv2.INTER_NEAREST)
|
||||
margin = int(0.1 * img_width_model)
|
||||
width_mid = img_width_model - 2 * margin
|
||||
height_mid = img_height_model - 2 * margin
|
||||
img = img / 255.
|
||||
img_h = img.shape[0]
|
||||
img_w = img.shape[1]
|
||||
|
||||
prediction_true = np.zeros((img_h, img_w, 3))
|
||||
nxf = img_w / float(width_mid)
|
||||
nyf = img_h / float(height_mid)
|
||||
nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
|
||||
nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)
|
||||
|
||||
for i in range(nxf):
|
||||
for j in range(nyf):
|
||||
if i == 0:
|
||||
index_x_d = i * width_mid
|
||||
index_x_u = index_x_d + img_width_model
|
||||
else:
|
||||
index_x_d = i * width_mid
|
||||
index_x_u = index_x_d + img_width_model
|
||||
if j == 0:
|
||||
index_y_d = j * height_mid
|
||||
index_y_u = index_y_d + img_height_model
|
||||
else:
|
||||
index_y_d = j * height_mid
|
||||
index_y_u = index_y_d + img_height_model
|
||||
|
||||
if index_x_u > img_w:
|
||||
index_x_u = img_w
|
||||
index_x_d = img_w - img_width_model
|
||||
if index_y_u > img_h:
|
||||
index_y_u = img_h
|
||||
index_y_d = img_h - img_height_model
|
||||
|
||||
img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :]
|
||||
label_p_pred = self.model_enhancement.predict(img_patch, verbose=0)
|
||||
seg = label_p_pred[0, :, :, :] * 255
|
||||
|
||||
if i == 0 and j == 0:
|
||||
prediction_true[index_y_d + 0:index_y_u - margin,
|
||||
index_x_d + 0:index_x_u - margin] = \
|
||||
seg[0:-margin or None,
|
||||
0:-margin or None]
|
||||
elif i == nxf - 1 and j == nyf - 1:
|
||||
prediction_true[index_y_d + margin:index_y_u - 0,
|
||||
index_x_d + margin:index_x_u - 0] = \
|
||||
seg[margin:,
|
||||
margin:]
|
||||
elif i == 0 and j == nyf - 1:
|
||||
prediction_true[index_y_d + margin:index_y_u - 0,
|
||||
index_x_d + 0:index_x_u - margin] = \
|
||||
seg[margin:,
|
||||
0:-margin or None]
|
||||
elif i == nxf - 1 and j == 0:
|
||||
prediction_true[index_y_d + 0:index_y_u - margin,
|
||||
index_x_d + margin:index_x_u - 0] = \
|
||||
seg[0:-margin or None,
|
||||
margin:]
|
||||
elif i == 0 and j != 0 and j != nyf - 1:
|
||||
prediction_true[index_y_d + margin:index_y_u - margin,
|
||||
index_x_d + 0:index_x_u - margin] = \
|
||||
seg[margin:-margin or None,
|
||||
0:-margin or None]
|
||||
elif i == nxf - 1 and j != 0 and j != nyf - 1:
|
||||
prediction_true[index_y_d + margin:index_y_u - margin,
|
||||
index_x_d + margin:index_x_u - 0] = \
|
||||
seg[margin:-margin or None,
|
||||
margin:]
|
||||
elif i != 0 and i != nxf - 1 and j == 0:
|
||||
prediction_true[index_y_d + 0:index_y_u - margin,
|
||||
index_x_d + margin:index_x_u - margin] = \
|
||||
seg[0:-margin or None,
|
||||
margin:-margin or None]
|
||||
elif i != 0 and i != nxf - 1 and j == nyf - 1:
|
||||
prediction_true[index_y_d + margin:index_y_u - 0,
|
||||
index_x_d + margin:index_x_u - margin] = \
|
||||
seg[margin:,
|
||||
margin:-margin or None]
|
||||
else:
|
||||
prediction_true[index_y_d + margin:index_y_u - margin,
|
||||
index_x_d + margin:index_x_u - margin] = \
|
||||
seg[margin:-margin or None,
|
||||
margin:-margin or None]
|
||||
|
||||
prediction_true = prediction_true.astype(int)
|
||||
return prediction_true
|
||||
|
||||
def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred):
|
||||
self.logger.debug("enter calculate_width_height_by_columns")
|
||||
if num_col == 1:
|
||||
img_w_new = 2000
|
||||
elif num_col == 2:
|
||||
img_w_new = 2400
|
||||
elif num_col == 3:
|
||||
img_w_new = 3000
|
||||
elif num_col == 4:
|
||||
img_w_new = 4000
|
||||
elif num_col == 5:
|
||||
img_w_new = 5000
|
||||
elif num_col == 6:
|
||||
img_w_new = 6500
|
||||
else:
|
||||
img_w_new = width_early
|
||||
img_h_new = img_w_new * img.shape[0] // img.shape[1]
|
||||
|
||||
if img_h_new >= 8000:
|
||||
img_new = np.copy(img)
|
||||
num_column_is_classified = False
|
||||
else:
|
||||
img_new = resize_image(img, img_h_new, img_w_new)
|
||||
num_column_is_classified = True
|
||||
|
||||
return img_new, num_column_is_classified
|
||||
|
||||
def early_page_for_num_of_column_classification(self,img_bin):
    """Crop the input to its page region using the page model.

    The image (``img_bin`` when ``self.input_binary`` is set, otherwise the
    cached input image) is blurred and passed to ``self.do_prediction`` with
    the page model. The bounding box of the largest contour in the dilated
    prediction is used for cropping; without any contour the whole image is
    used.

    Returns:
        The ``(cropped_page, page_coord)`` pair returned by
        ``crop_image_inside_box``.
    """
    self.logger.debug("enter early_page_for_num_of_column_classification")
    img = np.copy(img_bin).astype(np.uint8) if self.input_binary else self.imread()
    img = cv2.GaussianBlur(img, (5, 5), 0)
    page_prediction = self.do_prediction(False, img, self.model_page)

    gray = cv2.cvtColor(page_prediction, cv2.COLOR_BGR2GRAY)
    _, page_mask = cv2.threshold(gray, 0, 255, 0)
    page_mask = cv2.dilate(page_mask, KERNEL, iterations=3)
    contours, _ = cv2.findContours(page_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) > 0:
        areas = np.array([cv2.contourArea(contour) for contour in contours])
        largest = contours[np.argmax(areas)]
        box = cv2.boundingRect(largest)
    else:
        # nothing detected: fall back to the full image as x, y, w, h
        box = [0, 0, img.shape[1], img.shape[0]]
    cropped_page, page_coord = crop_image_inside_box(box, img)

    self.logger.debug("exit early_page_for_num_of_column_classification")
    return cropped_page, page_coord
|
||||
|
||||
def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred):
|
||||
self.logger.debug("enter calculate_width_height_by_columns")
|
||||
if num_col == 1:
|
||||
img_w_new = 1000
|
||||
else:
|
||||
img_w_new = 1300
|
||||
img_h_new = img_w_new * img.shape[0] // img.shape[1]
|
||||
|
||||
if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early:
|
||||
img_new = np.copy(img)
|
||||
num_column_is_classified = False
|
||||
#elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000:
|
||||
elif img_h_new >= 8000:
|
||||
img_new = np.copy(img)
|
||||
num_column_is_classified = False
|
||||
else:
|
||||
img_new = resize_image(img, img_h_new, img_w_new)
|
||||
num_column_is_classified = True
|
||||
|
||||
return img_new, num_column_is_classified
|
||||
|
||||
def resize_and_enhance_image_with_column_classifier(self, light_version):
    """
    Determine the number of columns, then rescale (and optionally enhance) the image.

    Steps:
      1. read the image; with ``self.input_binary`` it is replaced by its
         binarization from ``self.model_bin``,
      2. crop the page (``early_page_for_num_of_column_classification``) and
         store the crop in ``self.image_page_org_size`` / ``self.page_coord``,
      3. take the column count from ``self.num_col_upper`` /
         ``self.num_col_lower`` when they pin it down, otherwise ask
         ``self.model_classifier`` (clamping to the bounds if both are set),
      4. resize according to the column count and, unless ``light_version``,
         run ``self.predict_enhancement``.

    Returns ``(is_image_enhanced, img, image_res, num_col,
    num_column_is_classified, img_bin)`` where ``img_bin`` is ``None`` unless
    ``self.input_binary`` is set.
    """
    self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
    # NOTE: the DPI is pinned to 0 here (reading self.dpi is disabled).
    dpi = 0
    self.logger.info("Detected %s DPI", dpi)

    img = self.imread()
    # Record the original size in both modes: run() needs it for
    # save_org_scale, and it was previously missing for binarized input.
    self.h_org, self.w_org = img.shape[:2]
    if self.input_binary:
        prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5)
        prediction_bin = 255 * (prediction_bin[:, :, 0] == 0)
        prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
        img = np.copy(prediction_bin)
        img_bin = prediction_bin
    else:
        img_bin = None

    width_early = img.shape[1]
    _, page_coord = self.early_page_for_num_of_column_classification(img_bin)

    self.image_page_org_size = img[page_coord[0]:page_coord[1], page_coord[2]:page_coord[3], :]
    self.page_coord = page_coord

    upper = self.num_col_upper
    lower = self.num_col_lower
    bounded = bool(upper and lower)
    if (not upper and not lower) or (bounded and upper != lower):
        # The column count is not fixed by the caller: ask the classifier.
        if self.input_binary:
            img_in = np.copy(img) / 255.0
            img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
            img_in = img_in.reshape(1, 448, 448, 3)
        else:
            img_1ch = self.imread(grayscale=True)
            width_early = img_1ch.shape[1]
            img_1ch = img_1ch[page_coord[0]:page_coord[1], page_coord[2]:page_coord[3]]
            img_1ch = img_1ch / 255.0
            img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
            # replicate the gray channel into a 3-channel batch of one
            img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
            for channel in range(3):
                img_in[0, :, :, channel] = img_1ch[:, :]

        label_p_pred = self.model_classifier.predict(img_in, verbose=0)
        num_col = np.argmax(label_p_pred[0]) + 1

        if bounded:
            # Clamp to the requested range; a clamped value is treated as certain.
            if num_col > upper:
                num_col = upper
                label_p_pred = [np.ones(6)]
            if num_col < lower:
                num_col = lower
                label_p_pred = [np.ones(6)]
    else:
        # Only one bound given, or both bounds equal: the count is fixed.
        num_col = upper or lower
        label_p_pred = [np.ones(6)]

    self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5))

    if dpi < DPI_THRESHOLD:
        if light_version and num_col in (1, 2):
            img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
                img, num_col, width_early, label_p_pred)
        else:
            img_new, num_column_is_classified = self.calculate_width_height_by_columns(
                img, num_col, width_early, label_p_pred)
        if light_version:
            image_res = np.copy(img_new)
        else:
            image_res = self.predict_enhancement(img_new)
        is_image_enhanced = True
    else:
        num_column_is_classified = True
        image_res = np.copy(img)
        is_image_enhanced = False

    self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
    return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin
|
||||
def do_prediction(
        self, patches, img, model,
        n_batch_inference=1, marginal_of_patch_percent=0.1,
        thresholding_for_some_classes_in_light_version=False,
        thresholding_for_artificial_class_in_light_version=False,
        thresholding_for_fl_light_version=False,
        threshold_art_class_textline=0.1):
    """
    Predict a per-pixel label map for ``img`` with ``model``.

    With ``patches`` false the whole image is resized to the model size,
    predicted once and the label map is resized back to the input size.

    With ``patches`` true the image (upscaled first if smaller than the
    model size) is covered by overlapping model-sized tiles.  Tiles are
    predicted in batches of ``n_batch_inference``; from each tile only the
    part inside a margin of ``marginal_of_patch_percent`` of the model
    height is kept, except along the image border where the tile edge is
    kept as well.

    The ``thresholding_*`` flags override the argmax labels using fixed
    probability thresholds on individual output channels (see the inline
    comments); ``threshold_art_class_textline`` is the threshold applied to
    channel 2 for ``thresholding_for_artificial_class_in_light_version``.

    Returns a ``uint8`` array of shape ``(height, width, 3)``.
    """
    self.logger.debug("enter do_prediction")
    img_height_model = model.layers[-1].output_shape[1]
    img_width_model = model.layers[-1].output_shape[2]

    if not patches:
        img_h_page = img.shape[0]
        img_w_page = img.shape[1]
        img = img / float(255.0)
        img = resize_image(img, img_height_model, img_width_model)

        label_p_pred = model.predict(img[np.newaxis], verbose=0)
        seg = np.argmax(label_p_pred, axis=3)[0]

        if thresholding_for_artificial_class_in_light_version:
            # binarize channel 2 in place, thin it, and mark it as label 2
            seg_art = label_p_pred[0, :, :, 2]
            seg_art[seg_art < threshold_art_class_textline] = 0
            seg_art[seg_art > 0] = 1

            skeleton_art = skeletonize(seg_art)
            skeleton_art = skeleton_art * 1
            seg[skeleton_art == 1] = 2

        if thresholding_for_fl_light_version:
            seg_header = label_p_pred[0, :, :, 2]
            seg_header[seg_header < 0.2] = 0
            seg_header[seg_header > 0] = 1
            seg[seg_header == 1] = 2

        seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
        prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
        return prediction_true

    # Tiling needs an image at least as large as one tile.
    if img.shape[0] < img_height_model:
        img = resize_image(img, img_height_model, img.shape[1])
    if img.shape[1] < img_width_model:
        img = resize_image(img, img.shape[0], img_width_model)

    self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
    margin = int(marginal_of_patch_percent * img_height_model)
    width_mid = img_width_model - 2 * margin
    height_mid = img_height_model - 2 * margin
    img = img / 255.
    img_h = img.shape[0]
    img_w = img.shape[1]
    prediction_true = np.zeros((img_h, img_w, 3))
    nxf = img_w / float(width_mid)
    nyf = img_h / float(height_mid)
    nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
    nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)

    # Enumerate tiles column by column; later tiles overwrite earlier ones
    # where their kept areas overlap, so this order is significant.
    tiles = []
    for i in range(nxf):
        index_x_d = i * width_mid
        index_x_u = index_x_d + img_width_model
        if index_x_u > img_w:
            # shift the tile back inside the image
            index_x_u = img_w
            index_x_d = img_w - img_width_model
        for j in range(nyf):
            index_y_d = j * height_mid
            index_y_u = index_y_d + img_height_model
            if index_y_u > img_h:
                index_y_u = img_h
                index_y_d = img_h - img_height_model
            tiles.append((i, j, index_x_d, index_x_u, index_y_d, index_y_u))

    img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
    for start in range(0, len(tiles), n_batch_inference):
        batch = tiles[start:start + n_batch_inference]
        for slot, (_, _, x_d, x_u, y_d, y_u) in enumerate(batch):
            img_patch[slot, :, :, :] = img[y_d:y_u, x_d:x_u, :]

        # A final, shorter batch is padded with all-zero tiles.
        self.logger.debug("predicting patches on %s", str(img_patch.shape))
        label_p_pred = model.predict(img_patch, verbose=0)
        seg = np.argmax(label_p_pred, axis=3)

        if thresholding_for_some_classes_in_light_version:
            seg_not_base = label_p_pred[:, :, :, 4]
            seg_not_base[seg_not_base > 0.03] = 1
            seg_not_base[seg_not_base < 1] = 0

            seg_line = label_p_pred[:, :, :, 3]
            seg_line[seg_line > 0.1] = 1
            seg_line[seg_line < 1] = 0

            seg_background = label_p_pred[:, :, :, 0]
            seg_background[seg_background > 0.25] = 1
            seg_background[seg_background < 1] = 0

            seg[seg_not_base == 1] = 4
            seg[seg_background == 1] = 0
            seg[(seg_line == 1) & (seg == 0)] = 3

        if thresholding_for_artificial_class_in_light_version:
            seg_art = label_p_pred[:, :, :, 2]
            seg_art[seg_art < threshold_art_class_textline] = 0
            seg_art[seg_art > 0] = 1

        for slot, (i_batch, j_batch, x_d, x_u, y_d, y_u) in enumerate(batch):
            # Drop the margin on every side except where the tile touches
            # the first/last row or column of the tile grid.
            top = 0 if j_batch == 0 else margin
            bottom = 0 if j_batch == nyf - 1 else margin
            left = 0 if i_batch == 0 else margin
            right = 0 if i_batch == nxf - 1 else margin
            rows = slice(top, -bottom or None)
            cols = slice(left, -right or None)

            prediction_true[y_d + top:y_u - bottom,
                            x_d + left:x_u - right] = seg[slot][rows, cols, np.newaxis]
            if thresholding_for_artificial_class_in_light_version:
                # channel 1 carries the thresholded channel-2 mask until
                # the skeletonization below
                prediction_true[y_d + top:y_u - bottom,
                                x_d + left:x_u - right, 1] = seg_art[slot][rows, cols]

        img_patch[:] = 0

    prediction_true = prediction_true.astype(np.uint8)

    if thresholding_for_artificial_class_in_light_version:
        kernel_min = np.ones((3, 3), np.uint8)
        prediction_true[:, :, 0][prediction_true[:, :, 0] == 2] = 0

        skeleton_art = skeletonize(prediction_true[:, :, 1])
        skeleton_art = skeleton_art * 1
        skeleton_art = skeleton_art.astype('uint8')
        skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1)

        prediction_true[:, :, 0][skeleton_art == 1] = 2
    gc.collect()
    return prediction_true
|
||||
|
||||
def run_enhancement(self, light_version):
    """
    Resize/enhance the current image and report whether it was enhanced.

    Delegates to ``resize_and_enhance_image_with_column_classifier`` and
    returns ``(img_res, is_image_enhanced, num_col_classifier,
    num_column_is_classified)``; the original and binarized images from
    the delegate are discarded.
    """
    self.logger.info("Resizing and enhancing image...")
    outcome = self.resize_and_enhance_image_with_column_classifier(light_version)
    (is_image_enhanced, _img_org, img_res,
     num_col_classifier, num_column_is_classified, _img_bin) = outcome

    negation = '' if is_image_enhanced else 'not '
    self.logger.info("Image was %senhanced.", negation)
    return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified
|
||||
|
||||
|
||||
def run_single(self):
    """
    Enhance the currently selected image (non-light mode) and return it.

    Only the result image of ``run_enhancement`` is returned; the other
    three values are dropped.
    """
    outcome = self.run_enhancement(light_version=False)
    img_res, _is_enhanced, _num_col, _is_classified = outcome
    return img_res
|
||||
|
||||
|
||||
def run(self,
        overwrite: bool = False,
        image_filename: Optional[str] = None,
        dir_in: Optional[str] = None,
        dir_out: Optional[str] = None,
):
    """
    Enhance one image, or every image file of a directory, and write the results.

    Args:
        overwrite: process inputs whose output file already exists
            (otherwise they are skipped with a warning).
        image_filename: path of a single input image; ignored when
            ``dir_in`` is given.
        dir_in: directory whose image files (as judged by
            ``is_image_filename``) are all processed.
        dir_out: passed to ``reset_file_name_dir`` for each input.

    Raises:
        ValueError: if neither ``image_filename`` nor ``dir_in`` is given.
    """
    self.logger.debug("enter run")

    if dir_in:
        ls_imgs = [os.path.join(dir_in, name)
                   for name in filter(is_image_filename, os.listdir(dir_in))]
    elif image_filename:
        ls_imgs = [image_filename]
    else:
        raise ValueError("run requires either a single image filename or a directory")

    for img_filename in ls_imgs:
        self.logger.info(img_filename)
        self.reset_file_name_dir(img_filename, dir_out)

        if os.path.exists(self.output_filename):
            if overwrite:
                self.logger.warning("will overwrite existing output file '%s'", self.output_filename)
            else:
                self.logger.warning("will skip input for existing output file '%s'", self.output_filename)
                continue

        image_enhanced = self.run_single()
        if self.save_org_scale:
            image_enhanced = resize_image(image_enhanced, self.h_org, self.w_org)

        # cv2.imwrite signals failure through its return value; report it
        # instead of silently dropping the result.
        if not cv2.imwrite(self.output_filename, image_enhanced):
            self.logger.error("could not write output file '%s'", self.output_filename)
|
||||
|
||||
813
src/eynollah/mb_ro_on_layout.py
Normal file
813
src/eynollah/mb_ro_on_layout.py
Normal file
|
|
@ -0,0 +1,813 @@
|
|||
"""
|
||||
Machine-based reading order detection for the regions of an existing PAGE-XML layout.
|
||||
"""
|
||||
|
||||
from logging import Logger
|
||||
import os
|
||||
import time
|
||||
from typing import Optional
|
||||
import atexit
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from multiprocessing import cpu_count
|
||||
import xml.etree.ElementTree as ET
|
||||
import cv2
|
||||
import numpy as np
|
||||
from ocrd_utils import getLogger
|
||||
import statistics
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import load_model
|
||||
from .utils.resize import resize_image
|
||||
|
||||
from .utils.contour import (
|
||||
find_new_features_of_contours,
|
||||
return_contours_of_image,
|
||||
return_parent_contours,
|
||||
)
|
||||
from .utils import is_xml_filename
|
||||
|
||||
DPI_THRESHOLD = 298
|
||||
KERNEL = np.ones((5, 5), np.uint8)
|
||||
|
||||
|
||||
class machine_based_reading_order_on_layout:
|
||||
def __init__(
        self,
        dir_models : str,
        logger : Optional[Logger] = None,
):
    """
    Load the reading-order model from ``dir_models``.

    Args:
        dir_models: directory containing the
            ``model_eynollah_reading_order_20250824`` model.
        logger: logger to use; defaults to ``getLogger('mbreorder')``.
    """
    self.logger = logger if logger else getLogger('mbreorder')
    self.dir_models = dir_models
    self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"

    try:
        for device in tf.config.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(device, True)
    except Exception as err:
        # An empty GPU list raises nothing, so reaching this handler means
        # the configuration call itself failed: report the real cause and
        # carry on (best effort), without swallowing KeyboardInterrupt or
        # SystemExit as the former bare ``except`` did.
        self.logger.warning("cannot enable GPU memory growth: %s", err)

    self.model_reading_order = self.our_load_model(self.model_reading_order_dir)
    self.light_version = True
|
||||
|
||||
@staticmethod
def our_load_model(model_file):
    """
    Load a Keras model without compiling it.

    If ``model_file`` ends in ``.h5`` and a path with the same name minus
    that suffix exists, the suffix-less path is loaded instead.  When
    plain loading fails, loading is retried with the custom layers
    ``PatchEncoder`` and ``Patches`` registered.
    """
    if model_file.endswith('.h5') and Path(model_file[:-3]).exists():
        # prefer SavedModel over HDF5 format if it exists
        model_file = model_file[:-3]
    try:
        model = load_model(model_file, compile=False)
    except Exception:
        # Was a bare ``except``, which also retried on KeyboardInterrupt
        # and SystemExit.
        # NOTE(review): PatchEncoder and Patches are not among the imports
        # visible at the top of this module -- confirm they are in scope,
        # otherwise this fallback raises NameError.
        model = load_model(model_file, compile=False, custom_objects={
            "PatchEncoder": PatchEncoder, "Patches": Patches})
    return model
|
||||
|
||||
def read_xml(self, xml_file):
    """
    Parse a PAGE-XML file into text-region contours and a label image.

    Text regions are grouped by their ``type`` attribute: ``heading``,
    ``header`` and ``marginalia`` are kept apart, ``drop-capital`` and
    ``signature-mark`` are ignored, and everything else (including regions
    without a type) counts as a paragraph.  Image and separator regions
    only contribute to the label image; graphic, table and noise regions
    are not part of the result.  Regions without any coordinates are
    skipped.

    Returns a 12-tuple:
        ``tree1``, ``root1``: the parsed tree and its root,
        ``bb_coord_printspace``: ``[x, y, w, h]`` of the PrintSpace (or,
            failing that, Border) outline, or ``None`` if there is none,
        ``id_paragraph``: ids of the paragraph regions,
        ids of header regions followed by ids of heading regions,
        ``co_text_paragraph``: contours of the paragraph regions,
        contours of header regions followed by those of heading regions,
        ``tot_region_ref``: ``regionRef`` of every ``RegionRefIndexed``,
        ``x_len``, ``y_len``: ``imageWidth`` / ``imageHeight`` of the page,
        ``index_tot_regions``: ``index`` of every ``RegionRefIndexed``,
        ``img_poly``: ``(y_len, x_len, 3)`` image with paragraphs filled
            with 1, headings/headers 2, marginalia 3, images 4, separators 5.

    Raises:
        ValueError: if the file has no ``Page`` element (the image size is
            then unknown).
    """
    tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding='utf-8'))
    root1 = tree1.getroot()
    alltags = [elem.tag for elem in root1.iter()]
    link = alltags[0].split('}')[0] + '}'

    def parse_outline(region):
        # Read the outline of ``region`` in either notation: one Coords
        # element with a ``points`` attribute (yields a single array), or a
        # run of Point elements with ``x`` / ``y`` (yields one pair each).
        outline = []
        n_points = 0
        for node in region.iter():
            if node.tag == link + 'Coords' and node.attrib:
                outline.append(np.array(
                    [[int(p.split(',')[0]), int(p.split(',')[1])]
                     for p in node.attrib['points'].split(' ')]))
                break
            if node.tag == link + 'Point':
                outline.append([int(float(node.attrib['x'])), int(float(node.attrib['y']))])
                n_points += 1
            elif n_points >= 1:
                # first non-Point element after the run of Points
                break
        return outline

    x_len = y_len = None
    for page in root1.iter(link + 'Page'):
        y_len = int(page.attrib['imageHeight'])
        x_len = int(page.attrib['imageWidth'])
    if x_len is None or y_len is None:
        raise ValueError("no Page element found in '%s'" % xml_file)

    index_tot_regions = []
    tot_region_ref = []
    for ref in root1.iter(link + 'RegionRefIndexed'):
        index_tot_regions.append(ref.attrib['index'])
        tot_region_ref.append(ref.attrib['regionRef'])

    # PrintSpace takes precedence over Border.
    if link + 'PrintSpace' in alltags:
        frame_name = 'PrintSpace'
    elif link + 'Border' in alltags:
        frame_name = 'Border'
    else:
        frame_name = None

    bb_coord_printspace = None
    if frame_name is not None:
        frame_endings = ('}' + frame_name, '}' + frame_name.lower())
        co_printspace = []
        for tag in np.unique([x for x in alltags if x.endswith(frame_name)]):
            if tag.endswith(frame_endings):
                for nn in root1.iter(tag):
                    outline = parse_outline(nn)
                    if outline:
                        co_printspace.append(np.array(outline))

        img_printspace = np.zeros((y_len, x_len, 3))
        img_printspace = cv2.fillPoly(img_printspace, pts=co_printspace, color=(1, 1, 1))
        img_printspace = img_printspace.astype(np.uint8)

        imgray = cv2.cvtColor(img_printspace, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(imgray, 0, 255, 0)
        contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours) > 0:
            # bounding box of the largest outline
            x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
            bb_coord_printspace = [x, y, w, h]

    co_text_paragraph = []
    co_text_heading = []
    co_text_header = []
    co_text_marginalia = []
    co_sep = []
    co_img = []

    id_paragraph = []
    id_header = []
    id_heading = []

    for tag in np.unique([x for x in alltags if x.endswith('Region')]):
        if tag.endswith(('}TextRegion', '}Textregion')):
            for nn in root1.iter(tag):
                region_type = nn.attrib.get('type')
                if region_type in ('drop-capital', 'signature-mark'):
                    continue
                outline = parse_outline(nn)
                if not outline:
                    continue
                contour = np.array(outline)
                if region_type == 'heading':
                    co_text_heading.append(contour)
                    id_heading.append(nn.attrib['id'])
                elif region_type == 'header':
                    co_text_header.append(contour)
                    id_header.append(nn.attrib['id'])
                elif region_type == 'marginalia':
                    co_text_marginalia.append(contour)
                else:
                    # includes catch-word and page-number regions
                    co_text_paragraph.append(contour)
                    id_paragraph.append(nn.attrib['id'])

        elif tag.endswith(('}ImageRegion', '}imageregion')):
            for nn in root1.iter(tag):
                outline = parse_outline(nn)
                if outline:
                    co_img.append(np.array(outline))

        elif tag.endswith(('}SeparatorRegion', '}separatorregion')):
            for nn in root1.iter(tag):
                outline = parse_outline(nn)
                if outline:
                    co_sep.append(np.array(outline))

    # Later fills overwrite earlier ones where regions overlap.
    img = np.zeros((y_len, x_len, 3))
    img_poly = cv2.fillPoly(img, pts=co_text_paragraph, color=(1, 1, 1))
    img_poly = cv2.fillPoly(img, pts=co_text_heading, color=(2, 2, 2))
    img_poly = cv2.fillPoly(img, pts=co_text_header, color=(2, 2, 2))
    img_poly = cv2.fillPoly(img, pts=co_text_marginalia, color=(3, 3, 3))
    img_poly = cv2.fillPoly(img, pts=co_img, color=(4, 4, 4))
    img_poly = cv2.fillPoly(img, pts=co_sep, color=(5, 5, 5))

    return tree1, root1, bb_coord_printspace, id_paragraph, id_header+id_heading, co_text_paragraph, co_text_header+co_text_heading,\
        tot_region_ref, x_len, y_len, index_tot_regions, img_poly
|
||||
|
||||
def return_indexes_of_contours_loctaed_inside_another_list_of_contours(self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc):
    """Assign candidate centre points to the contours that contain them.

    Every centre ``(cx_main_loc[k], cy_main_loc[k])`` is tested against every
    contour in ``contours`` with ``cv2.pointPolygonTest``; a centre counts as
    located when the test result is 0 or 1.

    Args:
        contours: contours to test the centres against.
        contours_loc: not used by this method; kept for interface compatibility.
        cx_main_loc: x coordinates of the candidate centres.
        cy_main_loc: y coordinates of the candidate centres.
        indexes_loc: identifiers of the candidates, parallel to the centres
            (array or plain sequence).

    Returns:
        Three lists with one array per contour: the identifiers of the
        located candidates, their x centres and their y centres.
    """
    # Converted once instead of on every iteration; np.asarray also lets
    # callers pass plain lists for indexes_loc.
    candidate_ids = np.asarray(indexes_loc)
    centers_x = np.asarray(cx_main_loc)
    centers_y = np.asarray(cy_main_loc)
    n_candidates = len(cy_main_loc)

    indexes_of_located_cont = []
    center_x_coordinates_of_located = []
    center_y_coordinates_of_located = []

    for contour in contours:
        results = np.array([cv2.pointPolygonTest(contour, (cx_main_loc[ind], cy_main_loc[ind]), False)
                            for ind in range(n_candidates)])
        located = np.flatnonzero((results == 0) | (results == 1))

        indexes_of_located_cont.append(candidate_ids[located])
        center_x_coordinates_of_located.append(centers_x[located])
        center_y_coordinates_of_located.append(centers_y[located])

    return indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located
|
||||
|
||||
def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p):
# Compute a reading order for text regions from pairwise predictions of
# self.model_reading_order.
# contours_only_text_parent and contours_only_text_parent_h are lists of contours
# (run() passes the paragraph and the header contours); text_regions_p is a 2-D
# label image whose shape gives the page height and width.
# Returns (order, region_ids): order lists region indices (parents first, then
# headers offset by len(contours_only_text_parent)) in predicted reading order and
# region_ids holds one 'region_%04d' string per input contour.
# Returns ([], []) when there is nothing to order.
|
||||
# Model input size (height, width). height2/width2 are not used in this method.
height1 =672#448
|
||||
width1 = 448#224
|
||||
|
||||
height2 =672#448
|
||||
width2= 448#224
|
||||
|
||||
height3 =672#448
|
||||
width3 = 448#224
|
||||
|
||||
inference_bs = 3
|
||||
|
||||
ver_kernel = np.ones((5, 1), dtype=np.uint8)
|
||||
hor_kernel = np.ones((1, 5), dtype=np.uint8)
|
||||
|
||||
|
||||
min_cont_size_to_be_dilated = 10
|
||||
# Light mode with more than min_cont_size_to_be_dilated parent contours:
# contours whose x extent (x_max - x_min as returned by find_new_features_of_contours)
# is below 1.3x the mean extent are merged into larger blobs (horizontal erosion +
# vertical dilation) that are ordered as units; wider contours stay units of their own.
if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version:
|
||||
cx_conts, cy_conts, x_min_conts, x_max_conts, y_min_conts, y_max_conts, _ = find_new_features_of_contours(contours_only_text_parent)
|
||||
args_cont_located = np.array(range(len(contours_only_text_parent)))
|
||||
|
||||
diff_y_conts = np.abs(y_max_conts[:]-y_min_conts)
|
||||
diff_x_conts = np.abs(x_max_conts[:]-x_min_conts)
|
||||
|
||||
mean_x = statistics.mean(diff_x_conts)
|
||||
median_x = statistics.median(diff_x_conts)
|
||||
|
||||
|
||||
diff_x_ratio= diff_x_conts/mean_x
|
||||
|
||||
args_cont_located_excluded = args_cont_located[diff_x_ratio>=1.3]
|
||||
args_cont_located_included = args_cont_located[diff_x_ratio<1.3]
|
||||
|
||||
contours_only_text_parent_excluded = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]>=1.3]#contours_only_text_parent[diff_x_ratio>=1.3]
|
||||
contours_only_text_parent_included = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]<1.3]#contours_only_text_parent[diff_x_ratio<1.3]
|
||||
|
||||
|
||||
cx_conts_excluded = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]>=1.3]#cx_conts[diff_x_ratio>=1.3]
|
||||
cx_conts_included = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]<1.3]#cx_conts[diff_x_ratio<1.3]
|
||||
|
||||
cy_conts_excluded = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]>=1.3]#cy_conts[diff_x_ratio>=1.3]
|
||||
cy_conts_included = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]<1.3]#cy_conts[diff_x_ratio<1.3]
|
||||
|
||||
#print(diff_x_ratio, 'ratio')
|
||||
text_regions_p = text_regions_p.astype('uint8')
|
||||
|
||||
# Mask to be merged: only the included contours when something was excluded,
# otherwise every pixel labelled 1 in text_regions_p.
if len(contours_only_text_parent_excluded)>0:
|
||||
textregion_par = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1])).astype('uint8')
|
||||
textregion_par = cv2.fillPoly(textregion_par, pts=contours_only_text_parent_included, color=(1,1))
|
||||
else:
|
||||
textregion_par = (text_regions_p[:,:]==1)*1
|
||||
textregion_par = textregion_par.astype('uint8')
|
||||
|
||||
text_regions_p_textregions_dilated = cv2.erode(textregion_par , hor_kernel, iterations=2)
|
||||
text_regions_p_textregions_dilated = cv2.dilate(text_regions_p_textregions_dilated , ver_kernel, iterations=4)
|
||||
text_regions_p_textregions_dilated = cv2.erode(text_regions_p_textregions_dilated , hor_kernel, iterations=1)
|
||||
text_regions_p_textregions_dilated = cv2.dilate(text_regions_p_textregions_dilated , ver_kernel, iterations=5)
|
||||
# Never let the merged blobs cover pixels carrying a label greater than 1.
text_regions_p_textregions_dilated[text_regions_p[:,:]>1] = 0
|
||||
|
||||
|
||||
contours_only_dilated, hir_on_text_dilated = return_contours_of_image(text_regions_p_textregions_dilated)
|
||||
contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated)
|
||||
|
||||
# For every dilated blob, collect the included contours whose centre lies inside it (or on its border).
indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = self.return_indexes_of_contours_loctaed_inside_another_list_of_contours(contours_only_dilated, contours_only_text_parent_included, cx_conts_included, cy_conts_included, args_cont_located_included)
|
||||
|
||||
|
||||
# Wide (excluded) contours become singleton units; their centre-y entry is a placeholder 0.
if len(args_cont_located_excluded)>0:
|
||||
for ind in args_cont_located_excluded:
|
||||
indexes_of_located_cont.append(np.array([ind]))
|
||||
contours_only_dilated.append(contours_only_text_parent[ind])
|
||||
center_y_coordinates_of_located.append(0)
|
||||
|
||||
array_list = [np.array([elem]) if isinstance(elem, int) else elem for elem in indexes_of_located_cont]
|
||||
flattened_array = np.concatenate([arr.ravel() for arr in array_list])
|
||||
#print(len( np.unique(flattened_array)), 'indexes_of_located_cont uniques')
|
||||
|
||||
# Parent contours that ended up in no unit so far are appended as singleton units as well.
missing_textregions = list( set(np.array(range(len(contours_only_text_parent))) ) - set(np.unique(flattened_array)) )
|
||||
#print(missing_textregions, 'missing_textregions')
|
||||
|
||||
for ind in missing_textregions:
|
||||
indexes_of_located_cont.append(np.array([ind]))
|
||||
contours_only_dilated.append(contours_only_text_parent[ind])
|
||||
center_y_coordinates_of_located.append(0)
|
||||
|
||||
|
||||
# Header contours are appended as plain ints continuing after the parent indices.
if contours_only_text_parent_h:
|
||||
for vi in range(len(contours_only_text_parent_h)):
|
||||
indexes_of_located_cont.append(int(vi+len(contours_only_text_parent)))
|
||||
|
||||
array_list = [np.array([elem]) if isinstance(elem, int) else elem for elem in indexes_of_located_cont]
|
||||
flattened_array = np.concatenate([arr.ravel() for arr in array_list])
|
||||
|
||||
y_len = text_regions_p.shape[0]
|
||||
x_len = text_regions_p.shape[1]
|
||||
|
||||
# NOTE: this zero image is superseded by the copy of text_regions_p assigned further down.
img_poly = np.zeros((y_len,x_len), dtype='uint8')
|
||||
###img_poly[text_regions_p[:,:]==1] = 1
|
||||
###img_poly[text_regions_p[:,:]==2] = 2
|
||||
###img_poly[text_regions_p[:,:]==3] = 4
|
||||
###img_poly[text_regions_p[:,:]==6] = 5
|
||||
|
||||
##img_poly[text_regions_p[:,:]==1] = 1
|
||||
##img_poly[text_regions_p[:,:]==2] = 2
|
||||
##img_poly[text_regions_p[:,:]==3] = 3
|
||||
##img_poly[text_regions_p[:,:]==4] = 4
|
||||
##img_poly[text_regions_p[:,:]==5] = 5
|
||||
|
||||
img_poly = np.copy(text_regions_p)
|
||||
|
||||
img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8')
|
||||
if contours_only_text_parent_h:
|
||||
_, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(
|
||||
contours_only_text_parent_h)
|
||||
# Mark a 12-row strip starting at y_max of each header contour, spanning x_min..x_max.
for j in range(len(cy_main)):
|
||||
img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,
|
||||
int(x_min_main[j]):int(x_max_main[j])] = 1
|
||||
# co_text_all_org: the original contours; co_text_all: the units that are actually ordered.
co_text_all_org = contours_only_text_parent + contours_only_text_parent_h
|
||||
if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version:
|
||||
co_text_all = contours_only_dilated + contours_only_text_parent_h
|
||||
else:
|
||||
co_text_all = contours_only_text_parent + contours_only_text_parent_h
|
||||
else:
|
||||
co_text_all_org = contours_only_text_parent
|
||||
if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version:
|
||||
co_text_all = contours_only_dilated
|
||||
else:
|
||||
co_text_all = contours_only_text_parent
|
||||
|
||||
if not len(co_text_all):
|
||||
return [], []
|
||||
|
||||
# One boolean mask per unit, rasterised at 1/6 of the page size.
labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool)
|
||||
|
||||
co_text_all = [(i/6).astype(int) for i in co_text_all]
|
||||
for i in range(len(co_text_all)):
|
||||
img = labels_con[:,:,i].astype(np.uint8)
|
||||
|
||||
#img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
|
||||
|
||||
cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,))
|
||||
labels_con[:,:,i] = img
|
||||
|
||||
|
||||
# Bring all inputs to the model resolution.
labels_con = resize_image(labels_con.astype(np.uint8), height1, width1).astype(bool)
|
||||
img_header_and_sep = resize_image(img_header_and_sep, height1, width1)
|
||||
img_poly = resize_image(img_poly, height3, width3)
|
||||
|
||||
|
||||
|
||||
input_1 = np.zeros((inference_bs, height1, width1, 3))
|
||||
ordered = [list(range(len(co_text_all)))]
|
||||
index_update = 0
|
||||
#print(labels_con.shape[2],"number of regions for reading order")
|
||||
# Pivot-based partitioning: take the group at index_update (initially the only
# group, afterwards the first one with more than one member), use its first
# member i as pivot and ask the model, for every other member j, whether j goes
# after i (y_pr >= 0.5) or before it. The group is replaced by
# [before], [i], [after]; this repeats until every group holds a single element.
while index_update>=0:
|
||||
ij_list = ordered.pop(index_update)
|
||||
i = ij_list.pop(0)
|
||||
|
||||
ante_list = []
|
||||
post_list = []
|
||||
tot_counter = 0
|
||||
batch = []
|
||||
for j in ij_list:
|
||||
img1 = labels_con[:,:,i].astype(float)
|
||||
img2 = labels_con[:,:,j].astype(float)
|
||||
# In both masks, pixels where img_poly == 5 are set to 2 and header-strip pixels to 3.
img1[img_poly==5] = 2
|
||||
img2[img_poly==5] = 2
|
||||
img1[img_header_and_sep==1] = 3
|
||||
img2[img_header_and_sep==1] = 3
|
||||
|
||||
# Channel 0: pivot mask, channel 2: candidate mask, channel 1: page label image.
input_1[len(batch), :, :, 0] = img1 / 3.
|
||||
input_1[len(batch), :, :, 2] = img2 / 3.
|
||||
input_1[len(batch), :, :, 1] = img_poly / 5.
|
||||
|
||||
tot_counter += 1
|
||||
batch.append(j)
|
||||
# Predict once the batch is full or the group is exhausted; only the first
# len(batch) predictions are read, so stale rows of a partial batch are ignored.
if tot_counter % inference_bs == 0 or tot_counter == len(ij_list):
|
||||
y_pr = self.model_reading_order.predict(input_1 , verbose=0)
|
||||
for jb, j in enumerate(batch):
|
||||
if y_pr[jb][0]>=0.5:
|
||||
post_list.append(j)
|
||||
else:
|
||||
ante_list.append(j)
|
||||
batch = []
|
||||
|
||||
if len(ante_list):
|
||||
ordered.insert(index_update, ante_list)
|
||||
index_update += 1
|
||||
ordered.insert(index_update, [i])
|
||||
if len(post_list):
|
||||
ordered.insert(index_update + 1, post_list)
|
||||
|
||||
# Find the next group that still needs partitioning; -1 ends the while loop.
index_update = -1
|
||||
for index_next, ij_list in enumerate(ordered):
|
||||
if len(ij_list) > 1:
|
||||
index_update = index_next
|
||||
break
|
||||
|
||||
ordered = [i[0] for i in ordered]
|
||||
|
||||
##id_all_text = np.array(id_all_text)[index_sort]
|
||||
|
||||
|
||||
# Light mode: translate the order of units back to original contour indices;
# members of a merged blob are emitted sorted by their centre y.
if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version:
|
||||
org_contours_indexes = []
|
||||
for ind in range(len(ordered)):
|
||||
region_with_curr_order = ordered[ind]
|
||||
if region_with_curr_order < len(contours_only_dilated):
|
||||
if np.isscalar(indexes_of_located_cont[region_with_curr_order]):
|
||||
org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]]
|
||||
else:
|
||||
arg_sort_located_cont = np.argsort(center_y_coordinates_of_located[region_with_curr_order])
|
||||
org_contours_indexes = org_contours_indexes + list(np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont]) ##org_contours_indexes + list (
|
||||
else:
|
||||
org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]]
|
||||
|
||||
region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))]
|
||||
return org_contours_indexes, region_ids
|
||||
else:
|
||||
region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))]
|
||||
return ordered, region_ids
|
||||
|
||||
|
||||
|
||||
|
||||
def run(self,
        overwrite: bool = False,
        xml_filename: Optional[str] = None,
        dir_in: Optional[str] = None,
        dir_out: Optional[str] = None,
):
    """
    Compute the reading order for PAGE-XML files and write the updated files.

    Each input file is parsed with ``self.read_xml``, its text regions are
    ordered with ``self.do_order_of_regions_with_model``, an existing
    ``ReadingOrder`` element is replaced by the new one, and the tree is
    written to ``dir_out`` under the input's base name with an ``.xml`` suffix.

    Args:
        overwrite: accepted for interface compatibility; not used by this method.
        xml_filename: path of a single PAGE-XML file (ignored when ``dir_in`` is given).
        dir_in: directory whose XML files are all processed.
        dir_out: directory the result files are written to.

    Raises:
        ValueError: if neither ``xml_filename`` nor ``dir_in`` is given, or if
            ``dir_out`` is missing.
    """
    self.logger.debug("enter run")

    if dir_in:
        ls_xmls = [os.path.join(dir_in, name)
                   for name in filter(is_xml_filename, os.listdir(dir_in))]
    elif xml_filename:
        ls_xmls = [xml_filename]
    else:
        raise ValueError("run requires either a single PAGE-XML filename or a directory")
    if not dir_out:
        # Fail before the model inference instead of crashing in os.path.join afterwards.
        raise ValueError("run requires an output directory")

    for xml_path in ls_xmls:
        self.logger.info(xml_path)

        file_name = Path(xml_path).stem
        (tree_xml, root_xml, bb_coord_printspace, id_paragraph, id_header,
         co_text_paragraph, co_text_header, tot_region_ref,
         x_len, y_len, index_tot_regions, img_poly) = self.read_xml(xml_path)

        order_text_new, _ = self.do_order_of_regions_with_model(
            co_text_paragraph, co_text_header, img_poly[:,:,0])
        # Region ids re-arranged into the predicted reading order.
        id_all_text = np.array(id_paragraph + id_header)[order_text_new]

        # Tags are collected before the old ReadingOrder is removed; a set is
        # enough because only membership is tested below.
        alltags = {elem.tag for elem in root_xml.iter()}

        # The root tag is in Clark notation "{namespace}Name".
        link = root_xml.tag.split('}')[0]+'}'
        name_space = root_xml.tag.split('}')[0].split('{')[1]

        page_element = root_xml.find(link+'Page')

        old_ro = root_xml.find(".//{*}ReadingOrder")
        if old_ro is not None:
            page_element.remove(old_ro)

        ro_subelement = ET.Element('ReadingOrder')
        ro_subelement2 = ET.SubElement(ro_subelement, 'OrderedGroup')
        ro_subelement2.set('id', "ro357564684568544579089")

        for index, id_text in enumerate(id_all_text):
            new_element_2 = ET.SubElement(ro_subelement2, 'RegionRefIndexed')
            new_element_2.set('regionRef', id_text)
            new_element_2.set('index', str(index))

        # Insert after the first child when the page has a PrintSpace or Border
        # element, otherwise as the first child.
        if (link+'PrintSpace' in alltags) or (link+'Border' in alltags):
            page_element.insert(1, ro_subelement)
        else:
            page_element.insert(0, ro_subelement)

        ET.register_namespace("",name_space)
        tree_xml.write(os.path.join(dir_out, file_name+'.xml'),
                       xml_declaration=True,
                       method='xml',
                       encoding="utf8",
                       default_namespace=None)
|
||||
|
||||
#sys.exit()
|
||||
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"version": "0.4.0",
|
||||
"version": "0.5.0",
|
||||
"git_url": "https://github.com/qurator-spk/eynollah",
|
||||
"dockerhub": "ocrd/eynollah",
|
||||
"tools": {
|
||||
|
|
@ -82,13 +82,23 @@
|
|||
}
|
||||
},
|
||||
"resources": [
|
||||
{
|
||||
"url": "https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1",
|
||||
"name": "models_layout_v0_5_0",
|
||||
"type": "archive",
|
||||
"path_in_archive": "models_layout_v0_5_0",
|
||||
"size": 3525684179,
|
||||
"description": "Models for layout detection, reading order detection, textline detection, page extraction, column classification, table detection, binarization, image enhancement",
|
||||
"version_range": ">= v0.5.0"
|
||||
},
|
||||
{
|
||||
"description": "models for eynollah (TensorFlow SavedModel format)",
|
||||
"url": "https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz",
|
||||
"name": "default",
|
||||
"size": 1894627041,
|
||||
"type": "archive",
|
||||
"path_in_archive": "models_eynollah"
|
||||
"path_in_archive": "models_eynollah",
|
||||
"version_range": ">= v0.3.0, < v0.5.0"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from functools import cached_property
|
||||
from typing import Optional
|
||||
from ocrd_models import OcrdPage
|
||||
from ocrd import Processor, OcrdPageResult
|
||||
from ocrd import OcrdPageResultImage, Processor, OcrdPageResult
|
||||
|
||||
from .eynollah import Eynollah, EynollahXmlWriter
|
||||
|
||||
|
|
@ -9,8 +10,8 @@ class EynollahProcessor(Processor):
|
|||
# already employs GPU (without singleton process atm)
|
||||
max_workers = 1
|
||||
|
||||
@property
|
||||
def executable(self):
|
||||
@cached_property
|
||||
def executable(self) -> str:
|
||||
return 'ocrd-eynollah-segment'
|
||||
|
||||
def setup(self) -> None:
|
||||
|
|
@ -20,7 +21,6 @@ class EynollahProcessor(Processor):
|
|||
"and parameter 'light_version' (faster+simpler method for main region detection and deskewing)")
|
||||
self.eynollah = Eynollah(
|
||||
self.resolve_resource(self.parameter['models']),
|
||||
logger=self.logger,
|
||||
allow_enhancement=self.parameter['allow_enhancement'],
|
||||
curved_line=self.parameter['curved_line'],
|
||||
right2left=self.parameter['right_to_left'],
|
||||
|
|
@ -33,6 +33,7 @@ class EynollahProcessor(Processor):
|
|||
headers_off=self.parameter['headers_off'],
|
||||
tables=self.parameter['tables'],
|
||||
)
|
||||
self.eynollah.logger = self.logger
|
||||
self.eynollah.plotter = None
|
||||
|
||||
def shutdown(self):
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import tensorflow as tf
|
|||
from tensorflow.keras.models import load_model
|
||||
from tensorflow.python.keras import backend as tensorflow_backend
|
||||
|
||||
from .utils import is_image_filename
|
||||
|
||||
def resize_image(img_in, input_height, input_width):
|
||||
return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
|
||||
|
|
@ -314,8 +315,8 @@ class SbbBinarizer:
|
|||
prediction_true = prediction_true.astype(np.uint8)
|
||||
return prediction_true[:,:,0]
|
||||
|
||||
def run(self, image=None, image_path=None, save=None, use_patches=False, dir_in=None, dir_out=None):
|
||||
print(dir_in,'dir_in')
|
||||
def run(self, image=None, image_path=None, output=None, use_patches=False, dir_in=None):
|
||||
# print(dir_in,'dir_in')
|
||||
if not dir_in:
|
||||
if (image is not None and image_path is not None) or \
|
||||
(image is None and image_path is None):
|
||||
|
|
@ -343,11 +344,11 @@ class SbbBinarizer:
|
|||
kernel = np.ones((5, 5), np.uint8)
|
||||
img_last[:, :][img_last[:, :] > 0] = 255
|
||||
img_last = (img_last[:, :] == 0) * 255
|
||||
if save:
|
||||
cv2.imwrite(save, img_last)
|
||||
if output:
|
||||
cv2.imwrite(output, img_last)
|
||||
return img_last
|
||||
else:
|
||||
ls_imgs = os.listdir(dir_in)
|
||||
ls_imgs = list(filter(is_image_filename, os.listdir(dir_in)))
|
||||
for image_name in ls_imgs:
|
||||
image_stem = image_name.split('.')[0]
|
||||
print(image_name,'image_name')
|
||||
|
|
@ -374,4 +375,4 @@ class SbbBinarizer:
|
|||
img_last[:, :][img_last[:, :] > 0] = 255
|
||||
img_last = (img_last[:, :] == 0) * 255
|
||||
|
||||
cv2.imwrite(os.path.join(dir_out,image_stem+'.png'), img_last)
|
||||
cv2.imwrite(os.path.join(output, image_stem + '.png'), img_last)
|
||||
|
|
|
|||
|
|
@ -1012,7 +1012,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
|
|||
(regions_model_full[:,:,0]==2)).sum()
|
||||
pixels_main = all_pixels - pixels_header
|
||||
|
||||
if (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ):
|
||||
if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ):
|
||||
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2
|
||||
contours_only_text_parent_head.append(contours_only_text_parent[ii])
|
||||
conf_contours_head.append(None) # why not conf_contours[ii], too?
|
||||
|
|
@ -2017,7 +2017,7 @@ def return_boxes_of_images_by_order_of_reading_new(
|
|||
x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)
|
||||
|
||||
ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
|
||||
for column in range(i_s_nc, x_end_biggest_column):
|
||||
for column in range(int(i_s_nc), int(x_end_biggest_column)):
|
||||
ind_args_in_col=ind_args_between[x_starting_all_between_nm_wc==column]
|
||||
#print('babali2')
|
||||
#print(ind_args_in_col,'ind_args_in_col')
|
||||
|
|
@ -2069,7 +2069,7 @@ def return_boxes_of_images_by_order_of_reading_new(
|
|||
x_end_itself=x_end_copy.pop(il)
|
||||
|
||||
#print(y_copy,'y_copy2')
|
||||
for column in range(x_start_itself, x_end_itself+1):
|
||||
for column in range(int(x_start_itself), int(x_end_itself)+1):
|
||||
#print(column,'cols')
|
||||
y_in_cols=[]
|
||||
for yic in range(len(y_copy)):
|
||||
|
|
@ -2198,3 +2198,14 @@ def return_boxes_of_images_by_order_of_reading_new(
|
|||
|
||||
logger.debug('exit return_boxes_of_images_by_order_of_reading_new')
|
||||
return boxes, peaks_neg_tot_tables
|
||||
|
||||
def is_image_filename(fname: str) -> bool:
    """Return True when *fname* ends in a known image extension (case-insensitive)."""
    image_suffixes = ('.jpg', '.jpeg', '.png', '.tif', '.tiff')
    lowered = fname.lower()
    return lowered.endswith(image_suffixes)
|
||||
|
||||
def is_xml_filename(fname: str) -> bool:
    """Return True when *fname* ends in ``.xml`` (case-insensitive)."""
    lowered = fname.lower()
    return lowered.endswith('.xml')
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
|
|||
mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1]))
|
||||
mask_marginals=mask_marginals.astype(np.uint8)
|
||||
|
||||
|
||||
text_with_lines=text_with_lines.astype(np.uint8)
|
||||
##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3)
|
||||
|
||||
|
|
@ -26,8 +25,12 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
|
|||
text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.8),text_with_lines.shape[1])
|
||||
text_with_lines=cv2.erode(text_with_lines,kernel,iterations=7)
|
||||
text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1])
|
||||
|
||||
|
||||
|
||||
|
||||
if light_version:
|
||||
kernel_hor = np.ones((1, 5), dtype=np.uint8)
|
||||
text_with_lines = cv2.erode(text_with_lines,kernel_hor,iterations=6)
|
||||
|
||||
text_with_lines_y=text_with_lines.sum(axis=0)
|
||||
text_with_lines_y_eroded=text_with_lines_eroded.sum(axis=0)
|
||||
|
||||
|
|
@ -40,8 +43,10 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
|
|||
elif thickness_along_y_percent>=30 and thickness_along_y_percent<50:
|
||||
min_textline_thickness=20
|
||||
else:
|
||||
min_textline_thickness=40
|
||||
|
||||
if light_version:
|
||||
min_textline_thickness=45
|
||||
else:
|
||||
min_textline_thickness=40
|
||||
|
||||
|
||||
if thickness_along_y_percent>=14:
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ import numpy as np
|
|||
import cv2
|
||||
from scipy.signal import find_peaks
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
from multiprocessing import Process, Queue, cpu_count
|
||||
from multiprocessing import Pool
|
||||
from .rotate import rotate_image
|
||||
from .resize import resize_image
|
||||
from .contour import (
|
||||
|
|
@ -1472,7 +1474,7 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
|
|||
main_page=False, logger=None, plotter=None, map=None):
|
||||
if main_page and plotter:
|
||||
plotter.save_plot_of_textline_density(img_patch_org)
|
||||
|
||||
|
||||
img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1]))
|
||||
img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0]
|
||||
|
||||
|
|
@ -1493,7 +1495,10 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
|
|||
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
|
||||
angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
elif main_page:
|
||||
angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
|
||||
#angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
|
||||
angles = np.concatenate((np.linspace(-12, -7, n_tot_angles // 4),
|
||||
np.linspace(-6, 6, n_tot_angles // 2),
|
||||
np.linspace(7, 12, n_tot_angles // 4)))
|
||||
angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
||||
|
||||
early_slope_edge=11
|
||||
|
|
|
|||
488
src/eynollah/utils/utils_ocr.py
Normal file
488
src/eynollah/utils/utils_ocr.py
Normal file
|
|
@ -0,0 +1,488 @@
|
|||
import numpy as np
|
||||
import cv2
|
||||
import tensorflow as tf
|
||||
from scipy.signal import find_peaks
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
import math
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from Bio import pairwise2
|
||||
from .resize import resize_image
|
||||
|
||||
def decode_batch_predictions(pred, num_to_char, max_len = 128):
    """Decode a batch of CTC outputs into a list of strings.

    ``pred`` is read as (batch, time steps, ...): only its first two
    dimensions are used here. ``num_to_char`` is called on each decoded label
    sequence and its result is joined into one UTF-8 string.
    At most ``max_len`` labels per item are kept.
    """
    batch_size, time_steps = pred.shape[0], pred.shape[1]
    # One length per batch item: every item spans all time steps.
    input_len = np.ones(batch_size) * time_steps

    # ctc_decode returns a tuple whose first element is a list of decoded
    # label tensors; the first of them is used.
    # NOTE(review): `greedy` is not passed; if it defaults to True in the
    # TensorFlow version in use, beam_width has no effect - confirm.
    decoded = tf.keras.backend.ctc_decode(pred,
                                          input_length = input_len,
                                          beam_width = 100)
    best_paths = decoded[0][0][:, :max_len]

    return [tf.strings.reduce_join(num_to_char(labels)).numpy().decode("utf-8")
            for labels in best_paths]
|
||||
|
||||
|
||||
def distortion_free_resize(image, img_size):
    """Resize *image* to fit ``img_size`` = (width, height) keeping its aspect
    ratio, pad it to exactly that size, then transpose and flip it.

    When the padding amount is odd, the extra pixel goes to the top / left.
    """
    target_w, target_h = img_size
    image = tf.image.resize(image, size=(target_h, target_w), preserve_aspect_ratio=True)

    # Amount of padding still missing after the aspect-preserving resize.
    pad_height = target_h - tf.shape(image)[0]
    pad_width = target_w - tf.shape(image)[1]

    # Floor half goes to the bottom/right; the remainder (one more when the
    # amount is odd) goes to the top/left.
    pad_height_bottom = pad_height // 2
    pad_height_top = pad_height - pad_height_bottom
    pad_width_right = pad_width // 2
    pad_width_left = pad_width - pad_width_right

    paddings = [
        [pad_height_top, pad_height_bottom],
        [pad_width_left, pad_width_right],
        [0, 0],
    ]
    image = tf.pad(image, paddings=paddings)

    image = tf.transpose(image, (1, 0, 2))
    return tf.image.flip_left_right(image)
|
||||
|
||||
def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image):
    """Find a column at which a long text line image can be split in two.

    The first channel is summed per column and smoothed; if the smoothed
    profile has more than 70 peaks, the strongest peak inside a window of
    +/- 6% of the width around the horizontal centre is returned.

    Args:
        textline_image: image array indexed as [row, column, channel].

    Returns:
        The column index of the chosen peak, or None when the profile has at
        most 70 peaks or no peak lies inside the central window.
    """
    width = np.shape(textline_image)[1]
    common_window = int(0.06*width)

    width1 = int ( width/2. - common_window )
    width2 = int ( width/2. + common_window )

    img_sum = np.sum(textline_image[:,:,0], axis=0)
    sum_smoothed = gaussian_filter1d(img_sum, 3)

    peaks_real, _ = find_peaks(sum_smoothed, height=0)
    if len(peaks_real) <= 70:
        return None

    peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
    if len(peaks_real) == 0:
        # No candidate in the central window: np.argmax would raise on an
        # empty array, so report "no split point" instead.
        return None

    return peaks_real[np.argmax(sum_smoothed[peaks_real])]
|
||||
def fit_text_single_line(draw, text, font_path, max_width, max_height):
    """Pick the largest font (sizes 50, 48, ..., 12) whose rendering of *text*
    fits into ``max_width`` x ``max_height``; fall back to size 10.

    ``draw`` must provide ``textbbox``; the font is loaded from ``font_path``.
    """
    smallest_size = 10
    for font_size in range(50, smallest_size, -2):
        font = ImageFont.truetype(font_path, font_size)
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        if right - left <= max_width and bottom - top <= max_height:
            return font  # first size that fits is the largest one
    return ImageFont.truetype(font_path, smallest_size)
|
||||
|
||||
def return_textlines_split_if_needed(textline_image, textline_image_bin=None):
    """Split a text line image (and optionally its binarized twin) in two.

    Returns ``([left, right], [left_bin, right_bin])``; the second element is
    None when no binarized image is given, and ``(None, None)`` is returned
    when no split point is found.
    """
    split_point = return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image)
    if not split_point:
        return None, None

    halves = [textline_image[:, :split_point,:], textline_image[:, split_point:,:]]
    if textline_image_bin is None:
        return halves, None

    halves_bin = [textline_image_bin[:, :split_point,:], textline_image_bin[:, split_point:,:]]
    return halves, halves_bin
|
||||
def preprocess_and_resize_image_for_ocrcnn_model(img, image_height, image_width):
    """Scale *img* to ``image_height``, place it left-aligned on a white
    ``image_height`` x ``image_width`` x 3 canvas and divide by 255.

    The scaled width is capped at ``image_width``. An image with a zero-sized
    dimension yields a canvas filled with ones.
    """
    src_height, src_width = img.shape[0], img.shape[1]
    if src_height == 0 or src_width == 0:
        return np.ones((image_height, image_width, 3))

    scale = image_height / float(src_height)
    width_new = min(int(scale * src_width), image_width)
    if width_new == 0:
        width_new = src_width

    resized = resize_image(img, image_height, width_new)
    canvas = np.ones((image_height, image_width, 3))*255
    canvas[:,:width_new,:] = resized[:,:,:]
    return canvas / 255.
|
||||
|
||||
def get_deskewed_contour_and_bb_and_image(contour, image, deskew_angle):
    """Rotate *image* by *deskew_angle* onto an enlarged canvas and return the
    crop given by the bounding rectangle of the equally transformed *contour*.
    """
    h_in, w_in = image.shape[:2]
    center = (w_in // 2, h_in // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, deskew_angle, 1.0)

    # Canvas size that holds the whole rotated image.
    cos_angle, sin_angle = abs(rotation_matrix[0, 0]), abs(rotation_matrix[0, 1])
    new_w = int((h_in * sin_angle) + (w_in * cos_angle))
    new_h = int((h_in * cos_angle) + (w_in * sin_angle))

    # Shift so the rotated content is centred on the new canvas.
    rotation_matrix[0, 2] += (new_w / 2) - center[0]
    rotation_matrix[1, 2] += (new_h / 2) - center[1]

    deskewed_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h))

    # Apply the same affine transform to the contour points.
    contour_points = np.array(contour, dtype=np.float32)
    transformed_points = cv2.transform(np.array([contour_points]), rotation_matrix)[0]

    x, y, w, h = cv2.boundingRect(np.array(transformed_points, dtype=np.int32))
    return deskewed_image[y:y+h, x:x+w]
|
||||
|
||||
def rotate_image_with_padding(image, angle, border_value=(0,0,0)):
    """Rotate *image* by *angle* degrees on a canvas large enough to hold it.

    Args:
        image: image array whose first two dimensions are (height, width).
        angle: rotation angle passed to ``cv2.getRotationMatrix2D``.
        border_value: fill value for the uncovered canvas area.

    Returns:
        The rotated image, or a copy of the input when the warp fails.
    """
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)

    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)

    # Size of the bounding box of the rotated image.
    cos = abs(rotation_matrix[0, 0])
    sin = abs(rotation_matrix[0, 1])
    new_w = int((h * sin) + (w * cos))
    new_h = int((h * cos) + (w * sin))

    # Translate so the rotated content is centred on the new canvas.
    rotation_matrix[0, 2] += (new_w / 2) - center[0]
    rotation_matrix[1, 2] += (new_h / 2) - center[1]

    try:
        rotated_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h), borderValue=border_value)
    except Exception:
        # Best effort: keep the unrotated image when the warp fails, but no
        # longer swallow KeyboardInterrupt / SystemExit as the bare except did.
        rotated_image = np.copy(image)

    return rotated_image
|
||||
|
||||
def get_orientation_moments(contour):
    """Return the orientation of *contour* in degrees, derived from its
    central moments mu11, mu20 and mu02.

    When mu20 equals mu02 the result is 90 for positive mu11 and -90 otherwise.
    """
    moments = cv2.moments(contour)
    mu11 = moments["mu11"]
    spread = moments["mu20"] - moments["mu02"]
    if spread != 0:
        # arctan2 yields radians; convert to degrees.
        return np.degrees(0.5 * np.arctan2(2 * mu11, spread))
    # NOTE(review): 0.5 * arctan2(2*mu11, 0) would give +/-45 (or 0) here;
    # confirm that the +/-90 special case is intended.
    return 90 if mu11 > 0 else -90
|
||||
|
||||
|
||||
def get_orientation_moments_of_mask(mask):
    """
    Return the orientation (degrees) of the largest external contour in ``mask``.

    ``mask`` is cast to uint8 and only its first channel (``mask[:, :, 0]``)
    is searched for contours. The contour with the greatest area is then
    handed to ``get_orientation_moments``, which applies the moment formula.
    """
    first_channel = mask.astype('uint8')[:, :, 0]
    found, _ = cv2.findContours(first_channel, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # NOTE(review): with no contours, None is handed to cv2.moments -- confirm
    # OpenCV treats that as an empty shape rather than raising.
    biggest = max(found, key=cv2.contourArea) if found else None
    return get_orientation_moments(biggest)
|
||||
|
||||
def get_contours_and_bounding_boxes(mask):
    """
    Return ``(x, y, w, h)`` of the bounding rectangle of the largest external
    contour found in the single-channel ``mask``.
    """
    found, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # NOTE(review): with no contours, None is handed to cv2.boundingRect; a
    # caller in this file tests for w == 0 or h == 0 afterwards -- confirm
    # OpenCV returns an empty rectangle here rather than raising.
    biggest = max(found, key=cv2.contourArea) if found else None

    left, top, box_w, box_h = cv2.boundingRect(biggest)
    return left, top, box_w, box_h
|
||||
|
||||
def return_splitting_point_of_image(image_to_spliited):
    """
    Pick up to three column indices at which a text-line image can be split.

    The first channel is summed over rows, smoothed with a Gaussian of
    sigma 1, and searched for peaks. Peaks within 3% of the image width from
    either side are dropped. The (at most) three highest remaining peaks are
    returned in ascending column order.
    """
    width = np.shape(image_to_spliited)[1]
    margin = int(0.03 * width)
    lower_limit = int(margin)
    upper_limit = int(width - margin)

    column_profile = gaussian_filter1d(np.sum(image_to_spliited[:, :, 0], axis=0), 1)

    candidates, _ = find_peaks(column_profile, height=0)
    candidates = candidates[(candidates < upper_limit) & (candidates > lower_limit)]

    # Order the candidates from highest to lowest profile value, keep three.
    by_height_desc = candidates[np.argsort(column_profile[candidates])][::-1]
    return np.sort(by_height_desc[:3])
|
||||
|
||||
def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved, img_bin_curved=None):
    """
    Straighten a curved text line piece by piece and stitch the pieces together.

    The image is cut at the columns reported by
    ``return_splitting_point_of_image``. Each piece whose mask orientation is
    below 45 degrees in magnitude is rotated by that orientation and cropped
    to the bounding box of its rotated mask; other pieces (and pieces whose
    rotated mask yields an empty box) are used as they are. Every piece is
    then passed through ``resize_image(piece, 32, w_relative)`` and the
    results are pasted side by side onto a white (255) canvas 32 rows high.

    Args:
        img_curved: text-line image, indexed ``[row, column, channel]``.
        mask_curved: mask with the same indexing as ``img_curved``.
        img_bin_curved: optional second image processed exactly like
            ``img_curved`` (presumably the binarized version -- confirm with
            the caller).

    Returns:
        ``(image, bin_image)``. ``bin_image`` is None when no
        ``img_bin_curved`` was given. If no split column is found, the inputs
        are returned untouched.
    """
    split_columns = return_splitting_point_of_image(img_curved)
    if len(split_columns) == 0:
        return img_curved, img_bin_curved

    with_bin = img_bin_curved is not None

    # Stage 1: cut into vertical strips and measure each strip's orientation.
    # A None edge leaves that side of the slice open.
    edges = [None, *split_columns, None]
    segments = []
    for left, right in zip(edges[:-1], edges[1:]):
        seg_img = img_curved[:, left:right, :]
        seg_bin = img_bin_curved[:, left:right, :] if with_bin else None
        seg_mask = mask_curved[:, left:right, :]
        seg_angle = get_orientation_moments_of_mask(seg_mask)
        segments.append((seg_img, seg_mask, seg_angle, seg_bin))

    # Stage 2: deskew (where applicable) and resize every strip.
    deskewed = []
    deskewed_bin = []
    for seg_img, seg_mask, seg_angle, seg_bin in segments:
        straight = None
        straight_bin = None

        if abs(seg_angle) < 45:
            rotated = rotate_image_with_padding(seg_img, seg_angle, border_value=(255,255,255) )
            if with_bin:
                rotated_bin = rotate_image_with_padding(seg_bin, seg_angle, border_value=(255,255,255) )
            rotated_mask = rotate_image_with_padding(seg_mask, seg_angle).astype('uint8')

            # Bounding box of the mask after rotation.
            x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(rotated_mask[:,:,0])
            if w_n != 0 and h_n != 0:
                straight = rotated[y_n:y_n+h_n, x_n:x_n+w_n, :]
                if with_bin:
                    straight_bin = rotated_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]

        if straight is None:
            # Either too steep to deskew or the rotated mask was empty:
            # continue with an unrotated copy of the strip.
            straight = np.copy(seg_img)
            if with_bin:
                straight_bin = np.copy(seg_bin)

        w_relative = int(32 * straight.shape[1]/float(straight.shape[0]) )
        if w_relative == 0:
            w_relative = straight.shape[1]
        straight = resize_image(straight, 32, w_relative)
        if with_bin:
            straight_bin = resize_image(straight_bin, 32, w_relative)

        deskewed.append(straight)
        if with_bin:
            deskewed_bin.append(straight_bin)

    # Stage 3: paste the strips next to each other on a white canvas.
    widths = [piece.shape[1] for piece in deskewed]
    total_width = sum(widths)

    img_final_deskewed = np.zeros((32, total_width, 3))+255
    img_bin_final_deskewed = np.zeros((32, total_width, 3))+255 if with_bin else None

    offset = 0
    for position, piece in enumerate(deskewed):
        stop = offset + widths[position]
        img_final_deskewed[:, offset:stop, :] = piece[:, :, :]
        if with_bin:
            img_bin_final_deskewed[:, offset:stop, :] = deskewed_bin[position][:, :, :]
        offset = stop

    return img_final_deskewed, img_bin_final_deskewed
|
||||
|
||||
def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind):
    """
    Shift a contour by a box offset, in place, and return it.

    ``box_ind[2]`` is added to column 0 and ``box_ind[0]`` to column 1 of
    ``textline_contour``. The input array itself is modified.
    """
    for column, offset in ((0, box_ind[2]), (1, box_ind[0])):
        textline_contour[:, column] = textline_contour[:, column] + offset
    return textline_contour
|
||||
|
||||
|
||||
def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, prediction_model, b_s_ocr, num_to_char, textline_light=False, curved_line=False, all_box_coord=None):
    """
    Run the CNN-RNN OCR model on every text line and group the text by region.

    Each line polygon is cropped from ``image`` (pixels outside the polygon
    are set to 255), resized with
    ``preprocess_and_resize_image_for_ocrcnn_model`` and predicted in batches
    of ``b_s_ocr``. Lines whose width, scaled to a height of 32, is 640 or
    more are offered to ``return_textlines_split_if_needed``; if that yields
    pieces, the first two are recognised separately and their texts joined
    with a space. A region without lines contributes one placeholder line.

    Args:
        image: page image, indexed ``[y, x, channel]``.
        all_found_textline_polygons: one sequence of line polygons per region.
        prediction_model: object exposing ``predict(batch, verbose=0)``.
        b_s_ocr: batch size for prediction.
        num_to_char: forwarded to ``decode_batch_predictions``.
        textline_light: if true (or ``curved_line``), polygons are used as
            given, without adding box offsets.
        curved_line: see ``textline_light``.
        all_box_coord: per-region box coordinates. Required when neither
            ``textline_light`` nor ``curved_line`` is set: ``box[2]`` is
            added to the polygon's first column and ``box[0]`` to its second
            (via ``return_textline_contour_with_added_box_coordinate``).

    Returns:
        A list with one entry per region, each a list of recognised strings
        in line order.

    Raises:
        ValueError: if box offsets are needed but ``all_box_coord`` is None.
            (Previously this path failed with a NameError because
            ``all_box_coord`` was not defined in this function.)
    """
    image_width = 512
    image_height = 32

    cropped_lines = []
    # Region index of every text line (one entry per line, not per crop).
    cropped_lines_region_indexer = []
    # Per crop: 0 = whole line, 1 = first half of a split line, -1 = second half.
    cropped_lines_meging_indexing = []
    region_count = 0

    for region_idx, region_polygons in enumerate(all_found_textline_polygons):
        region_count = region_idx + 1

        if len(region_polygons) == 0:
            # Placeholder so the region still yields exactly one (empty) line.
            cropped_lines_region_indexer.append(region_idx)
            cropped_lines_meging_indexing.append(0)
            cropped_lines.append(np.ones((image_height, image_width, 3)))
            continue

        for ind_poly in region_polygons:
            cropped_lines_region_indexer.append(region_idx)

            if not (textline_light or curved_line):
                if all_box_coord is None:
                    raise ValueError(
                        "all_box_coord is required when neither textline_light "
                        "nor curved_line is set"
                    )
                # Work on a copy: the offset helper modifies its argument.
                ind_poly = copy.deepcopy(ind_poly)
                ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, all_box_coord[region_idx])

            # Clamp negative coordinates on a fresh array so the caller's
            # polygon is never modified.
            ind_poly = np.maximum(ind_poly, 0)

            x, y, w, h = cv2.boundingRect(ind_poly)
            w_scaled = w * image_height/float(h)

            mask_poly = np.zeros(image.shape)
            mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1))
            mask_poly = mask_poly[y:y+h, x:x+w, :]

            # Copy only the crop, not the whole page.
            img_crop = np.copy(image[y:y+h, x:x+w, :])
            img_crop[mask_poly==0] = 255

            splited_images = None
            if not w_scaled < 640:
                splited_images, _ = return_textlines_split_if_needed(img_crop, None)

            if splited_images:
                cropped_lines.append(preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width))
                cropped_lines_meging_indexing.append(1)
                cropped_lines.append(preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width))
                cropped_lines_meging_indexing.append(-1)
            else:
                cropped_lines.append(preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width))
                cropped_lines_meging_indexing.append(0)

    # Batched prediction; the last batch may be shorter than b_s_ocr.
    extracted_texts = []
    for n_start in range(0, len(cropped_lines), b_s_ocr):
        imgs = np.array(cropped_lines[n_start:n_start + b_s_ocr])
        imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3)

        preds = prediction_model.predict(imgs, verbose=0)
        pred_texts = decode_batch_predictions(preds, num_to_char)

        for ib in range(imgs.shape[0]):
            extracted_texts.append(pred_texts[ib].replace("[UNK]", ""))

    # Re-join split lines: a crop flagged 1 absorbs the following crop (-1).
    extracted_texts_merged = []
    for ind, merge_flag in enumerate(cropped_lines_meging_indexing):
        if merge_flag == 0:
            extracted_texts_merged.append(extracted_texts[ind])
        elif merge_flag == 1:
            extracted_texts_merged.append(extracted_texts[ind] + " " + extracted_texts[ind + 1])

    # After merging there is exactly one text per line, aligned with the
    # region indexer, so grouping is a single linear pass.
    ocr_all_textlines = [[] for _ in range(region_count)]
    for region_idx, text_textline in zip(cropped_lines_region_indexer, extracted_texts_merged):
        ocr_all_textlines[region_idx].append(text_textline)
    return ocr_all_textlines
|
||||
|
||||
def biopython_align(str1, str2):
    """
    Globally align two strings and return the two aligned sequences
    (``seqA``, ``seqB``) of the first alignment reported by Biopython.
    """
    # globalms scoring arguments: match +2, mismatch -1, gap open -2, gap extend -2.
    top_hit = pairwise2.align.globalms(str1, str2, 2, -1, -2, -2)[0]
    return top_hit.seqA, top_hit.seqB
|
||||
|
|
@ -46,16 +46,22 @@ def create_page_xml(imageFilename, height, width):
|
|||
))
|
||||
return pcgts
|
||||
|
||||
def xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right):
    """
    Attach a reading order to ``page``.

    A ``ReadingOrderType`` with a single ordered group is set on the page and
    filled with indexed region references in this order: left marginalia,
    text regions (following ``order_of_texts``), right marginalia.

    Args:
        page: page object receiving the reading order.
        order_of_texts: zero-based text-region indices in reading order; each
            is turned into a region id via ``region_counter.region_id(i + 1)``.
        id_of_marginalia_left: region ids of the left marginalia.
        id_of_marginalia_right: region ids of the right marginalia.
    """
    region_order = ReadingOrderType()
    og = OrderedGroupType(id="ro357564684568544579089")
    page.set_ReadingOrder(region_order)
    region_order.set_OrderedGroup(og)
    region_counter = EynollahIdCounter()

    def _append_region_ref(region_ref):
        # Index the reference with the running counter, then advance it.
        og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=region_ref))
        region_counter.inc('region')

    for id_marginal in id_of_marginalia_left:
        _append_region_ref(id_marginal)

    for text_index in order_of_texts:
        _append_region_ref(region_counter.region_id(text_index + 1))

    for id_marginal in id_of_marginalia_right:
        _append_region_ref(id_marginal)
|
||||
|
||||
|
|
|
|||
|
|
@ -56,10 +56,12 @@ class EynollahXmlWriter():
|
|||
points_page_print = points_page_print + ' '
|
||||
return points_page_print[:-1]
|
||||
|
||||
def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter):
|
||||
def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_all_textlines_textregion):
|
||||
for j in range(len(all_found_textline_polygons_marginals[marginal_idx])):
|
||||
coords = CoordsType()
|
||||
textline = TextLineType(id=counter.next_line_id, Coords=coords)
|
||||
if ocr_all_textlines_textregion:
|
||||
textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] )
|
||||
marginal_region.add_TextLine(textline)
|
||||
marginal_region.set_orientation(-slopes_marginals[marginal_idx])
|
||||
points_co = ''
|
||||
|
|
@ -119,7 +121,7 @@ class EynollahXmlWriter():
|
|||
points_co += ','
|
||||
points_co += str(textline_y_coord)
|
||||
|
||||
if (self.curved_line or self.textline_light) and np.abs(slopes[region_idx]) <= 45:
|
||||
if self.textline_light or (self.curved_line and np.abs(slopes[region_idx]) <= 45):
|
||||
if len(contour_textline) == 2:
|
||||
points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
|
|
@ -128,7 +130,7 @@ class EynollahXmlWriter():
|
|||
points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y))
|
||||
elif (self.curved_line or self.textline_light) and np.abs(slopes[region_idx]) > 45:
|
||||
elif self.curved_line and np.abs(slopes[region_idx]) > 45:
|
||||
if len(contour_textline)==2:
|
||||
points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2])/self.scale_x))
|
||||
points_co += ','
|
||||
|
|
@ -168,7 +170,7 @@ class EynollahXmlWriter():
|
|||
with open(self.output_filename, 'w') as f:
|
||||
f.write(to_xml(pcgts))
|
||||
|
||||
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion):
|
||||
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals_left, found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, conf_contours_textregion=None, skip_layout_reading_order=False):
|
||||
self.logger.debug('enter build_pagexml_no_full_layout')
|
||||
|
||||
# create the file structure
|
||||
|
|
@ -179,12 +181,13 @@ class EynollahXmlWriter():
|
|||
counter = EynollahIdCounter()
|
||||
if len(found_polygons_text_region) > 0:
|
||||
_counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
|
||||
id_of_marginalia = [_counter_marginals.next_region_id for _ in found_polygons_marginals]
|
||||
xml_reading_order(page, order_of_texts, id_of_marginalia)
|
||||
id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left]
|
||||
id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right]
|
||||
xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right)
|
||||
|
||||
for mm in range(len(found_polygons_text_region)):
|
||||
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm]),
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]),
|
||||
)
|
||||
#textregion.set_conf(conf_contours_textregion[mm])
|
||||
page.add_TextRegion(textregion)
|
||||
|
|
@ -193,12 +196,29 @@ class EynollahXmlWriter():
|
|||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_marginals)):
|
||||
|
||||
for mm in range(len(found_polygons_marginals_left)):
|
||||
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord)))
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord)))
|
||||
page.add_TextRegion(marginal)
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter)
|
||||
if ocr_all_textlines_marginals_left:
|
||||
ocr_textlines = ocr_all_textlines_marginals_left[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
|
||||
#print(ocr_textlines, mm, len(all_found_textline_polygons_marginals_left[mm]) )
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_marginals_right)):
|
||||
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord)))
|
||||
page.add_TextRegion(marginal)
|
||||
if ocr_all_textlines_marginals_right:
|
||||
ocr_textlines = ocr_all_textlines_marginals_right[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_text_region_img)):
|
||||
img_region = ImageRegionType(id=counter.next_region_id, Coords=CoordsType())
|
||||
|
|
@ -242,7 +262,7 @@ class EynollahXmlWriter():
|
|||
|
||||
return pcgts
|
||||
|
||||
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines, conf_contours_textregion, conf_contours_textregion_h):
|
||||
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals_left,found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None):
|
||||
self.logger.debug('enter build_pagexml_full_layout')
|
||||
|
||||
# create the file structure
|
||||
|
|
@ -252,8 +272,9 @@ class EynollahXmlWriter():
|
|||
|
||||
counter = EynollahIdCounter()
|
||||
_counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
|
||||
id_of_marginalia = [_counter_marginals.next_region_id for _ in found_polygons_marginals]
|
||||
xml_reading_order(page, order_of_texts, id_of_marginalia)
|
||||
id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left]
|
||||
id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right]
|
||||
xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right)
|
||||
|
||||
for mm in range(len(found_polygons_text_region)):
|
||||
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
|
||||
|
|
@ -272,25 +293,43 @@ class EynollahXmlWriter():
|
|||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord)))
|
||||
page.add_TextRegion(textregion)
|
||||
|
||||
if ocr_all_textlines:
|
||||
ocr_textlines = ocr_all_textlines[mm]
|
||||
if ocr_all_textlines_h:
|
||||
ocr_textlines = ocr_all_textlines_h[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_marginals)):
|
||||
for mm in range(len(found_polygons_marginals_left)):
|
||||
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord)))
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord)))
|
||||
page.add_TextRegion(marginal)
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter)
|
||||
|
||||
if ocr_all_textlines_marginals_left:
|
||||
ocr_textlines = ocr_all_textlines_marginals_left[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_marginals_right)):
|
||||
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord)))
|
||||
page.add_TextRegion(marginal)
|
||||
if ocr_all_textlines_marginals_right:
|
||||
ocr_textlines = ocr_all_textlines_marginals_right[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_drop_capitals)):
|
||||
dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord)))
|
||||
page.add_TextRegion(dropcapital)
|
||||
###all_box_coord_drop = None
|
||||
###slopes_drop = None
|
||||
###self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)
|
||||
all_box_coord_drop = None
|
||||
slopes_drop = None
|
||||
if ocr_all_textlines_drop:
|
||||
ocr_textlines = ocr_all_textlines_drop[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=ocr_textlines)
|
||||
|
||||
for mm in range(len(found_polygons_text_region_img)):
|
||||
page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
|
||||
|
|
@ -303,18 +342,28 @@ class EynollahXmlWriter():
|
|||
|
||||
return pcgts
|
||||
|
||||
def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False):
    """
    Serialize a contour as a space-separated ``"x,y x,y ..."`` string.

    Args:
        contour: iterable of points, each either ``[x, y]`` or ``[[x, y]]``.
        page_coord: page offsets; ``page_coord[2]`` is added to x and
            ``page_coord[0]`` to y before scaling.
        skip_layout_reading_order: if true, no page offset is added.

    Returns:
        The points, each divided by ``self.scale_x`` / ``self.scale_y`` and
        truncated to int; an empty string for an empty contour.
    """
    self.logger.debug('enter calculate_polygon_coords')
    points = []
    for value_bbox in contour:
        # Offsets are looked up per point so that page_coord is never
        # touched for an empty contour or when offsets are skipped.
        if skip_layout_reading_order:
            x_offset, y_offset = 0, 0
        else:
            x_offset, y_offset = page_coord[2], page_coord[0]
        # Points come either flat ([x, y]) or wrapped once ([[x, y]]).
        point = value_bbox if len(value_bbox) == 2 else value_bbox[0]
        x_scaled = int((point[0] + x_offset) / self.scale_x)
        y_scaled = int((point[1] + y_offset) / self.scale_y)
        points.append(str(x_scaled) + ',' + str(y_scaled))
    return ' '.join(points)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue