mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-12-01 08:44:13 +01:00
factor out extract_only_images as eynollah extract-images
This commit is contained in:
parent
83e8b289da
commit
177d555ded
7 changed files with 471 additions and 199 deletions
|
|
@ -109,7 +109,6 @@ The following options can be used to further configure the processing:
|
||||||
| `-cl` | apply contour detection for curved text lines instead of bounding boxes |
|
| `-cl` | apply contour detection for curved text lines instead of bounding boxes |
|
||||||
| `-ib` | apply binarization (the resulting image is saved to the output directory) |
|
| `-ib` | apply binarization (the resulting image is saved to the output directory) |
|
||||||
| `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) |
|
| `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) |
|
||||||
| `-eoi` | extract only images to output directory (other processing will not be done) |
|
|
||||||
| `-ho` | ignore headers for reading order detection |
|
| `-ho` | ignore headers for reading order detection |
|
||||||
| `-si <directory>` | save image regions detected to this directory |
|
| `-si <directory>` | save image regions detected to this directory |
|
||||||
| `-sd <directory>` | save deskewed image to this directory |
|
| `-sd <directory>` | save deskewed image to this directory |
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,14 @@
|
||||||
# NOTE: For predictable order of imports of torch/shapely/tensorflow
|
# NOTE: For predictable order of imports of torch/shapely/tensorflow
|
||||||
# this must be the first import of the CLI!
|
# this must be the first import of the CLI!
|
||||||
from ..eynollah_imports import imported_libs
|
from ..eynollah_imports import imported_libs
|
||||||
|
|
||||||
from .cli_models import models_cli
|
from .cli_models import models_cli
|
||||||
from .cli_binarize import binarize_cli
|
from .cli_binarize import binarize_cli
|
||||||
|
|
||||||
from .cli import main
|
from .cli import main
|
||||||
from .cli_binarize import binarize_cli
|
from .cli_binarize import binarize_cli
|
||||||
from .cli_enhance import enhance_cli
|
from .cli_enhance import enhance_cli
|
||||||
|
from .cli_extract_images import extract_images_cli
|
||||||
from .cli_layout import layout_cli
|
from .cli_layout import layout_cli
|
||||||
from .cli_ocr import ocr_cli
|
from .cli_ocr import ocr_cli
|
||||||
from .cli_readingorder import readingorder_cli
|
from .cli_readingorder import readingorder_cli
|
||||||
|
|
@ -17,5 +19,4 @@ main.add_command(layout_cli, 'layout')
|
||||||
main.add_command(readingorder_cli, 'machine-based-reading-order')
|
main.add_command(readingorder_cli, 'machine-based-reading-order')
|
||||||
main.add_command(models_cli, 'models')
|
main.add_command(models_cli, 'models')
|
||||||
main.add_command(ocr_cli, 'ocr')
|
main.add_command(ocr_cli, 'ocr')
|
||||||
|
main.add_command(extract_images_cli, 'extract-images')
|
||||||
|
|
||||||
|
|
|
||||||
167
src/eynollah/cli/cli_extract_images.py
Normal file
167
src/eynollah/cli/cli_extract_images.py
Normal file
|
|
@ -0,0 +1,167 @@
|
||||||
|
import click
|
||||||
|
|
||||||
|
@click.command()
@click.option(
    "--image",
    "-i",
    help="input image filename",
    type=click.Path(exists=True, dir_okay=False),
)
@click.option(
    "--out",
    "-o",
    help="directory for output PAGE-XML files",
    type=click.Path(exists=True, file_okay=False),
    required=True,
)
@click.option(
    "--overwrite",
    "-O",
    help="overwrite (instead of skipping) if output xml exists",
    is_flag=True,
)
@click.option(
    "--dir_in",
    "-di",
    help="directory of input images (instead of --image)",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_images",
    "-si",
    help="if a directory is given, images in documents will be cropped and saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_layout",
    "-sl",
    help="if a directory is given, plot of layout will be saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_deskewed",
    "-sd",
    help="if a directory is given, deskewed image will be saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_all",
    "-sa",
    help="if a directory is given, all plots needed for documentation will be saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_page",
    "-sp",
    help="if a directory is given, page crop of image will be saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--enable-plotting/--disable-plotting",
    "-ep/-noep",
    is_flag=True,
    help="If set, will plot intermediary files and images",
)
@click.option(
    "--input_binary/--input-RGB",
    "-ib/-irgb",
    is_flag=True,
    help="In general, eynollah uses RGB as input but if the input document is very dark, very bright or for any other reason you can turn on input binarization. When this flag is set, eynollah will binarize the RGB input document, you should always provide RGB images to eynollah.",
)
@click.option(
    "--ignore_page_extraction/--extract_page_included",
    "-ipe/-epi",
    is_flag=True,
    help="if this parameter set to true, this tool would ignore page extraction",
)
@click.option(
    "--reading_order_machine_based/--heuristic_reading_order",
    "-romb/-hro",
    is_flag=True,
    help="if this parameter set to true, this tool would apply machine based reading order detection",
)
@click.option(
    "--num_col_upper",
    "-ncu",
    # The help texts of -ncu and -ncl used to be swapped.
    help="upper limit of columns in document image",
)
@click.option(
    "--num_col_lower",
    "-ncl",
    help="lower limit of columns in document image",
)
@click.option(
    "--threshold_art_class_layout",
    "-tharl",
    help="threshold of artifical class in the case of layout detection. The default value is 0.1",
)
@click.option(
    "--threshold_art_class_textline",
    "-thart",
    help="threshold of artifical class in the case of textline detection. The default value is 0.1",
)
@click.option(
    "--skip_layout_and_reading_order",
    "-slro/-noslro",
    is_flag=True,
    help="if this parameter set to true, this tool will ignore layout detection and reading order. It means that textline detection will be done within printspace and contours of textline will be written in xml output file.",
)
@click.pass_context
def extract_images_cli(
    ctx,
    image,
    out,
    overwrite,
    dir_in,
    save_images,
    save_layout,
    save_deskewed,
    save_all,
    save_page,
    enable_plotting,
    input_binary,
    reading_order_machine_based,
    num_col_upper,
    num_col_lower,
    threshold_art_class_textline,
    threshold_art_class_layout,
    skip_layout_and_reading_order,
    ignore_page_extraction,
):
    """
    Extract only the images of documents (other layout processing is not done)
    """
    # Every "save to directory" option is a plotting feature: each one needs
    # -ep, and -ep in turn needs at least one of them to have any effect.
    assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep"
    assert enable_plotting or not save_deskewed, "Plotting with -sd also requires -ep"
    assert enable_plotting or not save_all, "Plotting with -sa also requires -ep"
    assert enable_plotting or not save_page, "Plotting with -sp also requires -ep"
    assert enable_plotting or not save_images, "Plotting with -si also requires -ep"
    # This command has no -ae option, so the message must not advertise it.
    assert not enable_plotting or save_layout or save_deskewed or save_all or save_page or save_images, \
        "Plotting with -ep also requires -sl, -sd, -sa, -sp or -si"
    # Exactly one input source: a single file or a directory of files.
    assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."

    # Imported only once the command actually runs, not at CLI import time.
    from ..extract_images import EynollahImageExtractor
    extractor = EynollahImageExtractor(
        model_zoo=ctx.obj.model_zoo,
        enable_plotting=enable_plotting,
        input_binary=input_binary,
        ignore_page_extraction=ignore_page_extraction,
        reading_order_machine_based=reading_order_machine_based,
        num_col_upper=num_col_upper,
        num_col_lower=num_col_lower,
        skip_layout_and_reading_order=skip_layout_and_reading_order,
        threshold_art_class_textline=threshold_art_class_textline,
        threshold_art_class_layout=threshold_art_class_layout,
    )
    extractor.run(
        overwrite=overwrite,
        image_filename=image,
        dir_in=dir_in,
        dir_out=out,
        dir_of_cropped_images=save_images,
        dir_of_layout=save_layout,
        dir_of_deskewed=save_deskewed,
        dir_of_all=save_all,
        dir_save_page=save_page,
    )
|
||||||
|
|
||||||
|
|
@ -63,12 +63,6 @@ import click
|
||||||
is_flag=True,
|
is_flag=True,
|
||||||
help="If set, will plot intermediary files and images",
|
help="If set, will plot intermediary files and images",
|
||||||
)
|
)
|
||||||
@click.option(
|
|
||||||
"--extract_only_images/--disable-extracting_only_images",
|
|
||||||
"-eoi/-noeoi",
|
|
||||||
is_flag=True,
|
|
||||||
help="If a directory is given, only images in documents will be cropped and saved there and the other processing will not be done",
|
|
||||||
)
|
|
||||||
@click.option(
|
@click.option(
|
||||||
"--allow-enhancement/--no-allow-enhancement",
|
"--allow-enhancement/--no-allow-enhancement",
|
||||||
"-ae/-noae",
|
"-ae/-noae",
|
||||||
|
|
@ -166,7 +160,6 @@ def layout_cli(
|
||||||
save_layout,
|
save_layout,
|
||||||
save_deskewed,
|
save_deskewed,
|
||||||
save_all,
|
save_all,
|
||||||
extract_only_images,
|
|
||||||
save_page,
|
save_page,
|
||||||
enable_plotting,
|
enable_plotting,
|
||||||
allow_enhancement,
|
allow_enhancement,
|
||||||
|
|
@ -197,17 +190,9 @@ def layout_cli(
|
||||||
assert enable_plotting or not allow_enhancement, "Plotting with -ae also requires -ep"
|
assert enable_plotting or not allow_enhancement, "Plotting with -ae also requires -ep"
|
||||||
assert not enable_plotting or save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement, \
|
assert not enable_plotting or save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement, \
|
||||||
"Plotting with -ep also requires -sl, -sd, -sa, -sp, -si or -ae"
|
"Plotting with -ep also requires -sl, -sd, -sa, -sp, -si or -ae"
|
||||||
assert not extract_only_images or not allow_enhancement, "Image extraction -eoi can not be set alongside allow_enhancement -ae"
|
|
||||||
assert not extract_only_images or not allow_scaling, "Image extraction -eoi can not be set alongside allow_scaling -as"
|
|
||||||
assert not extract_only_images or not curved_line, "Image extraction -eoi can not be set alongside curved_line -cl"
|
|
||||||
assert not extract_only_images or not full_layout, "Image extraction -eoi can not be set alongside full_layout -fl"
|
|
||||||
assert not extract_only_images or not tables, "Image extraction -eoi can not be set alongside tables -tab"
|
|
||||||
assert not extract_only_images or not right2left, "Image extraction -eoi can not be set alongside right2left -r2l"
|
|
||||||
assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
|
|
||||||
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||||
eynollah = Eynollah(
|
eynollah = Eynollah(
|
||||||
model_zoo=ctx.obj.model_zoo,
|
model_zoo=ctx.obj.model_zoo,
|
||||||
extract_only_images=extract_only_images,
|
|
||||||
enable_plotting=enable_plotting,
|
enable_plotting=enable_plotting,
|
||||||
allow_enhancement=allow_enhancement,
|
allow_enhancement=allow_enhancement,
|
||||||
curved_line=curved_line,
|
curved_line=curved_line,
|
||||||
|
|
|
||||||
272
src/eynollah/extract_images.py
Normal file
272
src/eynollah/extract_images.py
Normal file
|
|
@ -0,0 +1,272 @@
|
||||||
|
"""
|
||||||
|
Extract image regions from document images and write them to PAGE-XML.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
|
import logging
|
||||||
|
from multiprocessing import cpu_count
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from typing import Optional
|
||||||
|
from pathlib import Path
|
||||||
|
import tensorflow as tf
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
from eynollah.utils.contour import filter_contours_area_of_image, return_contours_of_image, return_contours_of_interested_region
|
||||||
|
from eynollah.utils.resize import resize_image
|
||||||
|
|
||||||
|
from .model_zoo.model_zoo import EynollahModelZoo
|
||||||
|
from .eynollah import Eynollah
|
||||||
|
from .utils import box2rect, is_image_filename
|
||||||
|
from .plot import EynollahPlotter
|
||||||
|
|
||||||
|
class EynollahImageExtractor(Eynollah):
    """
    Variant of Eynollah that only detects image regions.

    For every input image the page is located, image regions are predicted,
    and a PAGE-XML result containing just those image polygons is written.
    If plotting is enabled, the regions are also handed to the plotter.

    Note that ``Eynollah.__init__`` is deliberately not invoked here: this
    constructor sets up its own (smaller) state and model selection.
    """

    def __init__(
        self,
        *,
        model_zoo: EynollahModelZoo,
        enable_plotting: bool = False,
        input_binary: bool = False,
        ignore_page_extraction: bool = False,
        reading_order_machine_based: bool = False,
        num_col_upper: Optional[int] = None,
        num_col_lower: Optional[int] = None,
        threshold_art_class_layout: Optional[float] = None,
        threshold_art_class_textline: Optional[float] = None,
        skip_layout_and_reading_order: bool = False,
    ):
        """
        Store the configuration, configure GPU memory growth and load models.

        Falsy values for ``num_col_upper`` / ``num_col_lower`` are stored
        unchanged, truthy ones are converted with ``int``. Falsy thresholds
        fall back to 0.1, truthy ones are converted with ``float``.
        """
        self.logger = logging.getLogger('eynollah.extract_images')
        self.model_zoo = model_zoo
        self.plotter = None

        self.reading_order_machine_based = reading_order_machine_based
        self.enable_plotting = enable_plotting
        # --input-binary sensible if image is very dark, if layout is not working.
        self.input_binary = input_binary
        self.ignore_page_extraction = ignore_page_extraction
        self.skip_layout_and_reading_order = skip_layout_and_reading_order
        self.num_col_upper = int(num_col_upper) if num_col_upper else num_col_upper
        self.num_col_lower = int(num_col_lower) if num_col_lower else num_col_lower

        # for parallelization of CPU-intensive tasks:
        self.executor = ProcessPoolExecutor(max_workers=cpu_count())

        self.threshold_art_class_layout = (
            float(threshold_art_class_layout) if threshold_art_class_layout else 0.1)
        self.threshold_art_class_textline = (
            float(threshold_art_class_textline) if threshold_art_class_textline else 0.1)

        t_start = time.time()

        try:
            for device in tf.config.list_physical_devices('GPU'):
                tf.config.experimental.set_memory_growth(device, True)
        except Exception:
            # Best effort only. Narrowed from a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            self.logger.warning("no GPU device available")

        self.logger.info("Loading models...")
        self.setup_models()
        self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)")

    def setup_models(self):
        """Load the models requested for image extraction into the model zoo."""
        loadable = [
            "col_classifier",
            "binarization",
            "page",
            "extract_images",
        ]
        self.model_zoo.load_models(*loadable)

    def get_regions_light_v_extract_only_images(self, img, num_col_classifier):
        """
        Predict regions on a downscaled copy of ``img`` and derive image boxes.

        The working width is chosen from ``num_col_classifier`` (1..6); the
        prediction is scaled back to the size of ``img``, cropped to the
        extracted page and split into per-class masks (1, 2 and 3 are treated
        below as text, image and separator line respectively).

        Returns a 7-tuple ``(text_regions_p_true, erosion_hurts,
        polygons_seplines, polygons_of_images_fin, image_page, page_coord,
        cont_page)``.

        Raises:
            ValueError: if ``num_col_classifier`` is not in 1..6.
        """
        self.logger.debug("enter get_regions_extract_images_only")
        erosion_hurts = False
        # Only the original size is needed later, so no image copy is made.
        img_height_h = img.shape[0]
        img_width_h = img.shape[1]

        if num_col_classifier == 1:
            img_w_new = 700
        elif num_col_classifier == 2:
            img_w_new = 900
        elif num_col_classifier == 3:
            img_w_new = 1500
        elif num_col_classifier == 4:
            img_w_new = 1800
        elif num_col_classifier == 5:
            img_w_new = 2200
        elif num_col_classifier == 6:
            img_w_new = 2500
        else:
            raise ValueError("num_col_classifier must be in range 1..6")
        # Keep the aspect ratio when scaling to the working width.
        img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
        img_resized = resize_image(img, img_h_new, img_w_new)

        # NOTE(review): setup_models() loads "extract_images" but the model
        # fetched here is "region" -- confirm the model zoo resolves this.
        prediction_regions_org, _ = self.do_prediction_new_concept(
            True, img_resized, self.model_zoo.get("region"))

        prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)
        image_page, page_coord, cont_page = self.extract_page()

        # Restrict the prediction to the page crop and keep the first channel.
        prediction_regions_org = prediction_regions_org[page_coord[0]:page_coord[1],
                                                        page_coord[2]:page_coord[3]]
        prediction_regions_org = prediction_regions_org[:, :, 0]

        mask_lines_only = (prediction_regions_org == 3) * 1
        mask_texts_only = (prediction_regions_org == 1) * 1
        mask_images_only = (prediction_regions_org == 2) * 1

        polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
        polygons_seplines = filter_contours_area_of_image(
            mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)

        polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
        polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)

        # Rebuild a label map. Painting order matters: lines first, then the
        # image mask, then text, so later classes overwrite earlier ones.
        text_regions_p_true = np.zeros(prediction_regions_org.shape)
        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3, 3, 3))

        text_regions_p_true[mask_images_only == 1] = 2
        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1, 1, 1))

        # Blank a 15 pixel band along the bottom and right borders.
        text_regions_p_true[text_regions_p_true.shape[0]-15:text_regions_p_true.shape[0], :] = 0
        text_regions_p_true[:, text_regions_p_true.shape[1]-15:text_regions_p_true.shape[1]] = 0

        ##polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.0001)
        polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.001)

        polygons_of_images_fin = []
        for ploy_img_ind in polygons_of_images:
            box = _, _, w, h = cv2.boundingRect(ploy_img_ind)
            if h < 150 or w < 150:
                # Regions smaller than 150 pixels in either dimension are dropped.
                continue
            page_coord_img = box2rect(box) # type: ignore
            # Replace the contour by the four corners of its bounding box.
            polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]],
                                                    [page_coord_img[3], page_coord_img[0]],
                                                    [page_coord_img[3], page_coord_img[1]],
                                                    [page_coord_img[2], page_coord_img[1]]]))

        self.logger.debug("exit get_regions_extract_images_only")
        return (text_regions_p_true,
                erosion_hurts,
                polygons_seplines,
                polygons_of_images_fin,
                image_page,
                page_coord,
                cont_page)

    def run(self,
            overwrite: bool = False,
            image_filename: Optional[str] = None,
            dir_in: Optional[str] = None,
            dir_out: Optional[str] = None,
            dir_of_cropped_images: Optional[str] = None,
            dir_of_layout: Optional[str] = None,
            dir_of_deskewed: Optional[str] = None,
            dir_of_all: Optional[str] = None,
            dir_save_page: Optional[str] = None,
    ):
        """
        Process a single image or every image file found in a directory.

        ``dir_in`` takes precedence over ``image_filename``. For each input
        the output file name is reset; inputs whose output file already
        exists are skipped unless ``overwrite`` is set. The result of
        ``run_single`` is written as PAGE-XML.

        Raises:
            ValueError: if neither ``dir_in`` nor ``image_filename`` is given.
        """
        self.logger.debug("enter run")
        t0_tot = time.time()

        # Log enabled features directly
        # full_layout / tables are not assigned by this class's __init__
        # (which skips Eynollah.__init__), so read them defensively instead
        # of risking an AttributeError.
        enabled_modes = []
        if getattr(self, "full_layout", False):
            enabled_modes.append("Full layout analysis")
        if getattr(self, "tables", False):
            enabled_modes.append("Table detection")
        if enabled_modes:
            self.logger.info("Enabled modes: " + ", ".join(enabled_modes))
        if self.enable_plotting:
            self.logger.info("Saving debug plots")
            if dir_of_cropped_images:
                self.logger.info(f"Saving cropped images to: {dir_of_cropped_images}")
            if dir_of_layout:
                self.logger.info(f"Saving layout plots to: {dir_of_layout}")
            if dir_of_deskewed:
                self.logger.info(f"Saving deskewed images to: {dir_of_deskewed}")

        if dir_in:
            ls_imgs = [os.path.join(dir_in, name)
                       for name in filter(is_image_filename,
                                          os.listdir(dir_in))]
        elif image_filename:
            ls_imgs = [image_filename]
        else:
            raise ValueError("run requires either a single image filename or a directory")

        for img_filename in ls_imgs:
            self.logger.info(img_filename)
            t0 = time.time()

            self.reset_file_name_dir(img_filename, dir_out)
            if self.enable_plotting:
                self.plotter = EynollahPlotter(dir_out=dir_out,
                                               dir_of_all=dir_of_all,
                                               dir_save_page=dir_save_page,
                                               dir_of_deskewed=dir_of_deskewed,
                                               dir_of_cropped_images=dir_of_cropped_images,
                                               dir_of_layout=dir_of_layout,
                                               image_filename_stem=Path(img_filename).stem)
            if os.path.exists(self.writer.output_filename):
                if overwrite:
                    self.logger.warning("will overwrite existing output file '%s'", self.writer.output_filename)
                else:
                    self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename)
                    continue

            pcgts = self.run_single()
            self.logger.info("Job done in %.1fs", time.time() - t0)
            self.writer.write_pagexml(pcgts)

        if dir_in:
            self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)

    def run_single(self):
        """
        Run enhancement and image extraction for the current input.

        Returns the PAGE-XML object built by the writer; only the page
        coordinates, the page contour and the image polygons are filled in,
        every other region list is passed empty.
        """
        t0 = time.time()

        self.logger.info(f"Processing file: {self.writer.image_filename}")
        self.logger.info("Step 1/5: Image Enhancement")

        img_res, is_image_enhanced, num_col_classifier, _ = \
            self.run_enhancement()

        self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, "
                         f"{self.dpi} DPI, {num_col_classifier} columns")
        if is_image_enhanced:
            self.logger.info("Enhancement applied")

        self.logger.info(f"Enhancement complete ({time.time() - t0:.1f}s)")

        # Image Extraction Mode
        self.logger.info("Step 2/5: Image Extraction Mode")

        _, _, _, polygons_of_images, \
            image_page, page_coord, cont_page = \
            self.get_regions_light_v_extract_only_images(img_res, num_col_classifier)
        pcgts = self.writer.build_pagexml_no_full_layout(
            [], page_coord, [], [], [], [],
            polygons_of_images, [], [], [], [], [], [], [], [], [],
            cont_page, [], [])
        if self.plotter:
            self.plotter.write_images_into_directory(polygons_of_images, image_page)

        self.logger.info("Image extraction complete")
        return pcgts
|
||||||
|
|
@ -128,7 +128,6 @@ class Eynollah:
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
model_zoo: EynollahModelZoo,
|
model_zoo: EynollahModelZoo,
|
||||||
extract_only_images : bool =False,
|
|
||||||
enable_plotting : bool = False,
|
enable_plotting : bool = False,
|
||||||
allow_enhancement : bool = False,
|
allow_enhancement : bool = False,
|
||||||
curved_line : bool = False,
|
curved_line : bool = False,
|
||||||
|
|
@ -162,7 +161,6 @@ class Eynollah:
|
||||||
self.input_binary = input_binary
|
self.input_binary = input_binary
|
||||||
self.allow_scaling = allow_scaling
|
self.allow_scaling = allow_scaling
|
||||||
self.headers_off = headers_off
|
self.headers_off = headers_off
|
||||||
self.extract_only_images = extract_only_images
|
|
||||||
self.ignore_page_extraction = ignore_page_extraction
|
self.ignore_page_extraction = ignore_page_extraction
|
||||||
self.skip_layout_and_reading_order = skip_layout_and_reading_order
|
self.skip_layout_and_reading_order = skip_layout_and_reading_order
|
||||||
if num_col_upper:
|
if num_col_upper:
|
||||||
|
|
@ -216,9 +214,8 @@ class Eynollah:
|
||||||
"col_classifier",
|
"col_classifier",
|
||||||
"binarization",
|
"binarization",
|
||||||
"page",
|
"page",
|
||||||
("region", 'extract_only_images' if self.extract_only_images else '')
|
"region"
|
||||||
]
|
]
|
||||||
if not self.extract_only_images:
|
|
||||||
loadable.append(("textline"))
|
loadable.append(("textline"))
|
||||||
loadable.append("region_1_2")
|
loadable.append("region_1_2")
|
||||||
if self.full_layout:
|
if self.full_layout:
|
||||||
|
|
@ -452,27 +449,6 @@ class Eynollah:
|
||||||
|
|
||||||
return img_new, num_column_is_classified
|
return img_new, num_column_is_classified
|
||||||
|
|
||||||
def calculate_width_height_by_columns_extract_only_images(self, img, num_col, width_early, label_p_pred):
|
|
||||||
self.logger.debug("enter calculate_width_height_by_columns")
|
|
||||||
if num_col == 1:
|
|
||||||
img_w_new = 700
|
|
||||||
elif num_col == 2:
|
|
||||||
img_w_new = 900
|
|
||||||
elif num_col == 3:
|
|
||||||
img_w_new = 1500
|
|
||||||
elif num_col == 4:
|
|
||||||
img_w_new = 1800
|
|
||||||
elif num_col == 5:
|
|
||||||
img_w_new = 2200
|
|
||||||
elif num_col == 6:
|
|
||||||
img_w_new = 2500
|
|
||||||
img_h_new = img_w_new * img.shape[0] // img.shape[1]
|
|
||||||
|
|
||||||
img_new = resize_image(img, img_h_new, img_w_new)
|
|
||||||
num_column_is_classified = True
|
|
||||||
|
|
||||||
return img_new, num_column_is_classified
|
|
||||||
|
|
||||||
def resize_image_with_column_classifier(self, is_image_enhanced, img_bin):
|
def resize_image_with_column_classifier(self, is_image_enhanced, img_bin):
|
||||||
self.logger.debug("enter resize_image_with_column_classifier")
|
self.logger.debug("enter resize_image_with_column_classifier")
|
||||||
if self.input_binary:
|
if self.input_binary:
|
||||||
|
|
@ -596,7 +572,6 @@ class Eynollah:
|
||||||
label_p_pred = [np.ones(6)]
|
label_p_pred = [np.ones(6)]
|
||||||
|
|
||||||
self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5))
|
self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5))
|
||||||
if not self.extract_only_images:
|
|
||||||
if dpi < DPI_THRESHOLD:
|
if dpi < DPI_THRESHOLD:
|
||||||
if num_col in (1,2):
|
if num_col in (1,2):
|
||||||
img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
|
img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
|
||||||
|
|
@ -616,10 +591,6 @@ class Eynollah:
|
||||||
num_column_is_classified = True
|
num_column_is_classified = True
|
||||||
image_res = np.copy(img)
|
image_res = np.copy(img)
|
||||||
is_image_enhanced = False
|
is_image_enhanced = False
|
||||||
else:
|
|
||||||
num_column_is_classified = True
|
|
||||||
image_res = np.copy(img)
|
|
||||||
is_image_enhanced = False
|
|
||||||
|
|
||||||
self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
|
self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
|
||||||
return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin
|
return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin
|
||||||
|
|
@ -1790,113 +1761,7 @@ class Eynollah:
|
||||||
(prediction_textline_longshot_true_size[:, :, 0]==1).astype(np.uint8))
|
(prediction_textline_longshot_true_size[:, :, 0]==1).astype(np.uint8))
|
||||||
|
|
||||||
|
|
||||||
def get_regions_light_v_extract_only_images(self, img, is_image_enhanced, num_col_classifier):
    """Predict page regions and return bounding rectangles of the image regions.

    The input is resized to a width chosen from the classified column count,
    run through the "region" model, resized back to the input size and cropped
    to the page frame returned by ``self.extract_page()``.  From the resulting
    label map a cleaned-up region map is built, and every image region whose
    bounding box is at least 150 px in both width and height is reported as
    a 4-point rectangle.

    Args:
        img: Image array; only ``img.shape[0]`` (height) and ``img.shape[1]``
            (width) are read here before it is handed to ``resize_image``.
        is_image_enhanced: Unused by this method; kept for signature
            compatibility with the other ``get_regions_*`` methods.
        num_col_classifier: Classified number of columns, expected in 1..6.

    Returns:
        A 7-tuple ``(text_regions_p_true, erosion_hurts, polygons_seplines,
        polygons_of_images_fin, image_page, page_coord, cont_page)`` where
        ``erosion_hurts`` is always ``False`` and ``image_page``,
        ``page_coord`` and ``cont_page`` are passed through unchanged from
        ``self.extract_page()``.

    Raises:
        ValueError: If ``num_col_classifier`` is not one of 1..6 (previously
            this surfaced as an ``UnboundLocalError`` on ``img_w_new``).
    """
    self.logger.debug("enter get_regions_extract_images_only")
    erosion_hurts = False
    img_height_h, img_width_h = img.shape[0], img.shape[1]

    # Target width fed to the region model, keyed by classified column count.
    width_by_num_col = {1: 700, 2: 900, 3: 1500, 4: 1800, 5: 2200, 6: 2500}
    try:
        img_w_new = width_by_num_col[num_col_classifier]
    except KeyError:
        raise ValueError(
            "unsupported number of columns: %r (expected 1..6)" % (num_col_classifier,)
        ) from None
    # Keep the aspect ratio of the input when scaling to the target width.
    img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
    img_resized = resize_image(img, img_h_new, img_w_new)

    prediction_regions_org, _ = self.do_prediction_new_concept(
        True, img_resized, self.model_zoo.get("region"))

    # Back to the input resolution, then crop to the page frame.
    prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)
    image_page, page_coord, cont_page = self.extract_page()

    # page_coord is used as (row_start, row_end, col_start, col_end).
    prediction_regions_org = prediction_regions_org[page_coord[0]:page_coord[1],
                                                    page_coord[2]:page_coord[3]]
    prediction_regions_org = prediction_regions_org[:, :, 0]

    # Per-class binary masks of the label map: 1 = text, 2 = image, 3 = separator line.
    mask_lines_only = (prediction_regions_org == 3) * 1
    mask_texts_only = (prediction_regions_org == 1) * 1
    mask_images_only = (prediction_regions_org == 2) * 1

    polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
    polygons_seplines = filter_contours_area_of_image(
        mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)

    polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
    polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)

    # Rebuild the region map; painting order matters: lines first, then images,
    # then text, so later classes overwrite earlier ones where they overlap.
    text_regions_p_true = np.zeros(prediction_regions_org.shape)
    text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3, 3, 3))
    text_regions_p_true[mask_images_only == 1] = 2
    text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1, 1, 1))

    # Blank a 15 px band along the bottom and right edges of the map.
    text_regions_p_true[text_regions_p_true.shape[0]-15:text_regions_p_true.shape[0], :] = 0
    text_regions_p_true[:, text_regions_p_true.shape[1]-15:text_regions_p_true.shape[1]] = 0

    polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.001)

    # Image regions narrower or shorter than this many pixels are discarded.
    min_image_side = 150
    polygons_of_images_fin = []
    for ploy_img_ind in polygons_of_images:
        box = cv2.boundingRect(ploy_img_ind)
        _, _, w, h = box
        if h < min_image_side or w < min_image_side:
            continue
        # NOTE(review): box2rect presumably maps (x, y, w, h) to
        # (y_start, y_end, x_start, x_end) - confirm against its definition.
        page_coord_img = box2rect(box)
        polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]],
                                                [page_coord_img[3], page_coord_img[0]],
                                                [page_coord_img[3], page_coord_img[1]],
                                                [page_coord_img[2], page_coord_img[1]]]))

    self.logger.debug("exit get_regions_extract_images_only")
    return (text_regions_p_true,
            erosion_hurts,
            polygons_seplines,
            polygons_of_images_fin,
            image_page,
            page_coord,
            cont_page)
|
|
||||||
|
|
||||||
def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
|
def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
|
||||||
self.logger.debug("enter get_regions_light_v")
|
self.logger.debug("enter get_regions_light_v")
|
||||||
|
|
@ -3513,23 +3378,6 @@ class Eynollah:
|
||||||
self.logger.info(f"Enhancement complete ({time.time() - t0:.1f}s)")
|
self.logger.info(f"Enhancement complete ({time.time() - t0:.1f}s)")
|
||||||
|
|
||||||
|
|
||||||
# Image Extraction Mode
|
|
||||||
if self.extract_only_images:
|
|
||||||
self.logger.info("Step 2/5: Image Extraction Mode")
|
|
||||||
|
|
||||||
text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, \
|
|
||||||
image_page, page_coord, cont_page = \
|
|
||||||
self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
|
|
||||||
pcgts = self.writer.build_pagexml_no_full_layout(
|
|
||||||
[], page_coord, [], [], [], [],
|
|
||||||
polygons_of_images, [], [], [], [], [], [], [], [], [],
|
|
||||||
cont_page, [], [])
|
|
||||||
if self.plotter:
|
|
||||||
self.plotter.write_images_into_directory(polygons_of_images, image_page)
|
|
||||||
|
|
||||||
self.logger.info("Image extraction complete")
|
|
||||||
return pcgts
|
|
||||||
|
|
||||||
# Basic Processing Mode
|
# Basic Processing Mode
|
||||||
if self.skip_layout_and_reading_order:
|
if self.skip_layout_and_reading_order:
|
||||||
self.logger.info("Step 2/5: Basic Processing Mode")
|
self.logger.info("Step 2/5: Basic Processing Mode")
|
||||||
|
|
|
||||||
|
|
@ -66,8 +66,8 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
||||||
),
|
),
|
||||||
|
|
||||||
EynollahModelSpec(
|
EynollahModelSpec(
|
||||||
category="region",
|
category="extract_images",
|
||||||
variant='extract_only_images',
|
variant='',
|
||||||
filename="models_eynollah/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18",
|
filename="models_eynollah/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18",
|
||||||
dist_url=dist_url(),
|
dist_url=dist_url(),
|
||||||
type='Keras',
|
type='Keras',
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue