factor out extract_only_images as eynollah extract-images

This commit is contained in:
kba 2025-11-26 21:35:45 +01:00 committed by kba
parent 83e8b289da
commit 177d555ded
7 changed files with 471 additions and 199 deletions

View file

@ -109,7 +109,6 @@ The following options can be used to further configure the processing:
| `-cl` | apply contour detection for curved text lines instead of bounding boxes |
| `-ib` | apply binarization (the resulting image is saved to the output directory) |
| `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) |
| `-eoi` | extract only images to output directory (other processing will not be done) |
| `-ho` | ignore headers for reading order detection |
| `-si <directory>` | save image regions detected to this directory |
| `-sd <directory>` | save deskewed image to this directory |

View file

@ -1,12 +1,14 @@
# NOTE: For predictable order of imports of torch/shapely/tensorflow
# this must be the first import of the CLI!
from ..eynollah_imports import imported_libs
from .cli_models import models_cli
from .cli_binarize import binarize_cli
from .cli import main
from .cli_binarize import binarize_cli
from .cli_enhance import enhance_cli
from .cli_extract_images import extract_images_cli
from .cli_layout import layout_cli
from .cli_ocr import ocr_cli
from .cli_readingorder import readingorder_cli
@ -17,5 +19,4 @@ main.add_command(layout_cli, 'layout')
main.add_command(readingorder_cli, 'machine-based-reading-order')
main.add_command(models_cli, 'models')
main.add_command(ocr_cli, 'ocr')
main.add_command(extract_images_cli, 'extract-images')

View file

@ -0,0 +1,167 @@
import click
@click.command()
@click.option(
    "--image",
    "-i",
    help="input image filename",
    type=click.Path(exists=True, dir_okay=False),
)
@click.option(
    "--out",
    "-o",
    help="directory for output PAGE-XML files",
    type=click.Path(exists=True, file_okay=False),
    required=True,
)
@click.option(
    "--overwrite",
    "-O",
    help="overwrite (instead of skipping) if output xml exists",
    is_flag=True,
)
@click.option(
    "--dir_in",
    "-di",
    help="directory of input images (instead of --image)",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_images",
    "-si",
    help="if a directory is given, images in documents will be cropped and saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_layout",
    "-sl",
    help="if a directory is given, plot of layout will be saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_deskewed",
    "-sd",
    help="if a directory is given, deskewed image will be saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_all",
    "-sa",
    help="if a directory is given, all plots needed for documentation will be saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_page",
    "-sp",
    help="if a directory is given, page crop of image will be saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--enable-plotting/--disable-plotting",
    "-ep/-noep",
    is_flag=True,
    help="If set, will plot intermediary files and images",
)
@click.option(
    "--input_binary/--input-RGB",
    "-ib/-irgb",
    is_flag=True,
    help="In general, eynollah uses RGB as input but if the input document is very dark, very bright or for any other reason you can turn on input binarization. When this flag is set, eynollah will binarize the RGB input document, you should always provide RGB images to eynollah.",
)
@click.option(
    "--ignore_page_extraction/--extract_page_included",
    "-ipe/-epi",
    is_flag=True,
    help="if this parameter set to true, this tool would ignore page extraction",
)
@click.option(
    "--reading_order_machine_based/--heuristic_reading_order",
    "-romb/-hro",
    is_flag=True,
    help="if this parameter set to true, this tool would apply machine based reading order detection",
)
@click.option(
    "--num_col_upper",
    "-ncu",
    # fixed: this help text was swapped with the one of --num_col_lower
    help="upper limit of columns in document image",
)
@click.option(
    "--num_col_lower",
    "-ncl",
    # fixed: this help text was swapped with the one of --num_col_upper
    help="lower limit of columns in document image",
)
@click.option(
    "--threshold_art_class_layout",
    "-tharl",
    help="threshold of artifical class in the case of layout detection. The default value is 0.1",
)
@click.option(
    "--threshold_art_class_textline",
    "-thart",
    help="threshold of artifical class in the case of textline detection. The default value is 0.1",
)
@click.option(
    "--skip_layout_and_reading_order",
    "-slro/-noslro",
    is_flag=True,
    help="if this parameter set to true, this tool will ignore layout detection and reading order. It means that textline detection will be done within printspace and contours of textline will be written in xml output file.",
)
@click.pass_context
def extract_images_cli(
    ctx,
    image,
    out,
    overwrite,
    dir_in,
    save_images,
    save_layout,
    save_deskewed,
    save_all,
    save_page,
    enable_plotting,
    input_binary,
    reading_order_machine_based,
    num_col_upper,
    num_col_lower,
    threshold_art_class_textline,
    threshold_art_class_layout,
    skip_layout_and_reading_order,
    ignore_page_extraction,
):
    """
    Extract image regions only (no other layout processing): write them to PAGE-XML and optionally crop them into a directory
    """
    # Every "save" directory is only written by the plotter, so each one
    # requires -ep, and -ep in turn requires at least one such directory.
    # (Assertions are kept to stay consistent with the other sub-commands.)
    assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep"
    assert enable_plotting or not save_deskewed, "Plotting with -sd also requires -ep"
    assert enable_plotting or not save_all, "Plotting with -sa also requires -ep"
    assert enable_plotting or not save_page, "Plotting with -sp also requires -ep"
    assert enable_plotting or not save_images, "Plotting with -si also requires -ep"
    # fixed: the message used to mention -ae, which this command does not have
    assert not enable_plotting or save_layout or save_deskewed or save_all or save_page or save_images, \
        "Plotting with -ep also requires -sl, -sd, -sa, -sp or -si"
    assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."

    # imported lazily, so that the heavy dependencies of the extractor are
    # only loaded when this sub-command is actually invoked
    from ..extract_images import EynollahImageExtractor

    extractor = EynollahImageExtractor(
        model_zoo=ctx.obj.model_zoo,
        enable_plotting=enable_plotting,
        input_binary=input_binary,
        ignore_page_extraction=ignore_page_extraction,
        reading_order_machine_based=reading_order_machine_based,
        num_col_upper=num_col_upper,
        num_col_lower=num_col_lower,
        skip_layout_and_reading_order=skip_layout_and_reading_order,
        threshold_art_class_textline=threshold_art_class_textline,
        threshold_art_class_layout=threshold_art_class_layout,
    )
    extractor.run(
        overwrite=overwrite,
        image_filename=image,
        dir_in=dir_in,
        dir_out=out,
        dir_of_cropped_images=save_images,
        dir_of_layout=save_layout,
        dir_of_deskewed=save_deskewed,
        dir_of_all=save_all,
        dir_save_page=save_page,
    )

View file

@ -63,12 +63,6 @@ import click
is_flag=True,
help="If set, will plot intermediary files and images",
)
@click.option(
"--extract_only_images/--disable-extracting_only_images",
"-eoi/-noeoi",
is_flag=True,
help="If a directory is given, only images in documents will be cropped and saved there and the other processing will not be done",
)
@click.option(
"--allow-enhancement/--no-allow-enhancement",
"-ae/-noae",
@ -166,7 +160,6 @@ def layout_cli(
save_layout,
save_deskewed,
save_all,
extract_only_images,
save_page,
enable_plotting,
allow_enhancement,
@ -197,17 +190,9 @@ def layout_cli(
assert enable_plotting or not allow_enhancement, "Plotting with -ae also requires -ep"
assert not enable_plotting or save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement, \
"Plotting with -ep also requires -sl, -sd, -sa, -sp, -si or -ae"
assert not extract_only_images or not allow_enhancement, "Image extraction -eoi can not be set alongside allow_enhancement -ae"
assert not extract_only_images or not allow_scaling, "Image extraction -eoi can not be set alongside allow_scaling -as"
assert not extract_only_images or not curved_line, "Image extraction -eoi can not be set alongside curved_line -cl"
assert not extract_only_images or not full_layout, "Image extraction -eoi can not be set alongside full_layout -fl"
assert not extract_only_images or not tables, "Image extraction -eoi can not be set alongside tables -tab"
assert not extract_only_images or not right2left, "Image extraction -eoi can not be set alongside right2left -r2l"
assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
eynollah = Eynollah(
model_zoo=ctx.obj.model_zoo,
extract_only_images=extract_only_images,
enable_plotting=enable_plotting,
allow_enhancement=allow_enhancement,
curved_line=curved_line,

View file

@ -0,0 +1,272 @@
"""
extract images?
"""
from concurrent.futures import ProcessPoolExecutor
import logging
from multiprocessing import cpu_count
import os
import time
from typing import Optional
from pathlib import Path
import tensorflow as tf
import numpy as np
import cv2
from eynollah.utils.contour import filter_contours_area_of_image, return_contours_of_image, return_contours_of_interested_region
from eynollah.utils.resize import resize_image
from .model_zoo.model_zoo import EynollahModelZoo
from .eynollah import Eynollah
from .utils import box2rect, is_image_filename
from .plot import EynollahPlotter
class EynollahImageExtractor(Eynollah):
    """
    Eynollah variant that only detects image regions on a page.

    For every input image the detected image regions are written to a
    PAGE-XML file; when plotting is enabled, the regions are additionally
    handed to the plotter to be written into a directory.
    """

    # width (in pixels) the page is resized to before region prediction,
    # keyed by the number of columns determined by the column classifier
    _WIDTH_BY_NUM_COL = {1: 700, 2: 900, 3: 1500, 4: 1800, 5: 2200, 6: 2500}

    def __init__(
            self,
            *,
            model_zoo: EynollahModelZoo,
            enable_plotting : bool = False,
            input_binary : bool = False,
            ignore_page_extraction : bool = False,
            reading_order_machine_based : bool = False,
            num_col_upper : Optional[int] = None,
            num_col_lower : Optional[int] = None,
            threshold_art_class_layout: Optional[float] = None,
            threshold_art_class_textline: Optional[float] = None,
            skip_layout_and_reading_order : bool = False,
    ):
        """
        Store the configuration and load the models needed for extraction.

        The parent initializer is deliberately not called; only the
        attributes assigned below exist on the instance.
        NOTE(review): inherited methods may read further attributes that
        are not set here - confirm against the parent class.

        Args:
            model_zoo: model registry used to load and retrieve the models
            enable_plotting: create a plotter for each processed image
            input_binary: stored on the instance (sensible if the image is
                very dark or if layout is not working)
            ignore_page_extraction: stored on the instance
            reading_order_machine_based: stored on the instance
            num_col_upper: stored as ``int`` if truthy, else unchanged
            num_col_lower: stored as ``int`` if truthy, else unchanged
            threshold_art_class_layout: stored as ``float``, 0.1 if falsy
            threshold_art_class_textline: stored as ``float``, 0.1 if falsy
            skip_layout_and_reading_order: stored on the instance
        """
        self.logger = logging.getLogger('eynollah.extract_images')
        self.model_zoo = model_zoo
        self.plotter = None

        self.reading_order_machine_based = reading_order_machine_based
        self.enable_plotting = enable_plotting
        # --input-binary sensible if image is very dark, if layout is not working.
        self.input_binary = input_binary
        self.ignore_page_extraction = ignore_page_extraction
        self.skip_layout_and_reading_order = skip_layout_and_reading_order
        # values may arrive as strings from the command line
        self.num_col_upper = int(num_col_upper) if num_col_upper else num_col_upper
        self.num_col_lower = int(num_col_lower) if num_col_lower else num_col_lower

        # for parallelization of CPU-intensive tasks:
        self.executor = ProcessPoolExecutor(max_workers=cpu_count())

        self.threshold_art_class_layout = (
            float(threshold_art_class_layout) if threshold_art_class_layout else 0.1)
        self.threshold_art_class_textline = (
            float(threshold_art_class_textline) if threshold_art_class_textline else 0.1)

        t_start = time.time()

        # best effort only: failing to configure the GPU must not abort,
        # but interrupts (KeyboardInterrupt/SystemExit) are not swallowed
        try:
            for device in tf.config.list_physical_devices('GPU'):
                tf.config.experimental.set_memory_growth(device, True)
        except Exception:
            self.logger.warning("no GPU device available")

        self.logger.info("Loading models...")
        self.setup_models()
        self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)")

    def setup_models(self):
        """Load the models used by the image extraction mode."""
        loadable = [
            "col_classifier",
            "binarization",
            "page",
            "extract_images",
        ]
        self.model_zoo.load_models(*loadable)

    def get_regions_light_v_extract_only_images(self,img, num_col_classifier):
        """
        Predict regions on ``img`` and derive rectangles around the images.

        Args:
            img: page image array (height and width are read from
                ``shape[0]`` and ``shape[1]``)
            num_col_classifier: number of columns, must be in 1..6; it
                selects the width the image is resized to for prediction

        Returns:
            Tuple of (label map of the page crop, erosion_hurts (always
            False), separator polygons, rectangles of detected images,
            page image, page coordinates, page contour).

        Raises:
            ValueError: if ``num_col_classifier`` is not in 1..6
        """
        self.logger.debug("enter get_regions_extract_images_only")
        erosion_hurts = False
        img_height_h = img.shape[0]
        img_width_h = img.shape[1]

        if num_col_classifier not in self._WIDTH_BY_NUM_COL:
            raise ValueError("num_col_classifier must be in range 1..6")
        img_w_new = self._WIDTH_BY_NUM_COL[num_col_classifier]
        img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
        img_resized = resize_image(img,img_h_new, img_w_new )

        # fixed: setup_models() loads the model under the "extract_images"
        # category, so it must be fetched under that name (was "region")
        prediction_regions_org, _ = self.do_prediction_new_concept(
            True, img_resized, self.model_zoo.get("extract_images"))

        # back to the original size, then crop to the detected page
        prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h )
        image_page, page_coord, cont_page = self.extract_page()
        prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1],
                                                        page_coord[2] : page_coord[3]]
        prediction_regions_org = prediction_regions_org[:,:,0]

        # labels handled here: 1 = text, 2 = image, 3 = line
        mask_lines_only = (prediction_regions_org == 3)*1
        mask_texts_only = (prediction_regions_org == 1)*1
        mask_images_only = (prediction_regions_org == 2)*1

        polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
        polygons_seplines = filter_contours_area_of_image(
            mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)

        polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
        polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)

        # rebuild the label map; drawing order matters: lines first, then
        # images, then text on top
        text_regions_p_true = np.zeros(prediction_regions_org.shape)
        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3))
        text_regions_p_true[mask_images_only == 1] = 2
        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1))

        # clear a 15 pixel margin at the bottom and at the right border
        text_regions_p_true[text_regions_p_true.shape[0]-15:text_regions_p_true.shape[0], :] = 0
        text_regions_p_true[:, text_regions_p_true.shape[1]-15:text_regions_p_true.shape[1]] = 0

        polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.001)

        polygons_of_images_fin = []
        for ploy_img_ind in polygons_of_images:
            box = _, _, w, h = cv2.boundingRect(ploy_img_ind)
            # ignore images smaller than 150 pixels in either dimension
            if h < 150 or w < 150:
                continue
            # presumably (y_min, y_max, x_min, x_max) - TODO confirm in box2rect
            page_coord_img = box2rect(box) # type: ignore
            polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]],
                                                    [page_coord_img[3], page_coord_img[0]],
                                                    [page_coord_img[3], page_coord_img[1]],
                                                    [page_coord_img[2], page_coord_img[1]]]))

        self.logger.debug("exit get_regions_extract_images_only")
        return (text_regions_p_true,
                erosion_hurts,
                polygons_seplines,
                polygons_of_images_fin,
                image_page,
                page_coord,
                cont_page)

    def run(self,
            overwrite: bool = False,
            image_filename: Optional[str] = None,
            dir_in: Optional[str] = None,
            dir_out: Optional[str] = None,
            dir_of_cropped_images: Optional[str] = None,
            dir_of_layout: Optional[str] = None,
            dir_of_deskewed: Optional[str] = None,
            dir_of_all: Optional[str] = None,
            dir_save_page: Optional[str] = None,
    ):
        """
        Extract images from a single file or from all images of a directory.

        Args:
            overwrite: process inputs whose output file already exists
                (otherwise they are skipped)
            image_filename: single input image (used if ``dir_in`` is unset)
            dir_in: directory whose image files are all processed
            dir_out: output directory passed to the writer and plotter
            dir_of_cropped_images: passed to the plotter
            dir_of_layout: passed to the plotter
            dir_of_deskewed: passed to the plotter
            dir_of_all: passed to the plotter
            dir_save_page: passed to the plotter

        Raises:
            ValueError: if neither ``image_filename`` nor ``dir_in`` is given
        """
        self.logger.debug("enter run")
        t0_tot = time.time()

        # fixed: the former logging of "full layout"/"table" modes read
        # attributes this class never sets (AttributeError); these modes
        # do not exist for image extraction, so the check was dropped
        if self.enable_plotting:
            self.logger.info("Saving debug plots")
            if dir_of_cropped_images:
                self.logger.info(f"Saving cropped images to: {dir_of_cropped_images}")
            if dir_of_layout:
                self.logger.info(f"Saving layout plots to: {dir_of_layout}")
            if dir_of_deskewed:
                self.logger.info(f"Saving deskewed images to: {dir_of_deskewed}")

        if dir_in:
            ls_imgs = [os.path.join(dir_in, name)
                       for name in os.listdir(dir_in)
                       if is_image_filename(name)]
        elif image_filename:
            ls_imgs = [image_filename]
        else:
            raise ValueError("run requires either a single image filename or a directory")

        for img_filename in ls_imgs:
            self.logger.info(img_filename)
            t0 = time.time()

            self.reset_file_name_dir(img_filename, dir_out)
            if self.enable_plotting:
                self.plotter = EynollahPlotter(dir_out=dir_out,
                                               dir_of_all=dir_of_all,
                                               dir_save_page=dir_save_page,
                                               dir_of_deskewed=dir_of_deskewed,
                                               dir_of_cropped_images=dir_of_cropped_images,
                                               dir_of_layout=dir_of_layout,
                                               image_filename_stem=Path(img_filename).stem)

            if os.path.exists(self.writer.output_filename):
                if overwrite:
                    self.logger.warning("will overwrite existing output file '%s'", self.writer.output_filename)
                else:
                    self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename)
                    continue

            pcgts = self.run_single()
            self.logger.info("Job done in %.1fs", time.time() - t0)
            self.writer.write_pagexml(pcgts)

        if dir_in:
            self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)

    def run_single(self):
        """
        Process the currently selected image and return its PAGE-XML object.

        Runs the enhancement step, detects the image regions and builds a
        PAGE-XML result containing only the page and the image regions.
        If a plotter is set, the image regions are also handed to it.
        """
        t0 = time.time()
        self.logger.info(f"Processing file: {self.writer.image_filename}")
        self.logger.info("Step 1/5: Image Enhancement")

        img_res, is_image_enhanced, num_col_classifier, _ = \
            self.run_enhancement()

        self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, "
                         f"{self.dpi} DPI, {num_col_classifier} columns")
        if is_image_enhanced:
            self.logger.info("Enhancement applied")

        self.logger.info(f"Enhancement complete ({time.time() - t0:.1f}s)")

        # Image Extraction Mode
        self.logger.info("Step 2/5: Image Extraction Mode")

        _, _, _, polygons_of_images, \
            image_page, page_coord, cont_page = \
            self.get_regions_light_v_extract_only_images(img_res, num_col_classifier)
        # only the page coordinates, image polygons and page contour are
        # filled in; every other argument is passed as an empty list
        pcgts = self.writer.build_pagexml_no_full_layout(
            [], page_coord, [], [], [], [],
            polygons_of_images, [], [], [], [], [], [], [], [], [],
            cont_page, [], [])
        if self.plotter:
            self.plotter.write_images_into_directory(polygons_of_images, image_page)

        self.logger.info("Image extraction complete")
        return pcgts

View file

@ -128,7 +128,6 @@ class Eynollah:
self,
*,
model_zoo: EynollahModelZoo,
extract_only_images : bool =False,
enable_plotting : bool = False,
allow_enhancement : bool = False,
curved_line : bool = False,
@ -162,7 +161,6 @@ class Eynollah:
self.input_binary = input_binary
self.allow_scaling = allow_scaling
self.headers_off = headers_off
self.extract_only_images = extract_only_images
self.ignore_page_extraction = ignore_page_extraction
self.skip_layout_and_reading_order = skip_layout_and_reading_order
if num_col_upper:
@ -216,18 +214,17 @@ class Eynollah:
"col_classifier",
"binarization",
"page",
("region", 'extract_only_images' if self.extract_only_images else '')
"region"
]
if not self.extract_only_images:
loadable.append(("textline"))
loadable.append("region_1_2")
if self.full_layout:
loadable.append("region_fl_np")
#loadable.append("region_fl")
if self.reading_order_machine_based:
loadable.append("reading_order")
if self.tables:
loadable.append(("table"))
loadable.append(("textline"))
loadable.append("region_1_2")
if self.full_layout:
loadable.append("region_fl_np")
#loadable.append("region_fl")
if self.reading_order_machine_based:
loadable.append("reading_order")
if self.tables:
loadable.append(("table"))
self.model_zoo.load_models(*loadable)
@ -452,27 +449,6 @@ class Eynollah:
return img_new, num_column_is_classified
def calculate_width_height_by_columns_extract_only_images(self, img, num_col, width_early, label_p_pred):
    """
    Resize ``img`` to a fixed width chosen by its number of columns.

    The height is scaled to keep the aspect ratio of ``img``.

    Args:
        img: image array (height and width are read from ``shape[0]``
            and ``shape[1]``)
        num_col: number of columns, must be in 1..6
        width_early: unused, kept for signature compatibility
        label_p_pred: unused, kept for signature compatibility

    Returns:
        Tuple of (resized image, True).

    Raises:
        ValueError: if ``num_col`` is not in 1..6 (previously this ended
            in an UnboundLocalError)
    """
    self.logger.debug("enter calculate_width_height_by_columns")
    width_by_num_col = {1: 700, 2: 900, 3: 1500, 4: 1800, 5: 2200, 6: 2500}
    if num_col not in width_by_num_col:
        raise ValueError("num_col must be in range 1..6")
    img_w_new = width_by_num_col[num_col]
    img_h_new = img_w_new * img.shape[0] // img.shape[1]

    img_new = resize_image(img, img_h_new, img_w_new)
    num_column_is_classified = True
    return img_new, num_column_is_classified
def resize_image_with_column_classifier(self, is_image_enhanced, img_bin):
self.logger.debug("enter resize_image_with_column_classifier")
if self.input_binary:
@ -596,30 +572,25 @@ class Eynollah:
label_p_pred = [np.ones(6)]
self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5))
if not self.extract_only_images:
if dpi < DPI_THRESHOLD:
if num_col in (1,2):
img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
img, num_col, width_early, label_p_pred)
else:
img_new, num_column_is_classified = self.calculate_width_height_by_columns(
img, num_col, width_early, label_p_pred)
if dpi < DPI_THRESHOLD:
if num_col in (1,2):
img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
img, num_col, width_early, label_p_pred)
else:
img_new, num_column_is_classified = self.calculate_width_height_by_columns(
img, num_col, width_early, label_p_pred)
image_res = np.copy(img_new)
is_image_enhanced = True
else:
if num_col in (1,2):
img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
img, num_col, width_early, label_p_pred)
image_res = np.copy(img_new)
is_image_enhanced = True
else:
if num_col in (1,2):
img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
img, num_col, width_early, label_p_pred)
image_res = np.copy(img_new)
is_image_enhanced = True
else:
num_column_is_classified = True
image_res = np.copy(img)
is_image_enhanced = False
else:
num_column_is_classified = True
image_res = np.copy(img)
is_image_enhanced = False
num_column_is_classified = True
image_res = np.copy(img)
is_image_enhanced = False
self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin
@ -1790,113 +1761,7 @@ class Eynollah:
(prediction_textline_longshot_true_size[:, :, 0]==1).astype(np.uint8))
def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_classifier):
    """
    Predict regions on ``img`` and derive rectangles around the images.

    Args:
        img: page image array (height and width are read from
            ``shape[0]`` and ``shape[1]``)
        is_image_enhanced: unused, kept for signature compatibility
        num_col_classifier: number of columns, must be in 1..6; it
            selects the width the image is resized to for prediction

    Returns:
        Tuple of (label map of the page crop, erosion_hurts (always
        False), separator polygons, rectangles of detected images,
        page image, page coordinates, page contour).

    Raises:
        ValueError: if ``num_col_classifier`` is not in 1..6 (previously
            this ended in an UnboundLocalError)
    """
    self.logger.debug("enter get_regions_extract_images_only")
    width_by_num_col = {1: 700, 2: 900, 3: 1500, 4: 1800, 5: 2200, 6: 2500}
    if num_col_classifier not in width_by_num_col:
        raise ValueError("num_col_classifier must be in range 1..6")
    img_w_new = width_by_num_col[num_col_classifier]

    erosion_hurts = False
    img_height_h = img.shape[0]
    img_width_h = img.shape[1]
    img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
    img_resized = resize_image(img,img_h_new, img_w_new )

    prediction_regions_org, _ = self.do_prediction_new_concept(True, img_resized, self.model_zoo.get("region"))

    # back to the original size, then crop to the detected page
    prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h )
    image_page, page_coord, cont_page = self.extract_page()
    prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1],
                                                    page_coord[2] : page_coord[3]]
    prediction_regions_org = prediction_regions_org[:,:,0]

    # labels handled here: 1 = text, 2 = image, 3 = line
    mask_lines_only = (prediction_regions_org == 3)*1
    mask_texts_only = (prediction_regions_org == 1)*1
    mask_images_only = (prediction_regions_org == 2)*1

    polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
    polygons_seplines = filter_contours_area_of_image(
        mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)

    polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
    polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)

    # rebuild the label map; drawing order matters: lines first, then
    # images, then text on top
    text_regions_p_true = np.zeros(prediction_regions_org.shape)
    text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3))
    text_regions_p_true[mask_images_only == 1] = 2
    text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1))

    # clear a 15 pixel margin at the bottom and at the right border
    text_regions_p_true[text_regions_p_true.shape[0]-15:text_regions_p_true.shape[0], :] = 0
    text_regions_p_true[:, text_regions_p_true.shape[1]-15:text_regions_p_true.shape[1]] = 0

    polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.001)

    polygons_of_images_fin = []
    for ploy_img_ind in polygons_of_images:
        box = _, _, w, h = cv2.boundingRect(ploy_img_ind)
        # ignore images smaller than 150 pixels in either dimension
        if h < 150 or w < 150:
            continue
        # presumably (y_min, y_max, x_min, x_max) - TODO confirm in box2rect
        page_coord_img = box2rect(box)
        polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]],
                                                [page_coord_img[3], page_coord_img[0]],
                                                [page_coord_img[3], page_coord_img[1]],
                                                [page_coord_img[2], page_coord_img[1]]]))

    self.logger.debug("exit get_regions_extract_images_only")
    return (text_regions_p_true,
            erosion_hurts,
            polygons_seplines,
            polygons_of_images_fin,
            image_page,
            page_coord,
            cont_page)
def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
self.logger.debug("enter get_regions_light_v")
@ -3513,23 +3378,6 @@ class Eynollah:
self.logger.info(f"Enhancement complete ({time.time() - t0:.1f}s)")
# Image Extraction Mode
if self.extract_only_images:
self.logger.info("Step 2/5: Image Extraction Mode")
text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, \
image_page, page_coord, cont_page = \
self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
pcgts = self.writer.build_pagexml_no_full_layout(
[], page_coord, [], [], [], [],
polygons_of_images, [], [], [], [], [], [], [], [], [],
cont_page, [], [])
if self.plotter:
self.plotter.write_images_into_directory(polygons_of_images, image_page)
self.logger.info("Image extraction complete")
return pcgts
# Basic Processing Mode
if self.skip_layout_and_reading_order:
self.logger.info("Step 2/5: Basic Processing Mode")

View file

@ -66,8 +66,8 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
),
EynollahModelSpec(
category="region",
variant='extract_only_images',
category="extract_images",
variant='',
filename="models_eynollah/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18",
dist_url=dist_url(),
type='Keras',