CLI: drop redundant negative option forms, add --num-jobs

This commit is contained in:
Robert Sachunsky 2026-03-13 18:22:25 +01:00
parent 576e120ba6
commit b7aa1d24cc
2 changed files with 77 additions and 69 deletions

View file

@ -1,13 +1,14 @@
import click
@click.command()
@click.command(context_settings=dict(
help_option_names=['-h', '--help'],
show_default=True))
@click.option(
"--image",
"-i",
help="input image filename",
type=click.Path(exists=True, dir_okay=False),
)
@click.option(
"--out",
"-o",
@ -30,124 +31,139 @@ import click
@click.option(
"--save_images",
"-si",
help="if a directory is given, images in documents will be cropped and saved there",
help="if a directory is given, cropped images of pages will be saved there",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--save_layout",
"-sl",
help="if a directory is given, plot of layout will be saved there",
help="if a directory is given, plots of layout detection will be saved there",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--save_deskewed",
"-sd",
help="if a directory is given, deskewed image will be saved there",
help="if a directory is given, plots of page deskewing will be saved there",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--save_all",
"-sa",
help="if a directory is given, all plots needed for documentation will be saved there",
help="if a directory is given, all plots needed will be saved there",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--save_page",
"-sp",
help="if a directory is given, page crop of image will be saved there",
help="if a directory is given, plots of page cropping will be saved there",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--enable-plotting/--disable-plotting",
"-ep/-noep",
"--enable-plotting",
"-ep",
is_flag=True,
help="If set, will plot intermediary files and images",
help="plot intermediary diagnostic images to files",
)
@click.option(
"--allow-enhancement/--no-allow-enhancement",
"-ae/-noae",
"--allow-enhancement",
"-ae",
is_flag=True,
help="if this parameter set to true, this tool would check that input image need resizing and enhancement or not. If so output of resized and enhanced image and corresponding layout data will be written in out directory",
help="check whether the input image needs resizing and enhancement. If so, the resized and enhanced image and the corresponding layout data will be written to the output directory",
)
@click.option(
"--curved-line/--no-curvedline",
"-cl/-nocl",
"--curved-line",
"-cl",
is_flag=True,
help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectangle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.",
help="try to return most precise textline contours by deskewing and detecting textlines for all text regions individually. Requires much more computation.",
)
@click.option(
"--full-layout/--no-full-layout",
"-fl/-nofl",
"--full-layout",
"-fl",
is_flag=True,
help="if this parameter set to true, this tool will try to return all elements of layout.",
help="return all elements of layout, including headings and drop-capitals",
)
@click.option(
"--tables/--no-tables",
"-tab/-notab",
"--tables",
"-tab",
is_flag=True,
help="if this parameter set to true, this tool will try to detect tables.",
help="try to detect table regions",
)
@click.option(
"--right2left/--left2right",
"-r2l/-l2r",
"--right2left",
"-r2l",
is_flag=True,
help="if this parameter set to true, this tool will extract right-to-left reading order.",
help="extract right-to-left reading order (instead of left-to-right)",
)
@click.option(
"--input_binary/--input-RGB",
"-ib/-irgb",
"--input_binary",
"-ib",
is_flag=True,
help="In general, eynollah uses RGB as input but if the input document is very dark, very bright or for any other reason you can turn on input binarization. When this flag is set, eynollah will binarize the RGB input document, you should always provide RGB images to eynollah.",
help="In general, eynollah uses RGB as input, but if the input document is very dark, very bright or for any other reason you can turn on internal binarization here. When set, eynollah will binarize the RGB input document first.",
)
@click.option(
"--allow_scaling/--no-allow-scaling",
"-as/-noas",
"--allow_scaling",
"-as",
is_flag=True,
help="if this parameter set to true, this tool would check the scale and if needed it will scale it to perform better layout detection",
help="check the image scale and, if needed, rescale the image to improve layout detection",
)
@click.option(
"--headers_off/--headers-on",
"-ho/-noho",
"--headers_off",
"-ho",
is_flag=True,
help="if this parameter set to true, this tool would ignore headers role in reading order",
help="ignore headers role in reading order",
)
@click.option(
"--ignore_page_extraction/--extract_page_included",
"-ipe/-epi",
"--ignore_page_extraction",
"-ipe",
is_flag=True,
help="if this parameter set to true, this tool would ignore page extraction",
help="ignore page extraction (cropping via page frame detection model)",
)
@click.option(
"--reading_order_machine_based/--heuristic_reading_order",
"-romb/-hro",
"--reading_order_machine_based",
"-romb",
is_flag=True,
help="if this parameter set to true, this tool would apply machine based reading order detection",
help="apply model based reading order detection",
)
@click.option(
"--num_col_upper",
"-ncu",
help="lower limit of columns in document image",
default=0,
type=click.IntRange(min=0),
help="upper limit of columns in document image; 0 means autodetected from model",
)
@click.option(
"--num_col_lower",
"-ncl",
help="upper limit of columns in document image",
default=0,
type=click.IntRange(min=0),
help="lower limit of columns in document image; 0 means autodetected from model",
)
@click.option(
"--threshold_art_class_layout",
"-tharl",
help="threshold of artifical class in the case of layout detection. The default value is 0.1",
default=0.1,
type=click.FloatRange(min=0.0, max=1.0),
help="confidence threshold of artificial boundary class during region detection",
)
@click.option(
"--threshold_art_class_textline",
"-thart",
help="threshold of artifical class in the case of textline detection. The default value is 0.1",
default=0.1,
type=click.FloatRange(min=0.0, max=1.0),
help="confidence threshold of artificial boundary class during textline detection",
)
@click.option(
"--skip_layout_and_reading_order",
"-slro/-noslro",
"-slro",
is_flag=True,
help="if this parameter set to true, this tool will ignore layout detection and reading order. It means that textline detection will be done within printspace and contours of textline will be written in xml output file.",
help="ignore layout detection and reading order, i.e. textline detection will be done within entire printspace, and textline contours will be written into a single overall text region.",
)
@click.option(
"--num-jobs",
"-j",
default=0,
type=click.IntRange(min=0),
help="number of images to process in parallel (also helps to better utilise the GPU, if available); 0 means use the autodetected number of processor cores",
)
@click.pass_context
def layout_cli(
@ -177,6 +193,7 @@ def layout_cli(
threshold_art_class_layout,
skip_layout_and_reading_order,
ignore_page_extraction,
num_jobs,
):
"""
Detect Layout (with optional image enhancement and reading order detection)
@ -218,5 +235,6 @@ def layout_cli(
dir_of_deskewed=save_deskewed,
dir_of_all=save_all,
dir_save_page=save_page,
num_jobs=num_jobs,
)

View file

@ -121,11 +121,12 @@ class Eynollah:
headers_off : bool = False,
ignore_page_extraction : bool = False,
reading_order_machine_based : bool = False,
num_col_upper : Optional[int] = None,
num_col_lower : Optional[int] = None,
threshold_art_class_layout: Optional[float] = None,
threshold_art_class_textline: Optional[float] = None,
num_col_upper : int = 0,
num_col_lower : int = 0,
threshold_art_class_layout: float = 0.1,
threshold_art_class_textline: float = 0.1,
skip_layout_and_reading_order : bool = False,
num_jobs : int = 0,
logger : Optional[logging.Logger] = None,
):
self.logger = logger or logging.getLogger('eynollah')
@ -145,24 +146,10 @@ class Eynollah:
self.headers_off = headers_off
self.ignore_page_extraction = ignore_page_extraction
self.skip_layout_and_reading_order = skip_layout_and_reading_order
if num_col_upper:
self.num_col_upper = int(num_col_upper)
else:
self.num_col_upper = num_col_upper
if num_col_lower:
self.num_col_lower = int(num_col_lower)
else:
self.num_col_lower = num_col_lower
if threshold_art_class_layout:
self.threshold_art_class_layout = float(threshold_art_class_layout)
else:
self.threshold_art_class_layout = 0.1
if threshold_art_class_textline:
self.threshold_art_class_textline = float(threshold_art_class_textline)
else:
self.threshold_art_class_textline = 0.1
self.num_col_upper = int(num_col_upper)
self.num_col_lower = int(num_col_lower)
self.threshold_art_class_layout = float(threshold_art_class_layout)
self.threshold_art_class_textline = float(threshold_art_class_textline)
t_start = time.time()
@ -337,6 +324,7 @@ class Eynollah:
if img_new.shape[1] > img.shape[1]:
img_new = self.do_prediction(True, img_new, self.model_zoo.get("enhancement"),
marginal_of_patch_percent=0,
n_batch_inference=3,
is_enhancement=True)
self.logger.info("Enhancement applied")
@ -2239,6 +2227,7 @@ class Eynollah:
dir_of_deskewed: Optional[str] = None,
dir_of_all: Optional[str] = None,
dir_save_page: Optional[str] = None,
num_jobs: int = 0,
):
"""
Get image and scales, then extract the page of scanned image
@ -2276,7 +2265,8 @@ class Eynollah:
ls_imgs = [os.path.join(dir_in, image_filename)
for image_filename in filter(is_image_filename,
os.listdir(dir_in))]
with ProcessPoolExecutor(mp_context=mp.get_context('fork'),
with ProcessPoolExecutor(max_workers=num_jobs or None,
mp_context=mp.get_context('fork'),
initializer=_set_instance,
initargs=(self,)
) as exe: