From 12b127148741dddfa0f15314a5933c25a47386ef Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 13 Apr 2026 01:19:47 +0200 Subject: [PATCH] layout cli: add option `--halt-fail` --- src/eynollah/cli/cli_layout.py | 11 ++++++++++- src/eynollah/eynollah.py | 16 +++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/eynollah/cli/cli_layout.py b/src/eynollah/cli/cli_layout.py index 03cf9c8..417b202 100644 --- a/src/eynollah/cli/cli_layout.py +++ b/src/eynollah/cli/cli_layout.py @@ -163,7 +163,14 @@ import click "-j", default=0, type=click.IntRange(min=0), - help="number of parallel images to process (also helps better utilise GPU if available); 0 means based on autodetected number of processor cores", + help="number of parallel images to process (for --dir_in mode; also helps better utilise GPU if available); 0 means based on autodetected number of processor cores", +) +@click.option( + "--halt-fail", + "-H", + default=0, + type=click.FloatRange(min=0), + help="abort when number of failed images exceeds this value (if >=1) or ratio of failed over total images exceeds this value (if <1); 0 means ignore failures", ) @click.option( "--device", @@ -199,6 +206,7 @@ def layout_cli( skip_layout_and_reading_order, ignore_page_extraction, num_jobs, + halt_fail, device, ): """ @@ -243,5 +251,6 @@ def layout_cli( dir_of_all=save_all, dir_save_page=save_page, num_jobs=num_jobs, + halt_fail=halt_fail, ) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index a500925..9f118a7 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -2250,6 +2250,7 @@ class Eynollah: dir_of_all: Optional[str] = None, dir_save_page: Optional[str] = None, num_jobs: int = 0, + halt_fail: float = 0, ): """ Get image and scales, then extract the page of scanned image @@ -2294,6 +2295,7 @@ class Eynollah: ) as exe: jobs = {} mngr = mp.get_context('fork').Manager() + n_success = n_fail = 0 for img_filename in ls_imgs: logq = mngr.Queue() jobs[exe.submit(_run_single, img_filename, @@ -2307,16 +2309,20 @@ class Eynollah: try: loglistener.start() job.result() - jobs[job] = True + n_success += 1 except: self.logger.exception("Job %s failed", img_filename) - jobs[job] = False + n_fail += 1 + if (halt_fail and + n_fail >= halt_fail * (len(jobs) if halt_fail < 1 else 1)): + self.logger.fatal("terminating after %d failures", n_fail) + for job in jobs: + job.cancel() + break finally: loglistener.stop() - results = list(jobs.values()) - success = list(filter(None, results)) # for img_filename, result in zip(ls_imgs, results) ... - self.logger.info("%d of %d jobs successful", len(success), len(results)) + self.logger.info("%d of %d jobs successful", n_success, len(jobs)) self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) elif image_filename: try: