From ec08004fb03a82f4bfcdf5d4cf90c16788edfb6d Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sat, 14 Mar 2026 00:43:58 +0100 Subject: [PATCH] =?UTF-8?q?run:=20add=20QueueListener=20to=20pool=20/=20Qu?= =?UTF-8?q?eueHandler=20to=20workers=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - set up a Queue and QueueListener along with ProcessPoolExecutor, delegating messages from the queue to all handlers - in forked subprocesses, instead of just inheriting handlers, replace them with a single QueueHandler, and make sure log messages get prefixes by the respective job id (img_filename) so concurrent messages will still be readable - in the predictor, make sure to pass on the log level to the spawned subprocess, too --- src/eynollah/eynollah.py | 19 +++++++++++++++++-- src/eynollah/predictor.py | 2 ++ src/eynollah/writer.py | 2 -- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d3dabea..3fb4feb 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -17,6 +17,7 @@ document layout analysis (segmentation) with output in PAGE-XML # pyright: reportOptionalSubscript=false import logging +import logging.handlers import sys from difflib import SequenceMatcher as sq @@ -99,10 +100,21 @@ KERNEL = np.ones((5, 5), np.uint8) _instance = None -def _set_instance(instance): +def _set_instance(instance, logq): global _instance _instance = instance + # replace all inherited handlers with queue handler + logging.root.handlers.clear() + instance.logger.handlers.clear() + handler = logging.handlers.QueueHandler(logq) + handler.setFormatter(logging.Formatter(fmt="[%(jobId)s] %(message)s")) + logging.root.addHandler(handler) def _run_single(*args, **kwargs): + # prefix log messages with img_filename + def log_filter(record: logging.LogRecord): + record.jobId = args[0] + return record + logging.root.handlers[0].filters = [log_filter] return _instance.run_single(*args, **kwargs) class Eynollah: @@ -2265,11 +2277,14 @@ class Eynollah: ls_imgs = [os.path.join(dir_in, image_filename) for image_filename in filter(is_image_filename, os.listdir(dir_in))] + logq = mp.get_context('fork').Queue() with ProcessPoolExecutor(max_workers=num_jobs or None, mp_context=mp.get_context('fork'), initializer=_set_instance, - initargs=(self,) + initargs=(self, logq) ) as exe: + logging.handlers.QueueListener(logq, *self.logger.handlers, + respect_handler_level=False).start() jobs = {exe.submit(_run_single, img_filename, dir_out=dir_out, overwrite=overwrite): img_filename diff --git a/src/eynollah/predictor.py b/src/eynollah/predictor.py index ff0dfc3..9faa1be 100644 --- a/src/eynollah/predictor.py +++ b/src/eynollah/predictor.py @@ -36,6 +36,7 @@ class Predictor(mp.context.SpawnProcess): def __init__(self, logger, model_zoo): self.logger = logger + self.loglevel = logger.level self.model_zoo = model_zoo ctxt = mp.get_context('spawn') self.taskq = ctxt.Queue(maxsize=QSIZE) @@ -147,6 +148,7 @@ class Predictor(mp.context.SpawnProcess): def setup(self): logging.root.handlers = [logging.handlers.QueueHandler(self.logq)] + self.logger.setLevel(self.loglevel) self.model_zoo.load_models(*self.loadable) def shutdown(self): diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index d15a130..00ba236 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -44,7 +44,6 @@ class EynollahXmlWriter: return Path(Path(self.image_filename).name).stem def calculate_points(self, contour, offset=None): - self.logger.debug('enter calculate_points') poly = contour2polygon(contour) if offset is not None: poly = affinity.translate(poly, *offset) @@ -53,7 +52,6 @@ class EynollahXmlWriter: return points_from_polygon(poly.exterior.coords[:-1]) def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): - self.logger.debug('enter serialize_lines_in_region') for j, polygon_textline in enumerate(all_found_textline_polygons[region_idx]): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords)