diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d3dabea..3fb4feb 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -17,6 +17,7 @@ document layout analysis (segmentation) with output in PAGE-XML # pyright: reportOptionalSubscript=false import logging +import logging.handlers import sys from difflib import SequenceMatcher as sq @@ -99,10 +100,21 @@ KERNEL = np.ones((5, 5), np.uint8) _instance = None -def _set_instance(instance): +def _set_instance(instance, logq): global _instance _instance = instance + # replace all inherited handlers with queue handler + logging.root.handlers.clear() + instance.logger.handlers.clear() + handler = logging.handlers.QueueHandler(logq) + handler.setFormatter(logging.Formatter(fmt="[%(jobId)s] %(message)s")) + logging.root.addHandler(handler) def _run_single(*args, **kwargs): + # prefix log messages with img_filename + def log_filter(record: logging.LogRecord): + record.jobId = args[0] + return record + logging.root.handlers[0].filters = [log_filter] return _instance.run_single(*args, **kwargs) class Eynollah: @@ -2265,11 +2277,14 @@ class Eynollah: ls_imgs = [os.path.join(dir_in, image_filename) for image_filename in filter(is_image_filename, os.listdir(dir_in))] + logq = mp.get_context('fork').Queue() with ProcessPoolExecutor(max_workers=num_jobs or None, mp_context=mp.get_context('fork'), initializer=_set_instance, - initargs=(self,) + initargs=(self, logq) ) as exe: + logging.handlers.QueueListener(logq, *self.logger.handlers, + respect_handler_level=False).start() jobs = {exe.submit(_run_single, img_filename, dir_out=dir_out, overwrite=overwrite): img_filename diff --git a/src/eynollah/predictor.py b/src/eynollah/predictor.py index ff0dfc3..9faa1be 100644 --- a/src/eynollah/predictor.py +++ b/src/eynollah/predictor.py @@ -36,6 +36,7 @@ class Predictor(mp.context.SpawnProcess): def __init__(self, logger, model_zoo): self.logger = logger + self.loglevel = logger.level self.model_zoo = model_zoo ctxt = mp.get_context('spawn') self.taskq = ctxt.Queue(maxsize=QSIZE) @@ -147,6 +148,7 @@ class Predictor(mp.context.SpawnProcess): def setup(self): logging.root.handlers = [logging.handlers.QueueHandler(self.logq)] + self.logger.setLevel(self.loglevel) self.model_zoo.load_models(*self.loadable) def shutdown(self): diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index d15a130..00ba236 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -44,7 +44,6 @@ class EynollahXmlWriter: return Path(Path(self.image_filename).name).stem def calculate_points(self, contour, offset=None): - self.logger.debug('enter calculate_points') poly = contour2polygon(contour) if offset is not None: poly = affinity.translate(poly, *offset) @@ -53,7 +52,6 @@ class EynollahXmlWriter: return points_from_polygon(poly.exterior.coords[:-1]) def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): - self.logger.debug('enter serialize_lines_in_region') for j, polygon_textline in enumerate(all_found_textline_polygons[region_idx]): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords)