model_zoo/predictor: use one subprocess per model…

- Eynollah: instead of one `Predictor` instance as stand-in for
  entire `ModelZoo`, keep the latter but have each model in `_loaded`
  dict become an independent predictor instance
- `ModelZoo.load_models()`: instantiate `Predictor`s for each
  `model_category` and then call `Predictor.load_model()` on them
- `Predictor.load_model()`: set args/kwargs for `ModelZoo.load_model()`,
  then spawn subprocess via `.start()`, which first enters `setup()`...
- `Predictor.setup()`: call `ModelZoo.load_model()` instead of (plural)
 `.load_models()`; save to `self.model` instead of `self.model_zoo`
- `ModelZoo.load_model()`: move _all_ CUDA configuration and
  TF/Keras-specific module initialization here (to be used only by
  predictor subprocess)
- `Predictor`: drop stand-in `SingleModelPredictor` retrieved by `get()`;
  directly provide `predict()` and `output_shape` via `self.call()`
- `Predictor`: drop `model` arg from all queues - now implicit; use
  `self.name` for model name in messages
- `Predictor`: no need for requeuing other tasks (only same model now)
- `Predictor`: reduce rebatching batch sizes due to increased VRAM footprint

- `Eynollah.setup_models()`: set up loading `_patched` / `_resized`
  here instead of during `ModelZoo.load_model()`
- `ModelZoo.load_models()`: for resized/patched models, call
  `Predictor.load_model()` with kwarg instead of resp. model name suffix
- `ModelZoo.load_model()`: expect boolean kwargs `patched/resized`
  for `wrap_layout_model_patched/resized` model wrappers, respectively
This commit is contained in:
Robert Sachunsky 2026-03-15 02:53:37 +01:00
parent c514bbc661
commit f54deff452
3 changed files with 131 additions and 122 deletions

View file

@@ -44,7 +44,6 @@ except ImportError:
plt = None
from .model_zoo import EynollahModelZoo
from .predictor import Predictor
from .utils.contour import (
filter_contours_area_of_image,
filter_contours_area_of_image_tables,
@@ -142,7 +141,7 @@ class Eynollah:
logger : Optional[logging.Logger] = None,
):
self.logger = logger or logging.getLogger('eynollah')
self.model_zoo = Predictor(self.logger, model_zoo)
self.model_zoo = model_zoo
self.plotter = None
self.reading_order_machine_based = reading_order_machine_based
@@ -174,26 +173,34 @@
# load models, depending on modes
# (note: loading too many models can cause OOM on GPU/CUDA,
# thus, we try set up the minimal configuration for the current mode)
# autosized variants: _resized or _patched (which one may depend on num_cols)
# (but _resized for full page images is too slow - better resize on CPU in numpy)
loadable = [
"col_classifier",
"binarization",
#"enhancement",
#"enhancement", # todo: enhancement_patched
"page",
#"region"
]
loadable.append(("textline"))
if self.input_binary:
loadable.append("binarization") # todo: binarization_patched
loadable.append("textline_patched") # textline
loadable.append("region_1_2")
loadable.append("region_1_2_patched")
if self.full_layout:
loadable.append("region_fl_np")
#loadable.append("region_fl")
#loadable.append("region_fl_patched")
if self.reading_order_machine_based:
loadable.append("reading_order")
loadable.append("reading_order") # todo: reading_order_patched
if self.tables:
loadable.append("table")
self.model_zoo.load_models(*loadable)
for model in loadable:
# retrieve and cache output shapes
if model.endswith(('_resized', '_patched')):
# autosized models do not have a predefined output_shape
# (and don't need one)
continue
self.logger.debug("model %s has output shape %s", model,
self.model_zoo.get(model).output_shape)

View file

@@ -5,21 +5,9 @@ from copy import deepcopy
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Type, Union
os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
from ocrd_utils import tf_disable_interactive_logs
tf_disable_interactive_logs()
from tensorflow.keras.layers import StringLookup
from tensorflow.keras.models import Model as KerasModel
from tensorflow.keras.models import load_model
from tabulate import tabulate
from ..patch_encoder import (
PatchEncoder,
Patches,
wrap_layout_model_patched,
wrap_layout_model_resized,
)
from ..predictor import Predictor
from .specs import EynollahModelSpecSet
from .default_specs import DEFAULT_MODEL_SPECS
from .types import AnyModel, T
@@ -46,7 +34,7 @@ class EynollahModelZoo:
self._overrides = []
if model_overrides:
self.override_models(*model_overrides)
self._loaded: Dict[str, AnyModel] = {}
self._loaded: Dict[str, Predictor] = {}
@property
def model_overrides(self):
@@ -90,34 +78,60 @@ class EynollahModelZoo:
"""
Load all models by calling load_model and return a dictionary mapping model_category to loaded model
"""
import tensorflow as tf
cuda = False
try:
for device in tf.config.list_physical_devices('GPU'):
tf.config.experimental.set_memory_growth(device, True)
cuda = True
self.logger.info("using GPU %s", device.name)
except RuntimeError:
self.logger.exception("cannot configure GPU devices")
if not cuda:
self.logger.warning("no GPU device available")
ret = {}
ret = {} # cannot use self._loaded here, yet first spawn all predictors
for load_args in all_load_args:
if isinstance(load_args, str):
ret[load_args] = self.load_model(load_args)
model_category = load_args
load_args = [model_category]
else:
ret[load_args[0]] = self.load_model(*load_args)
return ret
model_category = load_args[0]
load_kwargs = {}
if model_category.endswith('_resized'):
load_args[0] = model_category[:-8]
load_kwargs["resized"] = True
elif model_category.endswith('_patched'):
load_args[0] = model_category[:-8]
load_kwargs["patched"] = True
ret[model_category] = Predictor(self.logger, self)
ret[model_category].load_model(*load_args, **load_kwargs)
self._loaded.update(ret)
return self._loaded
def load_model(
self,
model_category: str,
model_variant: str = '',
model_path_override: Optional[str] = None,
patched: bool = False,
resized: bool = False,
) -> AnyModel:
"""
Load any model
"""
os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
from ocrd_utils import tf_disable_interactive_logs
tf_disable_interactive_logs()
import tensorflow as tf
from tensorflow.keras.models import load_model
from ..patch_encoder import (
PatchEncoder,
Patches,
wrap_layout_model_patched,
wrap_layout_model_resized,
)
cuda = False
try:
device = tf.config.list_physical_devices('GPU')[0]
tf.config.experimental.set_memory_growth(device, True)
cuda = True
self.logger.info("using GPU %s", device.name)
except RuntimeError:
self.logger.exception("cannot configure GPU devices")
if not cuda:
self.logger.warning("no GPU device available")
if model_path_override:
self.override_models((model_category, model_variant, model_path_override))
model_path = self.model_path(model_category, model_variant)
@@ -142,26 +156,26 @@ class EynollahModelZoo:
model_path, compile=False, custom_objects={"PatchEncoder": PatchEncoder, "Patches": Patches}
)
model._name = model_category
self._loaded[model_category] = model
# autosized for full page images is too slow (better resize on CPU in numpy):
# if model_category in ['region_1_2', 'table', 'region_fl_np']:
# self._loaded[model_category + '_resized'] = wrap_layout_model_resized(model)
if model_category in ['region_1_2', 'textline']:
self._loaded[model_category + '_patched'] = wrap_layout_model_patched(model)
return model # type: ignore
if resized:
model = wrap_layout_model_resized(model)
model._name = model_category + '_resized'
elif patched:
model = wrap_layout_model_patched(model)
model._name = model_category + '_patched'
return model
def get(self, model_category: str, model_type: Optional[Type[T]] = None) -> T:
def get(self, model_category: str) -> Predictor:
if model_category not in self._loaded:
raise ValueError(f'Model "{model_category} not previously loaded with "load_model(..)"')
ret = self._loaded[model_category]
if model_type:
assert isinstance(ret, model_type)
return ret # type: ignore # FIXME: convince typing that we're returning generic type
raise ValueError(f'Model "{model_category}" not previously loaded with "load_model(..)"')
return self._loaded[model_category]
def _load_ocr_model(self, variant: str) -> AnyModel:
"""
Load OCR model
"""
from tensorflow.keras.models import Model as KerasModel
from tensorflow.keras.models import load_model
ocr_model_dir = self.model_path('ocr', variant)
if variant == 'tr':
from transformers import VisionEncoderDecoderModel
@@ -183,10 +197,12 @@ class EynollahModelZoo:
with open(self.model_path('num_to_char'), "r") as config_file:
return json.load(config_file)
def _load_num_to_char(self) -> StringLookup:
def _load_num_to_char(self) -> 'StringLookup':
"""
Load decoder for OCR
"""
from tensorflow.keras.layers import StringLookup
characters = self._load_characters()
# Mapping characters to integers.
char_to_num = StringLookup(vocabulary=characters, mask_token=None)
@@ -225,4 +241,5 @@ class EynollahModelZoo:
"""
if hasattr(self, '_loaded') and getattr(self, '_loaded'):
for needle in list(self._loaded.keys()):
self._loaded[needle].shutdown()
del self._loaded[needle]

View file

@@ -1,7 +1,5 @@
import threading
from contextlib import ExitStack
from functools import lru_cache
from typing import List
from typing import List, Dict
import logging
import logging.handlers
import multiprocessing as mp
@@ -16,27 +14,12 @@ class Predictor(mp.context.SpawnProcess):
"""
singleton subprocess solely responsible for prediction with TensorFlow,
communicates with any number of worker processes,
acts as a shallow replacement for EynollahModelZoo
acting as a shallow replacement for various model types in EynollahModelZoo's
_loaded dict for each single model
"""
class SingleModelPredictor:
"""
acts as a shallow replacement for EynollahModelZoo
"""
def __init__(self, predictor: 'Predictor', model: str):
self.predictor = predictor
self.model = model
@property
def name(self):
return self.model
@property
def output_shape(self):
return self.predictor(self.model, {})
def predict(self, data: dict, verbose=0):
return self.predictor(self.model, data)
def __init__(self, logger, model_zoo):
self.logger = logger
self.loglevel = logger.level
self.loglevel = logger.parent.level
self.model_zoo = model_zoo
ctxt = mp.get_context('spawn')
self.taskq = ctxt.Queue(maxsize=QSIZE)
@@ -47,17 +30,20 @@ class Predictor(mp.context.SpawnProcess):
# as per ocrd_utils.initLogging():
logging.root.handlers +
# as per eynollah_cli.main():
self.logger.handlers
self.logger.parent.handlers
), respect_handler_level=False).start()
self.stopped = ctxt.Event()
self.closable = ctxt.Manager().list()
super().__init__(name="EynollahPredictor", daemon=True)
@lru_cache
def get(self, model: str):
return Predictor.SingleModelPredictor(self, model)
@property
def output_shape(self):
return self({})
def __call__(self, model: str, data: dict):
def predict(self, data: dict, verbose=0):
return self(data)
def __call__(self, data: dict):
# unusable as per python/cpython#79967
#with self.jobid.get_lock():
# would work, but not public:
@@ -66,12 +52,12 @@
self.jobid.value += 1
jobid = self.jobid.value
if not len(data):
self.taskq.put((jobid, model, data))
#self.logger.debug("sent shape query task '%d' for model '%s'", jobid, model)
self.taskq.put((jobid, data))
#self.logger.debug("sent shape query task '%d' for model '%s'", jobid, self.name)
return self.result(jobid)
with share_ndarray(data) as shared_data:
self.taskq.put((jobid, model, shared_data))
#self.logger.debug("sent prediction task '%d' for model '%s': %s", jobid, model, shared_data)
self.taskq.put((jobid, shared_data))
#self.logger.debug("sent prediction task '%d' for model '%s': %s", jobid, self.name, shared_data)
return self.result(jobid)
def result(self, jobid):
@@ -80,7 +66,7 @@
#self.logger.debug("received result for '%d'", jobid)
result = self.results.pop(jobid)
if isinstance(result, Exception):
raise Exception(f"predictor failed for {jobid}") from result
raise Exception(f"predictor {self.name} failed for {jobid}") from result
elif isinstance(result, dict):
with ndarray_shared(result) as shared_result:
result = np.copy(shared_result)
@@ -92,7 +78,7 @@
continue
#self.logger.debug("storing results for '%d': '%s'", jobid0, result)
self.results[jobid0] = result
raise Exception(f"predictor terminated while waiting on results for {jobid}")
raise Exception(f"predictor {self.name} terminated while waiting on results for {jobid}")
def run(self):
try:
@@ -100,6 +86,7 @@
except Exception as e:
self.logger.exception("setup failed")
self.stopped.set()
return
closing = {}
def close_all():
for jobid in list(self.closable):
@@ -110,63 +97,58 @@
close_all()
try:
TIMEOUT = 4.5 # 1.1 too is greedy - not enough for rebatching
jobid, model, shared_data = self.taskq.get(timeout=TIMEOUT)
jobid, shared_data = self.taskq.get(timeout=TIMEOUT)
except mp.queues.Empty:
continue
try:
# up to what batch size fits into small (8GB) VRAM?
# (notice we are not listing _resized/_patched models here,
# because here inputs/outputs will have varying shapes)
# because its inputs/outputs will have varying shapes)
REBATCH_SIZE = {
# small models (448x448)...
"col_classifier": 4,
"page": 4,
"binarization": 5,
"enhancement": 5,
"reading_order": 5,
"col_classifier": 2,
"page": 2,
"binarization": 4,
"enhancement": 4,
"reading_order": 4,
# medium size (672x672)...
"textline": 3,
"textline": 2,
# large models...
"table": 2,
"region_1_2": 2,
"region_fl_np": 2,
"region_fl": 2,
}.get(model, 1)
loaded_model = self.model_zoo.get(model)
"table": 1,
"region_1_2": 1,
"region_fl_np": 1,
"region_fl": 1,
}.get(self.name, 1)
if not len(shared_data):
#self.logger.debug("getting '%d' output shape of model '%s'", jobid, model)
result = loaded_model.output_shape
#self.logger.debug("getting '%d' output shape of model '%s'", jobid, self.name)
result = self.model.output_shape
self.resultq.put((jobid, result))
#self.logger.debug("sent result for '%d': %s", jobid, result)
else:
other_tasks = [] # other model, put back on q
model_tasks = [] # same model, for rebatching
model_tasks.append((jobid, shared_data))
tasks = [(jobid, shared_data)]
batch_size = shared_data['shape'][0]
while (not self.taskq.empty() and
# climb to target batch size
batch_size * len(model_tasks) < REBATCH_SIZE):
jobid0, model0, shared_data0 = self.taskq.get()
if model0 == model and len(shared_data0):
batch_size * len(tasks) < REBATCH_SIZE):
jobid0, shared_data0 = self.taskq.get()
if len(shared_data0):
# add to our batch
model_tasks.append((jobid0, shared_data0))
tasks.append((jobid0, shared_data0))
else:
other_tasks.append((jobid0, model0, shared_data0))
if len(other_tasks):
self.logger.debug("requeuing %d other tasks", len(other_tasks))
for task in other_tasks:
self.taskq.put(task)
if len(model_tasks) > 1:
self.logger.debug("rebatching %d %s tasks of batch size %d", len(model_tasks), model, batch_size)
# immediately answer
self.resultq.put((jobid0, self.model.output_shape))
if len(tasks) > 1:
self.logger.debug("rebatching %d '%s' tasks of batch size %d",
len(tasks), self.name, batch_size)
with ExitStack() as stack:
data = []
jobs = []
for jobid, shared_data in model_tasks:
#self.logger.debug("predicting '%d' with model '%s': ", jobid, model, shared_data)
for jobid, shared_data in tasks:
#self.logger.debug("predicting '%d' with model '%s': %s", jobid, self.name, shared_data)
jobs.append(jobid)
data.append(stack.enter_context(ndarray_shared(shared_data)))
data = np.concatenate(data)
result = loaded_model.predict(data, verbose=0)
result = self.model.predict(data, verbose=0)
results = np.split(result, len(jobs))
#self.logger.debug("sharing result array for '%d'", jobid)
with ExitStack() as stack:
@@ -180,14 +162,17 @@
self.resultq.put((jobid, result))
#self.logger.debug("sent result for '%d': %s", jobid, result)
except Exception as e:
self.logger.error("prediction failed: %s", e.__class__.__name__)
self.logger.error("prediction for %s failed: %s", self.name, e.__class__.__name__)
result = e
self.resultq.put((jobid, result))
close_all()
#self.logger.debug("predictor terminated")
def load_models(self, *loadable: List[str]):
self.loadable = loadable
def load_model(self, *load_args, **load_kwargs):
assert len(load_args)
self.name = '_'.join(list(load_args[:1]) + list(load_kwargs))
self.load_args = load_args
self.load_kwargs = load_kwargs
self.start() # call run() in subprocess
# parent context here
del self.model_zoo # only in subprocess
@@ -200,20 +185,20 @@
def setup(self):
logging.root.handlers = [logging.handlers.QueueHandler(self.logq)]
self.logger.setLevel(self.loglevel)
self.model_zoo.load_models(*self.loadable)
self.model = self.model_zoo.load_model(*self.load_args, **self.load_kwargs)
def shutdown(self):
# do not terminate from forked processor instances
if mp.parent_process() is None:
self.stopped.set()
self.terminate()
self.logq.close()
self.taskq.close()
self.taskq.cancel_join_thread()
self.resultq.close()
self.resultq.cancel_join_thread()
self.logq.close()
self.terminate()
else:
self.model_zoo.shutdown()
del self.model
def __del__(self):
#self.logger.debug(f"deinit of {self} in {mp.current_process().name}")