diff --git a/src/eynollah/eynollah_ocr.py b/src/eynollah/eynollah_ocr.py
index 3c918e5..6eab7f6 100644
--- a/src/eynollah/eynollah_ocr.py
+++ b/src/eynollah/eynollah_ocr.py
@@ -72,7 +72,7 @@ class Eynollah_ocr:
         self.model_zoo.load_model('ocr', '')
         self.model_zoo.load_model('num_to_char')
         self.model_zoo.load_model('characters')
-        self.end_character = len(self.model_zoo.get('characters', list)) + 2
+        self.end_character = len(self.model_zoo.get('characters')) + 2
 
     @property
     def device(self):
diff --git a/src/eynollah/model_zoo/model_zoo.py b/src/eynollah/model_zoo/model_zoo.py
index fffd389..7945c55 100644
--- a/src/eynollah/model_zoo/model_zoo.py
+++ b/src/eynollah/model_zoo/model_zoo.py
@@ -197,9 +197,12 @@ class EynollahModelZoo:
         return model
 
     def get(self, model_category: str) -> Predictor:
-        if model_category not in self._loaded:
-            raise ValueError(f'Model "{model_category}" not previously loaded with "load_model(..)"')
-        return self._loaded[model_category]
+        # if model_category not in self._loaded:
+        #     raise ValueError(f'Model "{model_category}" not previously loaded with "load_model(..)"')
+        if model_category in self._loaded:
+            return self._loaded[model_category]
+        else:
+            return self.load_model(model_category)
 
     def _load_ocr_model(self, variant: str) -> AnyModel:
         """
diff --git a/src/eynollah/ocrd-tool.json b/src/eynollah/ocrd-tool.json
index c946541..51f56b7 100644
--- a/src/eynollah/ocrd-tool.json
+++ b/src/eynollah/ocrd-tool.json
@@ -162,43 +162,50 @@
           "version_range": "< v0.7.0"
         }
       ]
-    }
-  },
-  "ocrd-eynollah-recognize": {
-    "executable": "ocrd-eynollah-recognize",
-    "categories": ["Text recognition and optimization"],
-    "steps": ["recognition/text-recognition"],
-    "input_file_grp_cardinality": 1,
-    "output_file_grp_cardinality": 1,
-    "parameters": {
-      "models": {
-        "type": "string",
-        "format": "uri",
-        "content-type": "text/directory",
-        "cacheable": true,
-        "description": "Directory containing the eynollah_models directory",
-        "required": true
-      },
-      "do_not_mask_with_textline_contour": {
-        "type": "boolean",
-        "description": "if this parameter set to true, cropped textline images will not be masked with textline contour.",
-        "default": false
-      },
-      "tr_ocr": {
-        "type": "boolean",
-        "description": "Whether to use (much more resource-intensive) transformer model",
-        "default": false
-      }
     },
-    "resources": [
-      {
-        "url": "https://zenodo.org/records/17580627/files/models_ocr_v0_6_0.tar.gz?download=1",
-        "name": "models_ocr_v0_6_0",
-        "type": "archive",
-        "size": 6119874002,
-        "description": "Models for OCR",
-        "version_range": ">= v0.6.0"
-      }
-    ]
+    "ocrd-eynollah-recognize": {
+      "executable": "ocrd-eynollah-recognize",
+      "categories": ["Text recognition and optimization"],
+      "steps": ["recognition/text-recognition"],
+      "input_file_grp_cardinality": 1,
+      "output_file_grp_cardinality": 1,
+      "description": "Recognize text with eynollah (CNN/RNN or Transformer)",
+      "parameters": {
+        "models": {
+          "type": "string",
+          "format": "uri",
+          "content-type": "text/directory",
+          "cacheable": true,
+          "description": "Directory containing the eynollah_models directory",
+          "required": true
+        },
+        "do_not_mask_with_textline_contour": {
+          "type": "boolean",
+          "description": "if this parameter set to true, cropped textline images will not be masked with textline contour.",
+          "default": false
+        },
+        "tr_ocr": {
+          "type": "boolean",
+          "description": "Whether to use (much more resource-intensive) transformer model",
+          "default": false
+        },
+        "batch_size": {
+          "type": "number",
+          "format": "integer",
+          "description": "Batch size, leave as 0 for builtin default (8 for CNN/RNN, 2 for TrOCR)",
+          "default": 0
+        }
+      },
+      "resources": [
+        {
+          "url": "https://zenodo.org/records/17580627/files/models_ocr_v0_6_0.tar.gz?download=1",
+          "name": "models_ocr_v0_6_0",
+          "type": "archive",
+          "size": 6119874002,
+          "description": "Models for OCR",
+          "version_range": ">= v0.6.0"
+        }
+      ]
+    }
   }
 }
diff --git a/src/eynollah/ocrd_cli_recognize.py b/src/eynollah/ocrd_cli_recognize.py
index aed5bf2..8e3cc6f 100644
--- a/src/eynollah/ocrd_cli_recognize.py
+++ b/src/eynollah/ocrd_cli_recognize.py
@@ -34,8 +34,8 @@ class EynollahRecognizeProcessor(Processor):
             model_zoo=model_zoo,
             tr_ocr=self.parameter['tr_ocr'],
             do_not_mask_with_textline_contour=self.parameter['do_not_mask_with_textline_contour'],
-            batch_size=self.parameter['batch_size'],
-            min_conf_value_of_textline_text=self.parameter['min_conf_value_of_textline_text'])
+            batch_size=self.parameter['batch_size'] if self.parameter['batch_size'] > 0 else 2 if self.parameter['tr_ocr'] else 8,
+            min_conf_value_of_textline_text=0)
 
         # FIXME: This is just a proof-of-concept, very inefficient and non-conformant
         # TODO: OCR writing should use PAGE API once result dataclass mechanism is settled,