From 4181e03bc9798dd796e4aabe440f7791d49ffe90 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 12 Jun 2026 14:48:47 +0200 Subject: [PATCH] =?UTF-8?q?`training=20convert=20--rebuild`=20for=20cnn-rn?= =?UTF-8?q?n-ocr:=20override=20charset=20file=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when rebuilding the inference model for cnn-rnn-ocr, - open the old `characters_org.txt` file for the charset - use it to pass the actual `n_classes` (overriding the config) - use its path to pass the `characters_txt_file` (overriding the config) --- src/eynollah/training/convert.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/eynollah/training/convert.py b/src/eynollah/training/convert.py index 140079e..d2d7b49 100644 --- a/src/eynollah/training/convert.py +++ b/src/eynollah/training/convert.py @@ -68,6 +68,12 @@ def convert_cli(rebuild, format_, in_, out): ex.add_config(str(config_path)) # some models deviate between training and inference ex.add_config(inference=True) + # make sure the local vocab file gets re-used + characters_txt_file = model_path / "characters_org.txt" + with open(characters_txt_file, "r") as voc_file: + voc = json.load(voc_file) + ex.add_config(characters_txt_file=characters_txt_file) + ex.add_config(n_classes=len(voc) + 3) # just retrieve final config (via pseudo-run) ex.main(lambda: 0) config = ex.run(options={'--loglevel': 'ERROR'}).config