Generate or update the list of characters used for CNN-RNN OCR training

This commit is contained in:
vahidrezanezhad 2026-02-03 20:20:20 +01:00
parent 60f0fb541d
commit fff4253352
2 changed files with 61 additions and 0 deletions

View file

@ -10,6 +10,7 @@ from .inference import main as inference_cli
from .train import ex
from .extract_line_gt import linegt_cli
from .weights_ensembling import main as ensemble_cli
from .generate_or_update_cnn_rnn_ocr_character_list import main as update_ocr_characters_cli
@click.command(context_settings=dict(
ignore_unknown_options=True,
@ -28,3 +29,4 @@ main.add_command(inference_cli, 'inference')
# Register the subcommands on `main`, each under the name it is invoked with
# on the command line. NOTE(review): `main` and the `*_cli` objects are defined
# or imported outside this hunk — presumably `main` is the package's click group.
main.add_command(train_cli, 'train')
main.add_command(linegt_cli, 'export_textline_images_and_text')
main.add_command(ensemble_cli, 'ensembling')
main.add_command(update_ocr_characters_cli, 'generate_or_update_cnn_rnn_ocr_character_list')

View file

@ -0,0 +1,59 @@
import os
import numpy as np
import json
import click
import logging
def run_character_list_update(dir_labels, out, current_character_list):
    """Generate or update a character list from a directory of label files.

    Every character on the first line of each ``.txt`` file in *dir_labels*
    is collected, merged with the characters of an existing list (when one is
    given) and written to *out* as a sorted JSON array.

    Args:
        dir_labels: Directory containing the label files. Only entries whose
            name ends with ``.txt`` are read.
        out: Path of the file the JSON character list is written to. An
            existing file is overwritten.
        current_character_list: Optional path to an existing JSON character
            list to extend. A falsy value (``None`` or ``""``) starts from an
            empty set.

    Raises:
        OSError: If the directory or one of the files cannot be accessed.
        json.JSONDecodeError: If *current_character_list* is not valid JSON.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    label_names = [
        name for name in os.listdir(dir_labels) if name.endswith('.txt')
    ]

    if current_character_list:
        with open(current_character_list, 'r', encoding='utf-8') as f_name:
            characters = set(json.load(f_name))
    else:
        characters = set()

    for name in label_names:
        # Use a context manager so each label file is closed right away;
        # a large label directory would otherwise leak one handle per file.
        with open(os.path.join(dir_labels, name), 'r', encoding='utf-8') as f_label:
            # Only the first line of a label file is taken into account.
            label = f_label.readline().rstrip('\n')
        characters.update(label)

    with open(out, 'w', encoding='utf-8') as f_name:
        json.dump(sorted(characters), f_name)
@click.command()
@click.option(
    "--dir_labels",
    "-dl",
    help="directory of labels which are txt files",
    type=click.Path(exists=True, file_okay=False),
    required=True,
)
@click.option(
    "--current_character_list",
    "-ccl",
    help="current exsiting character list which is txt file and wished to be updated with a set of labels",
    type=click.Path(exists=True, file_okay=True),
    required=False,
)
@click.option(
    "--out",
    "-o",
    help="output file which is a txt file where generated or updated character list will be written",
    type=click.Path(exists=False, file_okay=True),
    # Without an output path the run can only fail later in open(None, 'w');
    # let click reject the invocation with a proper usage error instead.
    required=True,
)
def main(dir_labels, out, current_character_list):
    """Generate or update the character list for CNN-RNN OCR training.

    Thin command-line wrapper: forwards the parsed options unchanged to
    run_character_list_update, which reads the label files and writes the
    resulting character list to the output file.
    """
    run_character_list_update(dir_labels, out, current_character_list)