mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-02-20 16:32:03 +01:00
generate or update list of characters in the case of cnn-rnn ocr training
This commit is contained in:
parent
60f0fb541d
commit
fff4253352
2 changed files with 61 additions and 0 deletions
|
|
@ -10,6 +10,7 @@ from .inference import main as inference_cli
|
||||||
from .train import ex
|
from .train import ex
|
||||||
from .extract_line_gt import linegt_cli
|
from .extract_line_gt import linegt_cli
|
||||||
from .weights_ensembling import main as ensemble_cli
|
from .weights_ensembling import main as ensemble_cli
|
||||||
|
from .generate_or_update_cnn_rnn_ocr_character_list import main as update_ocr_characters_cli
|
||||||
|
|
||||||
@click.command(context_settings=dict(
|
@click.command(context_settings=dict(
|
||||||
ignore_unknown_options=True,
|
ignore_unknown_options=True,
|
||||||
|
|
@ -28,3 +29,4 @@ main.add_command(inference_cli, 'inference')
|
||||||
main.add_command(train_cli, 'train')
|
main.add_command(train_cli, 'train')
|
||||||
main.add_command(linegt_cli, 'export_textline_images_and_text')
|
main.add_command(linegt_cli, 'export_textline_images_and_text')
|
||||||
main.add_command(ensemble_cli, 'ensembling')
|
main.add_command(ensemble_cli, 'ensembling')
|
||||||
|
main.add_command(update_ocr_characters_cli, 'generate_or_update_cnn_rnn_ocr_character_list')
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,59 @@
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import json
|
||||||
|
import click
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def run_character_list_update(dir_labels, out, current_character_list):
    """Generate or update the character list for CNN-RNN OCR training.

    Collect every character occurring in the first line of each ``.txt``
    file located directly inside ``dir_labels``, merge them with the
    entries of an optional existing character list, and write the sorted
    result to ``out`` as a JSON array.

    Args:
        dir_labels: Directory holding the label files (``*.txt``); other
            files in it are ignored.
        out: Path of the file the JSON character list is written to.
        current_character_list: Path of an existing JSON character list
            to extend, or a falsy value to start from an empty set.
    """
    # List the directory first so a bad ``dir_labels`` fails before any
    # other file is touched.
    label_names = [name for name in os.listdir(dir_labels)
                   if name.endswith('.txt')]

    if current_character_list:
        with open(current_character_list, 'r', encoding='utf-8') as f_name:
            characters = set(json.load(f_name))
    else:
        characters = set()

    for name in label_names:
        # Explicit UTF-8: label text is generally non-ASCII and must not
        # depend on the locale's default encoding. The context manager
        # closes each handle instead of leaking one per label file.
        with open(os.path.join(dir_labels, name), 'r',
                  encoding='utf-8') as f_label:
            # Only the first line of a label file carries the text.
            label = f_label.readline().rstrip('\n')
        characters.update(label)

    with open(out, 'w', encoding='utf-8') as f_name:
        json.dump(sorted(characters), f_name)
|
||||||
|
|
||||||
|
|
||||||
|
@click.command()
@click.option(
    "--dir_labels",
    "-dl",
    help="directory of labels which are txt files",
    type=click.Path(exists=True, file_okay=False),
    required=True,
)
@click.option(
    "--current_character_list",
    "-ccl",
    help="current exsiting character list which is txt file and wished to be updated with a set of labels",
    # dir_okay=False: the value is opened as a file, so reject directories
    # during option parsing rather than failing later in open().
    type=click.Path(exists=True, file_okay=True, dir_okay=False),
    required=False,
)
@click.option(
    "--out",
    "-o",
    help="output file which is a txt file where generated or updated character list will be written",
    type=click.Path(exists=False, file_okay=True, dir_okay=False),
    # Without an output path the run would crash in open(None, 'w');
    # make click report the missing option as a usage error instead.
    required=True,
)
def main(dir_labels, out, current_character_list):
    """Generate or update the character list for CNN-RNN OCR training."""
    run_character_list_update(dir_labels, out, current_character_list)
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue