mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-02-20 16:32:03 +01:00
generate or update list of characters in the case of cnn-rnn ocr training
This commit is contained in:
parent
60f0fb541d
commit
fff4253352
2 changed files with 61 additions and 0 deletions
|
|
@ -10,6 +10,7 @@ from .inference import main as inference_cli
|
|||
from .train import ex
|
||||
from .extract_line_gt import linegt_cli
|
||||
from .weights_ensembling import main as ensemble_cli
|
||||
from .generate_or_update_cnn_rnn_ocr_character_list import main as update_ocr_characters_cli
|
||||
|
||||
@click.command(context_settings=dict(
|
||||
ignore_unknown_options=True,
|
||||
|
|
@ -28,3 +29,4 @@ main.add_command(inference_cli, 'inference')
|
|||
main.add_command(train_cli, 'train')
|
||||
main.add_command(linegt_cli, 'export_textline_images_and_text')
|
||||
main.add_command(ensemble_cli, 'ensembling')
|
||||
main.add_command(update_ocr_characters_cli, 'generate_or_update_cnn_rnn_ocr_character_list')
|
||||
|
|
|
|||
|
|
@ -0,0 +1,59 @@
|
|||
import os
|
||||
import numpy as np
|
||||
import json
|
||||
import click
|
||||
import logging
|
||||
|
||||
|
||||
|
||||
def run_character_list_update(dir_labels, out, current_character_list):
    """Generate or update the character list used for CNN-RNN OCR training.

    Collects every distinct character found on the first line of each
    ``.txt`` file in *dir_labels*, merges them with the entries of an
    existing character list (if one is given) and writes the sorted result
    to *out* as a JSON array.

    Args:
        dir_labels: Directory holding the label files; only entries whose
            name ends with ``.txt`` are read.
        out: Path of the file the resulting JSON list is written to.
        current_character_list: Optional path of an existing JSON character
            list to extend. A falsy value (e.g. ``None``) starts from an
            empty set.
    """
    if current_character_list:
        with open(current_character_list, 'r', encoding='utf-8') as f_name:
            characters = set(json.load(f_name))
    else:
        characters = set()

    for file_name in os.listdir(dir_labels):
        if not file_name.endswith('.txt'):
            continue
        # A context manager closes every label file right away (the directory
        # may hold a very large number of them), and the explicit encoding
        # keeps non-ASCII characters independent of the system locale.
        with open(os.path.join(dir_labels, file_name), 'r', encoding='utf-8') as f_label:
            # Only the first line of a label file carries the transcription.
            label = f_label.readline().rstrip('\n')
        characters.update(label)

    with open(out, 'w', encoding='utf-8') as f_name:
        # Sorted so the output is deterministic between runs.
        json.dump(sorted(characters), f_name)
|
||||
|
||||
|
||||
@click.command()
@click.option(
    "--dir_labels",
    "-dl",
    help="directory of labels which are txt files",
    type=click.Path(exists=True, file_okay=False),
    required=True,
)
@click.option(
    "--current_character_list",
    "-ccl",
    help="current existing character list (a JSON file) which is to be updated with a set of labels",
    type=click.Path(exists=True, file_okay=True, dir_okay=False),
    required=False,
)
@click.option(
    "--out",
    "-o",
    help="output file (JSON) where the generated or updated character list will be written",
    type=click.Path(exists=False, file_okay=True, dir_okay=False),
    # Without a target path the update would fail with a TypeError in open(),
    # so let click reject the call with a proper usage error instead.
    required=True,
)
def main(dir_labels, out, current_character_list):
    """Generate or update the character list for CNN-RNN OCR training.

    Thin command-line wrapper that forwards the parsed options to
    ``run_character_list_update``.
    """
    run_character_list_update(dir_labels, out, current_character_list)
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue