mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-09 03:40:12 +02:00
🚧 Add option for text encoding to line dir cli
This commit is contained in:
parent
0920247a45
commit
eca76a7db2
1 changed file with 17 additions and 2 deletions
|
@@ -114,6 +114,7 @@ def process(
|
|||
metrics: bool = True,
|
||||
differences: bool = False,
|
||||
textequiv_level: str = "region",
|
||||
plain_encoding: str = "autodetect",
|
||||
) -> None:
|
||||
"""Check OCR result against GT.
|
||||
|
||||
|
@@ -121,8 +122,12 @@ def process(
|
|||
this undecorated version and use Click on a wrapper.
|
||||
"""
|
||||
|
||||
gt_text = extract(gt, textequiv_level=textequiv_level)
|
||||
ocr_text = extract(ocr, textequiv_level=textequiv_level)
|
||||
gt_text = extract(
|
||||
gt, textequiv_level=textequiv_level, plain_encoding=plain_encoding
|
||||
)
|
||||
ocr_text = extract(
|
||||
ocr, textequiv_level=textequiv_level, plain_encoding=plain_encoding
|
||||
)
|
||||
gt_words: List[str] = list(words_normalized(gt_text))
|
||||
ocr_words: List[str] = list(words_normalized(ocr_text))
|
||||
|
||||
|
@@ -195,6 +200,7 @@ def process_dir(
|
|||
metrics: bool = True,
|
||||
differences: bool = False,
|
||||
textequiv_level: str = "region",
|
||||
plain_encoding: str = "autodetect",
|
||||
) -> None:
|
||||
for gt_file in os.listdir(gt):
|
||||
gt_file_path = os.path.join(gt, gt_file)
|
||||
|
@@ -209,6 +215,7 @@ def process_dir(
|
|||
metrics=metrics,
|
||||
differences=differences,
|
||||
textequiv_level=textequiv_level,
|
||||
plain_encoding=plain_encoding,
|
||||
)
|
||||
else:
|
||||
print("Skipping {0} and {1}".format(gt_file_path, ocr_file_path))
|
||||
|
@@ -233,6 +240,11 @@ def process_dir(
|
|||
help="PAGE TextEquiv level to extract text from",
|
||||
metavar="LEVEL",
|
||||
)
|
||||
@click.option(
|
||||
"--plain-encoding",
|
||||
default="autodetect",
|
||||
help='Encoding (e.g. "utf-8") of plain text files',
|
||||
)
|
||||
@click.option("--progress", default=False, is_flag=True, help="Show progress bar")
|
||||
@click.version_option()
|
||||
def main(
|
||||
|
@@ -243,6 +255,7 @@ def main(
|
|||
metrics,
|
||||
differences,
|
||||
textequiv_level,
|
||||
plain_encoding,
|
||||
progress,
|
||||
):
|
||||
"""
|
||||
|
@@ -280,6 +293,7 @@ def main(
|
|||
metrics=metrics,
|
||||
differences=differences,
|
||||
textequiv_level=textequiv_level,
|
||||
plain_encoding=plain_encoding,
|
||||
)
|
||||
else:
|
||||
process(
|
||||
|
@@ -290,6 +304,7 @@ def main(
|
|||
metrics=metrics,
|
||||
differences=differences,
|
||||
textequiv_level=textequiv_level,
|
||||
plain_encoding=plain_encoding,
|
||||
)
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue