mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-09 11:50:00 +02:00
🚧 Add option for text encoding to line dir cli
This commit is contained in:
parent
0920247a45
commit
eca76a7db2
1 changed file with 17 additions and 2 deletions
|
@ -114,6 +114,7 @@ def process(
|
||||||
metrics: bool = True,
|
metrics: bool = True,
|
||||||
differences: bool = False,
|
differences: bool = False,
|
||||||
textequiv_level: str = "region",
|
textequiv_level: str = "region",
|
||||||
|
plain_encoding: str = "autodetect",
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Check OCR result against GT.
|
"""Check OCR result against GT.
|
||||||
|
|
||||||
|
@ -121,8 +122,12 @@ def process(
|
||||||
this undecorated version and use Click on a wrapper.
|
this undecorated version and use Click on a wrapper.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
gt_text = extract(gt, textequiv_level=textequiv_level)
|
gt_text = extract(
|
||||||
ocr_text = extract(ocr, textequiv_level=textequiv_level)
|
gt, textequiv_level=textequiv_level, plain_encoding=plain_encoding
|
||||||
|
)
|
||||||
|
ocr_text = extract(
|
||||||
|
ocr, textequiv_level=textequiv_level, plain_encoding=plain_encoding
|
||||||
|
)
|
||||||
gt_words: List[str] = list(words_normalized(gt_text))
|
gt_words: List[str] = list(words_normalized(gt_text))
|
||||||
ocr_words: List[str] = list(words_normalized(ocr_text))
|
ocr_words: List[str] = list(words_normalized(ocr_text))
|
||||||
|
|
||||||
|
@ -195,6 +200,7 @@ def process_dir(
|
||||||
metrics: bool = True,
|
metrics: bool = True,
|
||||||
differences: bool = False,
|
differences: bool = False,
|
||||||
textequiv_level: str = "region",
|
textequiv_level: str = "region",
|
||||||
|
plain_encoding: str = "autodetect",
|
||||||
) -> None:
|
) -> None:
|
||||||
for gt_file in os.listdir(gt):
|
for gt_file in os.listdir(gt):
|
||||||
gt_file_path = os.path.join(gt, gt_file)
|
gt_file_path = os.path.join(gt, gt_file)
|
||||||
|
@ -209,6 +215,7 @@ def process_dir(
|
||||||
metrics=metrics,
|
metrics=metrics,
|
||||||
differences=differences,
|
differences=differences,
|
||||||
textequiv_level=textequiv_level,
|
textequiv_level=textequiv_level,
|
||||||
|
plain_encoding=plain_encoding,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
print("Skipping {0} and {1}".format(gt_file_path, ocr_file_path))
|
print("Skipping {0} and {1}".format(gt_file_path, ocr_file_path))
|
||||||
|
@ -233,6 +240,11 @@ def process_dir(
|
||||||
help="PAGE TextEquiv level to extract text from",
|
help="PAGE TextEquiv level to extract text from",
|
||||||
metavar="LEVEL",
|
metavar="LEVEL",
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
"--plain-encoding",
|
||||||
|
default="autodetect",
|
||||||
|
help='Encoding (e.g. "utf-8") of plain text files',
|
||||||
|
)
|
||||||
@click.option("--progress", default=False, is_flag=True, help="Show progress bar")
|
@click.option("--progress", default=False, is_flag=True, help="Show progress bar")
|
||||||
@click.version_option()
|
@click.version_option()
|
||||||
def main(
|
def main(
|
||||||
|
@ -243,6 +255,7 @@ def main(
|
||||||
metrics,
|
metrics,
|
||||||
differences,
|
differences,
|
||||||
textequiv_level,
|
textequiv_level,
|
||||||
|
plain_encoding,
|
||||||
progress,
|
progress,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
@ -280,6 +293,7 @@ def main(
|
||||||
metrics=metrics,
|
metrics=metrics,
|
||||||
differences=differences,
|
differences=differences,
|
||||||
textequiv_level=textequiv_level,
|
textequiv_level=textequiv_level,
|
||||||
|
plain_encoding=plain_encoding,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
process(
|
process(
|
||||||
|
@ -290,6 +304,7 @@ def main(
|
||||||
metrics=metrics,
|
metrics=metrics,
|
||||||
differences=differences,
|
differences=differences,
|
||||||
textequiv_level=textequiv_level,
|
textequiv_level=textequiv_level,
|
||||||
|
plain_encoding=plain_encoding,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue