|
|
@ -32,7 +32,7 @@ def common_suffix(its):
|
|
|
|
|
|
|
|
|
|
|
|
def removesuffix(text, suffix):
    """Return *text* with a trailing *suffix* removed, if it is present.

    Args:
        text: The string to strip.
        suffix: The ending to remove. An empty suffix is a no-op.

    Returns:
        ``text`` without the trailing ``suffix`` when ``text`` ends with it;
        otherwise ``text`` unchanged.
    """
    # Guard against an empty suffix: -len("") is 0 and text[:0] would
    # return an empty string instead of the unchanged text.
    if suffix and text.endswith(suffix):
        return text[: -len(suffix)]
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -52,7 +52,9 @@ def process(gt_dir, ocr_dir, report_prefix, *, metrics=True):
|
|
|
|
ocr = removesuffix(gt, gt_suffix) + ocr_suffix
|
|
|
|
ocr = removesuffix(gt, gt_suffix) + ocr_suffix
|
|
|
|
|
|
|
|
|
|
|
|
gt_text = plain_extract(os.path.join(gt_dir, gt), include_filename_in_id=True)
|
|
|
|
gt_text = plain_extract(os.path.join(gt_dir, gt), include_filename_in_id=True)
|
|
|
|
ocr_text = plain_extract(os.path.join(ocr_dir, ocr), include_filename_in_id=True)
|
|
|
|
ocr_text = plain_extract(
|
|
|
|
|
|
|
|
os.path.join(ocr_dir, ocr), include_filename_in_id=True
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# Compute CER
|
|
|
|
# Compute CER
|
|
|
|
l_cer, l_n_characters = character_error_rate_n(gt_text, ocr_text)
|
|
|
|
l_cer, l_n_characters = character_error_rate_n(gt_text, ocr_text)
|
|
|
@ -60,7 +62,9 @@ def process(gt_dir, ocr_dir, report_prefix, *, metrics=True):
|
|
|
|
cer, n_characters = l_cer, l_n_characters
|
|
|
|
cer, n_characters = l_cer, l_n_characters
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
# Rolling update
|
|
|
|
# Rolling update
|
|
|
|
cer = (cer * n_characters + l_cer * l_n_characters) / (n_characters + l_n_characters)
|
|
|
|
cer = (cer * n_characters + l_cer * l_n_characters) / (
|
|
|
|
|
|
|
|
n_characters + l_n_characters
|
|
|
|
|
|
|
|
)
|
|
|
|
n_characters = n_characters + l_n_characters
|
|
|
|
n_characters = n_characters + l_n_characters
|
|
|
|
|
|
|
|
|
|
|
|
# Compute WER
|
|
|
|
# Compute WER
|
|
|
|