🐛 dinglehopper: Always work with NFC text

pull/3/head
Gerber, Mike 5 years ago
parent 715b813bbc
commit df93c80e5d

@@ -60,8 +60,8 @@ def process(gt, ocr, report_prefix):
char_diff_report = gen_diff_report(gt_text, ocr_text, css_prefix='c', joiner='', none='·', align=align) char_diff_report = gen_diff_report(gt_text, ocr_text, css_prefix='c', joiner='', none='·', align=align)
gt_words = words(gt_text) gt_words = words_normalized(gt_text)
ocr_words = words(ocr_text) ocr_words = words_normalized(ocr_text)
word_diff_report = gen_diff_report(gt_words, ocr_words, css_prefix='w', joiner=' ', none='', align=seq_align) word_diff_report = gen_diff_report(gt_words, ocr_words, css_prefix='w', joiner=' ', none='', align=seq_align)
env = Environment(loader=FileSystemLoader(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'templates'))) env = Environment(loader=FileSystemLoader(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'templates')))

@@ -36,6 +36,7 @@ def substitute_equivalences(s):
'\uF50E': '' # U+F50E LATIN SMALL LETTER Q WITH ACUTE ACCENT '\uF50E': '' # U+F50E LATIN SMALL LETTER Q WITH ACUTE ACCENT
} }
s = unicodedata.normalize('NFC', s)
for fr, to in equivalences.items(): for fr, to in equivalences.items():
s = s.replace(fr, to) s = s.replace(fr, to)
return s return s

Loading…
Cancel
Save