mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-09 11:50:00 +02:00
🐛 dinglehopper: Always work with NFC text
This commit is contained in:
parent
715b813bbc
commit
df93c80e5d
2 changed files with 3 additions and 2 deletions
|
@@ -60,8 +60,8 @@ def process(gt, ocr, report_prefix):
|
||||||
|
|
||||||
char_diff_report = gen_diff_report(gt_text, ocr_text, css_prefix='c', joiner='', none='·', align=align)
|
char_diff_report = gen_diff_report(gt_text, ocr_text, css_prefix='c', joiner='', none='·', align=align)
|
||||||
|
|
||||||
gt_words = words(gt_text)
|
gt_words = words_normalized(gt_text)
|
||||||
ocr_words = words(ocr_text)
|
ocr_words = words_normalized(ocr_text)
|
||||||
word_diff_report = gen_diff_report(gt_words, ocr_words, css_prefix='w', joiner=' ', none='⋯', align=seq_align)
|
word_diff_report = gen_diff_report(gt_words, ocr_words, css_prefix='w', joiner=' ', none='⋯', align=seq_align)
|
||||||
|
|
||||||
env = Environment(loader=FileSystemLoader(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'templates')))
|
env = Environment(loader=FileSystemLoader(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'templates')))
|
||||||
|
|
|
@@ -36,6 +36,7 @@ def substitute_equivalences(s):
|
||||||
'\uF50E': 'q́' # U+F50E LATIN SMALL LETTER Q WITH ACUTE ACCENT
|
'\uF50E': 'q́' # U+F50E LATIN SMALL LETTER Q WITH ACUTE ACCENT
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s = unicodedata.normalize('NFC', s)
|
||||||
for fr, to in equivalences.items():
|
for fr, to in equivalences.items():
|
||||||
s = s.replace(fr, to)
|
s = s.replace(fr, to)
|
||||||
return s
|
return s
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue