1
0
Fork 0
mirror of https://github.com/qurator-spk/dinglehopper.git synced 2025-07-01 06:29:59 +02:00

🚧 dinglehopper: WIP data structure for extracted text

This commit is contained in:
Gerber, Mike 2020-06-10 19:49:12 +02:00
parent 4bd30e6686
commit 89852314dc
2 changed files with 3 additions and 2 deletions

View file

@@ -2,6 +2,9 @@ import attr
import unicodedata
# TODO handle grapheme cluster positions?
@attr.s(frozen=True)
class ExtractedText:
segments = attr.ib()

View file

@@ -13,7 +13,5 @@ def test_text():
assert test1.segment_id_for_pos(3) is None
assert test1.segment_id_for_pos(10) == 's2'
# TODO handle grapheme cluster positions?
# ExtractedTextSegment('foo', unicodedata.normalize('NFD', 'Schlyñ'))
ExtractedTextSegment('foo', unicodedata.normalize('NFC', 'Schlyñ'))