🚧 dinglehopper: WIP data structure for extracted text

pull/38/head
Gerber, Mike 4 years ago
parent 4e9b0aeef1
commit 96273b026d

@@ -2,6 +2,9 @@ import attr
import unicodedata
# TODO handle grapheme cluster positions?
@attr.s(frozen=True)
class ExtractedText:
segments = attr.ib()

@@ -13,7 +13,5 @@ def test_text():
assert test1.segment_id_for_pos(3) is None
assert test1.segment_id_for_pos(10) == 's2'
# TODO handle grapheme cluster positions?
# ExtractedTextSegment('foo', unicodedata.normalize('NFD', 'Schlyñ'))
ExtractedTextSegment('foo', unicodedata.normalize('NFC', 'Schlyñ'))

Loading…
Cancel
Save