mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-07-01 14:40:00 +02:00
🚧 dinglehopper: WIP data structure for extracted text
This commit is contained in:
parent
4e9b0aeef1
commit
96273b026d
2 changed files with 3 additions and 2 deletions
|
@@ -2,6 +2,9 @@ import attr
|
|||
import unicodedata
|
||||
|
||||
|
||||
# TODO handle grapheme cluster positions?
|
||||
|
||||
|
||||
@attr.s(frozen=True)
|
||||
class ExtractedText:
|
||||
segments = attr.ib()
|
||||
|
|
|
@@ -13,7 +13,5 @@ def test_text():
|
|||
assert test1.segment_id_for_pos(3) is None
|
||||
assert test1.segment_id_for_pos(10) == 's2'
|
||||
|
||||
# TODO handle grapheme cluster positions?
|
||||
|
||||
# ExtractedTextSegment('foo', unicodedata.normalize('NFD', 'Schlyñ'))
|
||||
ExtractedTextSegment('foo', unicodedata.normalize('NFC', 'Schlyñ'))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue