mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-07-04 07:59:59 +02:00
🚧 dinglehopper: WIP data structure for extracted text
This commit is contained in:
parent
93608ba697
commit
8e3a19d7e9
2 changed files with 3 additions and 2 deletions
|
@@ -2,6 +2,9 @@ import attr
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
|
|
||||||
|
# TODO handle grapheme cluster positions?
|
||||||
|
|
||||||
|
|
||||||
@attr.s(frozen=True)
|
@attr.s(frozen=True)
|
||||||
class ExtractedText:
|
class ExtractedText:
|
||||||
segments = attr.ib()
|
segments = attr.ib()
|
||||||
|
|
|
@@ -13,7 +13,5 @@ def test_text():
|
||||||
assert test1.segment_id_for_pos(3) is None
|
assert test1.segment_id_for_pos(3) is None
|
||||||
assert test1.segment_id_for_pos(10) == 's2'
|
assert test1.segment_id_for_pos(10) == 's2'
|
||||||
|
|
||||||
# TODO handle grapheme cluster positions?
|
|
||||||
|
|
||||||
# ExtractedTextSegment('foo', unicodedata.normalize('NFD', 'Schlyñ'))
|
# ExtractedTextSegment('foo', unicodedata.normalize('NFD', 'Schlyñ'))
|
||||||
ExtractedTextSegment('foo', unicodedata.normalize('NFC', 'Schlyñ'))
|
ExtractedTextSegment('foo', unicodedata.normalize('NFC', 'Schlyñ'))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue