🧹 dinglehopper: Remove merged text extraction test code

2026-06-19 03:19:16 +02:00 · 2020-10-07 16:07:27 +02:00 · 2020-10-07 16:07:27 +02:00 · db6292611f
commit db6292611f
parent 7e3dafd3bc
2 changed files with 0 additions and 73 deletions
--- a/extracted_text.py
+++ b/extracted_text.py
@@ -1,51 +0,0 @@
-import attr
-import unicodedata
-import enum
-
-
-# TODO handle grapheme cluster positions?
-# TODO Use type annotations for attr.ib types when support for Python 3.5 is dropped
-# TODO types are not validated (attr does not do this yet)
-
-
-@attr.s(frozen=True)
-class ExtractedText:
-    segments = attr.ib()
-    joiner = attr.ib(type=str)
-
-    @property
-    def text(self):
-        return self.joiner.join(s.text for s in self.segments)
-
-    def segment_id_for_pos(self, pos):
-        i = 0
-        for s in self.segments:
-            if i <= pos < i + len(s.text):
-                return s.id
-            i += len(s.text)
-            if i <= pos < i + len(self.joiner):
-                return None
-            i += len(self.joiner)
-
-
-class Normalization(enum.Enum):
-    NFC = 1
-    NFC_MUFI = 2
-
-
-def normalize(text, normalization):
-    if normalization == Normalization.NFC:
-        return unicodedata.normalize('NFC', text)
-    else:
-        raise ValueError()
-
-
-@attr.s(frozen=True)
-class ExtractedTextSegment:
-    id = attr.ib(type=str)
-    text = attr.ib(type=str)
-    @text.validator
-    def check(self, attribute, value):
-        if normalize(value, self.normalization) != value:
-            raise ValueError('String "{}" is not normalized.'.format(value))
-    normalization = attr.ib(converter=Normalization, default=Normalization.NFC)
--- a/extracted_text_test.py
+++ b/extracted_text_test.py
@@ -1,22 +0,0 @@
-import unicodedata
-import pytest
-from extracted_text import ExtractedText, ExtractedTextSegment
-
-
-def test_text():
-    test1 = ExtractedText([
-        ExtractedTextSegment('s0', 'foo'),
-        ExtractedTextSegment('s1', 'bar'),
-        ExtractedTextSegment('s2', 'bazinga')
-    ], ' ')
-
-    assert test1.text == 'foo bar bazinga'
-    assert test1.segment_id_for_pos(0) == 's0'
-    assert test1.segment_id_for_pos(3) is None
-    assert test1.segment_id_for_pos(10) == 's2'
-
-
-def test_normalization_check():
-    with pytest.raises(ValueError, match=r'.*is not normalized.*'):
-        ExtractedTextSegment('foo', unicodedata.normalize('NFD', 'Schlyñ'))
-    assert ExtractedTextSegment('foo', unicodedata.normalize('NFC', 'Schlyñ'))