From db6292611fb739baf20038ac0a7e63847bd6a96f Mon Sep 17 00:00:00 2001
From: "Gerber, Mike" <mike.gerber@sbb.spk-berlin.de>
Date: Wed, 7 Oct 2020 16:07:27 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B9=20dinglehopper:=20Remove=20merged?=
 =?UTF-8?q?=20text=20extraction=20test=20code?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 extracted_text.py      | 51 ------------------------------------------
 extracted_text_test.py | 22 ------------------
 2 files changed, 73 deletions(-)
 delete mode 100644 extracted_text.py
 delete mode 100644 extracted_text_test.py

diff --git a/extracted_text.py b/extracted_text.py
deleted file mode 100644
index c84c77b..0000000
--- a/extracted_text.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import attr
-import unicodedata
-import enum
-
-
-# TODO handle grapheme cluster positions?
-# TODO Use type annotations for attr.ib types when support for Python 3.5 is dropped
-# TODO types are not validated (attr does not do this yet)
-
-
-@attr.s(frozen=True)
-class ExtractedText:
-    segments = attr.ib()
-    joiner = attr.ib(type=str)
-
-    @property
-    def text(self):
-        return self.joiner.join(s.text for s in self.segments)
-
-    def segment_id_for_pos(self, pos):
-        i = 0
-        for s in self.segments:
-            if i <= pos < i + len(s.text):
-                return s.id
-            i += len(s.text)
-            if i <= pos < i + len(self.joiner):
-                return None
-            i += len(self.joiner)
-
-
-class Normalization(enum.Enum):
-    NFC = 1
-    NFC_MUFI = 2
-
-
-def normalize(text, normalization):
-    if normalization == Normalization.NFC:
-        return unicodedata.normalize('NFC', text)
-    else:
-        raise ValueError()
-
-
-@attr.s(frozen=True)
-class ExtractedTextSegment:
-    id = attr.ib(type=str)
-    text = attr.ib(type=str)
-    @text.validator
-    def check(self, attribute, value):
-        if normalize(value, self.normalization) != value:
-            raise ValueError('String "{}" is not normalized.'.format(value))
-    normalization = attr.ib(converter=Normalization, default=Normalization.NFC)
diff --git a/extracted_text_test.py b/extracted_text_test.py
deleted file mode 100644
index 4919a76..0000000
--- a/extracted_text_test.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import unicodedata
-import pytest
-from extracted_text import ExtractedText, ExtractedTextSegment
-
-
-def test_text():
-    test1 = ExtractedText([
-        ExtractedTextSegment('s0', 'foo'),
-        ExtractedTextSegment('s1', 'bar'),
-        ExtractedTextSegment('s2', 'bazinga')
-    ], ' ')
-
-    assert test1.text == 'foo bar bazinga'
-    assert test1.segment_id_for_pos(0) == 's0'
-    assert test1.segment_id_for_pos(3) is None
-    assert test1.segment_id_for_pos(10) == 's2'
-
-
-def test_normalization_check():
-    with pytest.raises(ValueError, match=r'.*is not normalized.*'):
-        ExtractedTextSegment('foo', unicodedata.normalize('NFD', 'Schlyñ'))
-    assert ExtractedTextSegment('foo', unicodedata.normalize('NFC', 'Schlyñ'))