diff --git a/extracted_text.py b/extracted_text.py
index f99c8ac..a76f402 100644
--- a/extracted_text.py
+++ b/extracted_text.py
@@ -43,19 +43,3 @@ class ExtractedTextSegment:
         if normalize(value, self.normalization) != value:
             raise ValueError('String "{}" is not normalized.'.format(value))
     normalization = attr.ib(default=NORM_NFC)
-
-
-test1 = ExtractedText([
-    ExtractedTextSegment('s0', 'foo'),
-    ExtractedTextSegment('s1', 'bar'),
-    ExtractedTextSegment('s2', 'bazinga')
-], ' ')
-
-
-assert test1.text == 'foo bar bazinga'
-assert test1.segment_id_for_pos(0) == 's0'
-assert test1.segment_id_for_pos(3) == None
-assert test1.segment_id_for_pos(10) == 's2'
-
-# ExtractedTextSegment('foo', unicodedata.normalize('NFD', 'Schlyñ'))
-ExtractedTextSegment('foo', unicodedata.normalize('NFC', 'Schlyñ'))
diff --git a/extracted_text_test.py b/extracted_text_test.py
new file mode 100644
index 0000000..29fabfe
--- /dev/null
+++ b/extracted_text_test.py
@@ -0,0 +1,30 @@
+import unicodedata
+
+from extracted_text import ExtractedText, ExtractedTextSegment
+
+
+def test_text():
+    """Check the joined text and the position-to-segment-id lookup."""
+    test1 = ExtractedText([
+        ExtractedTextSegment('s0', 'foo'),
+        ExtractedTextSegment('s1', 'bar'),
+        ExtractedTextSegment('s2', 'bazinga')
+    ], ' ')
+
+    assert test1.text == 'foo bar bazinga'
+    assert test1.segment_id_for_pos(0) == 's0'
+    # Index 3 of the joined text is the ' ' joiner, which maps to no segment.
+    assert test1.segment_id_for_pos(3) is None
+    assert test1.segment_id_for_pos(10) == 's2'
+
+
+# TODO handle grapheme cluster positions?
+
+
+def test_normalization_check():
+    """Constructing a segment from NFC-normalized text must not raise."""
+    # NOTE(review): the NFD variant is kept disabled as in the original;
+    # presumably it trips the "is not normalized" ValueError -- confirm and
+    # turn it into an explicit expected-failure test.
+    # ExtractedTextSegment('foo', unicodedata.normalize('NFD', 'Schlyñ'))
+    ExtractedTextSegment('foo', unicodedata.normalize('NFC', 'Schlyñ'))