From f3aafb6fdfc86a35315e5ededdac69e01d6ac8b4 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike" <mike.gerber@sbb.spk-berlin.de>
Date: Thu, 8 Oct 2020 12:20:27 +0200
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20dinglehopper:=20Validate=20Extracte?=
 =?UTF-8?q?dText.{segments,=5Ftext}=20in=20both=20directions?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 qurator/dinglehopper/ocr_files.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/qurator/dinglehopper/ocr_files.py b/qurator/dinglehopper/ocr_files.py
index 5824dda..6cda10c 100644
--- a/qurator/dinglehopper/ocr_files.py
+++ b/qurator/dinglehopper/ocr_files.py
@@ -41,7 +41,6 @@ class ExtractedText:
     # An object contains either
     # a. _text itself
     # b. or segments (ExtractedText) and a joiner
-    # TODO validator
 
     segments = attr.ib(type=Optional[list], converter=attr.converters.optional(list))
     joiner = attr.ib(type=Optional[str])
@@ -54,6 +53,8 @@ class ExtractedText:
 
     @_text.validator
     def check(self, _, value):
+        if value is not None and self.segments is not None:
+            raise ValueError("Can't have both segments and text")
         if value is not None and unicodedata.normalize('NFC', value) != value:
             raise ValueError('String "{}" is not in NFC.'.format(value))
         if value is not None and normalize(value, self.normalization) != value: