From f3aafb6fdfc86a35315e5ededdac69e01d6ac8b4 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 8 Oct 2020 12:20:27 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20dinglehopper:=20Validate=20Extracte?= =?UTF-8?q?dText.{segments,=5Ftext}=20in=20both=20directions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/dinglehopper/ocr_files.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qurator/dinglehopper/ocr_files.py b/qurator/dinglehopper/ocr_files.py index 5824dda..6cda10c 100644 --- a/qurator/dinglehopper/ocr_files.py +++ b/qurator/dinglehopper/ocr_files.py @@ -41,7 +41,6 @@ class ExtractedText: # An object contains either # a. _text itself # b. or segments (ExtractedText) and a joiner - # TODO validator segments = attr.ib(type=Optional[list], converter=attr.converters.optional(list)) joiner = attr.ib(type=Optional[str]) @@ -54,6 +53,8 @@ class ExtractedText: @_text.validator def check(self, _, value): + if value is not None and self.segments is not None: + raise ValueError("Can't have both segments and text") if value is not None and unicodedata.normalize('NFC', value) != value: raise ValueError('String "{}" is not in NFC.'.format(value)) if value is not None and normalize(value, self.normalization) != value: