From bc05f830881d1dc573e3f91495f8c6a42837c5bc Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 12 Jun 2020 17:04:07 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B9=20dinglehopper:=20Remove=20obsolet?= =?UTF-8?q?e=20XXX=20about=20the=20PAGE=20hierarchy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/dinglehopper/ocr_files.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qurator/dinglehopper/ocr_files.py b/qurator/dinglehopper/ocr_files.py index 2ceebfd..5ce0bcd 100644 --- a/qurator/dinglehopper/ocr_files.py +++ b/qurator/dinglehopper/ocr_files.py @@ -151,7 +151,6 @@ def page_extract(tree): for region in tree.iterfind('.//page:TextRegion', namespaces=nsmap): regions.append(ExtractedTextSegment.from_text_segment(region, nsmap)) - # XXX Does a file have to have regions etc.? region vs lines etc. # Filter empty region texts regions = [r for r in regions if r.text is not None]