From f626a2ebe65a1bb39c68a3dcbd0e4cdc771ab59c Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Wed, 21 Oct 2020 17:03:55 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B9=20dinglehopper:=20Remove=20warning?= =?UTF-8?q?=20when=20there=20is=20a=20non-TextRegion=20in=20the=20ReadingO?= =?UTF-8?q?rder?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/dinglehopper/ocr_files.py | 2 +- qurator/dinglehopper/tests/test_ocr_files.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/qurator/dinglehopper/ocr_files.py b/qurator/dinglehopper/ocr_files.py index 9a9f058..9cb2475 100644 --- a/qurator/dinglehopper/ocr_files.py +++ b/qurator/dinglehopper/ocr_files.py @@ -74,7 +74,7 @@ def page_extract(tree, *, textequiv_level='region'): if region is not None: regions.append(ExtractedText.from_text_segment(region, nsmap, textequiv_level=textequiv_level)) else: - warn('Not a TextRegion: "%s"' % region_id) + pass # Not a TextRegion else: raise NotImplementedError else: diff --git a/qurator/dinglehopper/tests/test_ocr_files.py b/qurator/dinglehopper/tests/test_ocr_files.py index dd0a1fa..6848fa1 100644 --- a/qurator/dinglehopper/tests/test_ocr_files.py +++ b/qurator/dinglehopper/tests/test_ocr_files.py @@ -124,8 +124,7 @@ def test_page_order(): def test_page_mixed_regions(): # This file contains ImageRegions and TextRegions in the ReadingOrder tree = ET.parse(os.path.join(data_dir, 'mixed-regions.page.xml')) - with pytest.warns(UserWarning, match=r'Not a TextRegion'): - result = page_text(tree) + result = page_text(tree) assert 'non exaudiam uos. Chriſtiani uero quia orant iuxta' in result