1
0
Fork 0
mirror of https://github.com/qurator-spk/dinglehopper.git synced 2025-06-20 09:09:59 +02:00

🧹 dinglehopper: Remove warning when there is a non-TextRegion in the ReadingOrder

This commit is contained in:
Gerber, Mike 2020-10-21 17:03:55 +02:00
parent 0f3857d8d3
commit f626a2ebe6
2 changed files with 2 additions and 3 deletions

View file

@@ -74,7 +74,7 @@ def page_extract(tree, *, textequiv_level='region'):
if region is not None: if region is not None:
regions.append(ExtractedText.from_text_segment(region, nsmap, textequiv_level=textequiv_level)) regions.append(ExtractedText.from_text_segment(region, nsmap, textequiv_level=textequiv_level))
else: else:
warn('Not a TextRegion: "%s"' % region_id) pass # Not a TextRegion
else: else:
raise NotImplementedError raise NotImplementedError
else: else:

View file

@@ -124,7 +124,6 @@ def test_page_order():
def test_page_mixed_regions(): def test_page_mixed_regions():
# This file contains ImageRegions and TextRegions in the ReadingOrder # This file contains ImageRegions and TextRegions in the ReadingOrder
tree = ET.parse(os.path.join(data_dir, 'mixed-regions.page.xml')) tree = ET.parse(os.path.join(data_dir, 'mixed-regions.page.xml'))
with pytest.warns(UserWarning, match=r'Not a TextRegion'):
result = page_text(tree) result = page_text(tree)
assert 'non exaudiam uos. Chriſtiani uero quia orant iuxta' in result assert 'non exaudiam uos. Chriſtiani uero quia orant iuxta' in result