mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-20 00:59:58 +02:00
🧹 dinglehopper: Remove warning when there is a non-TextRegion in the ReadingOrder
This commit is contained in:
parent
0f3857d8d3
commit
f626a2ebe6
2 changed files with 2 additions and 3 deletions
|
@@ -74,7 +74,7 @@ def page_extract(tree, *, textequiv_level='region'):
|
||||||
if region is not None:
|
if region is not None:
|
||||||
regions.append(ExtractedText.from_text_segment(region, nsmap, textequiv_level=textequiv_level))
|
regions.append(ExtractedText.from_text_segment(region, nsmap, textequiv_level=textequiv_level))
|
||||||
else:
|
else:
|
||||||
warn('Not a TextRegion: "%s"' % region_id)
|
pass # Not a TextRegion
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
else:
|
else:
|
||||||
|
|
|
@@ -124,8 +124,7 @@ def test_page_order():
|
||||||
def test_page_mixed_regions():
|
def test_page_mixed_regions():
|
||||||
# This file contains ImageRegions and TextRegions in the ReadingOrder
|
# This file contains ImageRegions and TextRegions in the ReadingOrder
|
||||||
tree = ET.parse(os.path.join(data_dir, 'mixed-regions.page.xml'))
|
tree = ET.parse(os.path.join(data_dir, 'mixed-regions.page.xml'))
|
||||||
with pytest.warns(UserWarning, match=r'Not a TextRegion'):
|
result = page_text(tree)
|
||||||
result = page_text(tree)
|
|
||||||
|
|
||||||
assert 'non exaudiam uos. Chriſtiani uero quia orant iuxta' in result
|
assert 'non exaudiam uos. Chriſtiani uero quia orant iuxta' in result
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue