replace lxml with OCR-D/core PAGE API

This commit is contained in:
Konstantin Baierer 2021-04-12 13:25:29 +02:00
parent 7a859ffae4
commit 416a84e542
4 changed files with 125 additions and 132 deletions

View file

@@ -1,11 +1,14 @@
from lxml import etree as ET
from qurator.eynollah.utils.xml import create_page_xml, NAMESPACES
from pytest import main
from qurator.eynollah.utils.xml import create_page_xml
from ocrd_models.ocrd_page import to_xml
# Serialize the element `el` via ET.tostring and decode the result as UTF-8 text.
# NOTE(review): indentation was lost in this rendered diff, and this helper relies
# on the lxml `ET` import, so it presumably belongs to the pre-change side of the
# commit -- confirm against the raw patch.
def tostring(el):
return ET.tostring(el).decode('utf-8')
# Namespace URI the test expects to find bound to the `pc` prefix in the
# serialized document (used in the `xmlns:pc=...` assertion of test_create_xml).
PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'
# Smoke test for create_page_xml: build a document for a dummy image path with
# width/height arguments of 100, serialize it, and check that the namespace
# declaration and the string 'Metadata' both occur in the resulting XML text.
# NOTE(review): this rendered diff interleaves both sides of the change without
# +/- markers or indentation. The first three statements (tuple return value,
# tostring, NAMESPACES[None]) look like the lxml-based version; the following
# three (single return value, to_xml, PAGE_2019) look like their replacements.
# Confirm against the raw patch before treating this as runnable code.
def test_create_xml():
pcgts, page = create_page_xml('/path/to/img.tif', 100, 100)
xmlstr = tostring(pcgts)
assert 'xmlns="%s"' % NAMESPACES[None] in xmlstr
pcgts = create_page_xml('/path/to/img.tif', 100, 100)
xmlstr = to_xml(pcgts)
assert 'xmlns:pc="%s"' % PAGE_2019 in xmlstr
assert 'Metadata' in xmlstr
# Allow running this test file directly as a script: pass the file's own path
# to pytest's main() so only this module is collected.
if __name__ == '__main__':
main([__file__])