use lxml, factor PAGE creation to utils.xml
parent
14d80700d4
commit
ff7b5ce409
@ -0,0 +1,34 @@
|
|||||||
|
from lxml import etree as ET
|
||||||
|
|
||||||
|
# Prefix -> namespace URI map handed to lxml as ``nsmap``.
# The ``None`` key is the default (unprefixed) namespace and aliases the
# PAGE namespace, so PAGE elements serialize without a prefix.
NAMESPACES = {
    'page': "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15",
    'xsi': "http://www.w3.org/2001/XMLSchema-instance",
}
NAMESPACES[None] = NAMESPACES['page']
|
||||||
|
|
||||||
|
def create_page_xml(imageFilename, height, width):
    """Build the skeleton of a PAGE-XML document for a single image.

    Creates a ``PcGts`` root element (using the module-level ``NAMESPACES``
    as its namespace map) containing a ``Metadata`` element and an empty
    ``Page`` element describing the image.

    Args:
        imageFilename: Value stored in the ``imageFilename`` attribute of
            ``Page``.
        height: Image height; stored as ``str(height)`` in ``imageHeight``.
        width: Image width; stored as ``str(width)`` in ``imageWidth``.

    Returns:
        A ``(pcgts, page)`` tuple: the root element and its ``Page`` child,
        so callers can append content to the page and serialize the root.
    """
    pcgts = ET.Element("PcGts", nsmap=NAMESPACES)

    # xsi:schemaLocation must be a whitespace-separated list of
    # (namespace URI, schema document URL) pairs; a lone namespace URI is
    # not a valid hint, so pair the PAGE namespace with its XSD location.
    page_ns = NAMESPACES['page']
    pcgts.set(
        "{%s}schemaLocation" % NAMESPACES['xsi'],
        "%s %s/pagecontent.xsd" % (page_ns, page_ns),
    )

    metadata = ET.SubElement(pcgts, "Metadata")

    author = ET.SubElement(metadata, "Creator")
    author.text = "SBB_QURATOR"

    # NOTE(review): Created/LastChange are fixed placeholder timestamps,
    # not the actual time of generation.
    created = ET.SubElement(metadata, "Created")
    created.text = "2019-06-17T18:15:12"

    changetime = ET.SubElement(metadata, "LastChange")
    changetime.text = "2019-06-17T18:15:12"

    page = ET.SubElement(pcgts, "Page")

    page.set("imageFilename", imageFilename)
    page.set("imageHeight", str(height))
    page.set("imageWidth", str(width))
    page.set("type", "content")
    page.set("readingDirection", "left-to-right")
    page.set("textLineOrder", "top-to-bottom")

    return pcgts, page
|
||||||
|
|
@ -0,0 +1,11 @@
|
|||||||
|
from lxml import etree as ET
|
||||||
|
from sbb_newspapers_org_image.utils.xml import create_page_xml, NAMESPACES
|
||||||
|
|
||||||
|
def tostring(el):
    """Serialize an element with ``ET.tostring`` and decode it as UTF-8 text."""
    raw = ET.tostring(el)
    return raw.decode('utf-8')
|
||||||
|
|
||||||
|
def test_create_xml():
    """Smoke-test ``create_page_xml``.

    Checks that the serialized root declares the default namespace from
    ``NAMESPACES[None]`` and that a ``Metadata`` element is present.
    """
    # The second return value (the Page element) is not inspected here.
    root, _page = create_page_xml('/path/to/img.tif', 100, 100)
    serialized = tostring(root)

    default_ns_decl = 'xmlns="%s"' % NAMESPACES[None]
    assert default_ns_decl in serialized
    assert 'Metadata' in serialized
|
Loading…
Reference in New Issue