mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-11 05:09:53 +02:00
allow passing pcgts to eynollah and writer
This commit is contained in:
parent
2bc34891a5
commit
8c4e9b6068
3 changed files with 12 additions and 10 deletions
qurator/eynollah
|
@ -95,6 +95,7 @@ class Eynollah:
|
||||||
headers_off=False,
|
headers_off=False,
|
||||||
override_dpi=None,
|
override_dpi=None,
|
||||||
logger=None,
|
logger=None,
|
||||||
|
pcgts=None,
|
||||||
):
|
):
|
||||||
self.image_filename = image_filename
|
self.image_filename = image_filename
|
||||||
self.dir_out = dir_out
|
self.dir_out = dir_out
|
||||||
|
@ -117,7 +118,8 @@ class Eynollah:
|
||||||
self.writer = EynollahXmlWriter(
|
self.writer = EynollahXmlWriter(
|
||||||
dir_out=self.dir_out,
|
dir_out=self.dir_out,
|
||||||
image_filename=self.image_filename,
|
image_filename=self.image_filename,
|
||||||
curved_line=self.curved_line)
|
curved_line=self.curved_line,
|
||||||
|
pcgts=pcgts)
|
||||||
self.logger = logger if logger else getLogger('eynollah')
|
self.logger = logger if logger else getLogger('eynollah')
|
||||||
self.dir_models = dir_models
|
self.dir_models = dir_models
|
||||||
|
|
||||||
|
|
|
@ -30,10 +30,10 @@ class EynollahProcessor(Processor):
|
||||||
assert_file_grp_cardinality(self.output_file_grp, 1)
|
assert_file_grp_cardinality(self.output_file_grp, 1)
|
||||||
for n, input_file in enumerate(self.input_files):
|
for n, input_file in enumerate(self.input_files):
|
||||||
page_id = input_file.pageId or input_file.ID
|
page_id = input_file.pageId or input_file.ID
|
||||||
LOG.info("INPUT FILE %s / %s ", page_id, len(self.input_files))
|
LOG.info("INPUT FILE %s (%d/%d) ", page_id, n + 1, len(self.input_files))
|
||||||
pcgts_in = page_from_file(self.workspace.download_file(input_file))
|
pcgts = page_from_file(self.workspace.download_file(input_file))
|
||||||
self.add_metadata(pcgts_in)
|
self.add_metadata(pcgts)
|
||||||
page = pcgts_in.get_Page()
|
page = pcgts.get_Page()
|
||||||
page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized')
|
page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized')
|
||||||
file_id = make_file_id(input_file, self.output_file_grp)
|
file_id = make_file_id(input_file, self.output_file_grp)
|
||||||
with NamedTemporaryFile(buffering=0, suffix='.tif') as f:
|
with NamedTemporaryFile(buffering=0, suffix='.tif') as f:
|
||||||
|
@ -47,13 +47,13 @@ class EynollahProcessor(Processor):
|
||||||
'headers_off': self.parameter['headers_off'],
|
'headers_off': self.parameter['headers_off'],
|
||||||
'override_dpi': self.parameter['dpi'] if self.parameter['dpi'] > 0 else None,
|
'override_dpi': self.parameter['dpi'] if self.parameter['dpi'] > 0 else None,
|
||||||
'logger': LOG,
|
'logger': LOG,
|
||||||
|
'pcgts': pcgts,
|
||||||
'image_filename': f.name}
|
'image_filename': f.name}
|
||||||
pcgts_out = Eynollah(**eynollah_kwargs).run()
|
Eynollah(**eynollah_kwargs).run()
|
||||||
pcgts_out.get_Page().imageFilename = pcgts_in.get_Page().imageFilename
|
|
||||||
self.workspace.add_file(
|
self.workspace.add_file(
|
||||||
ID=file_id,
|
ID=file_id,
|
||||||
file_grp=self.output_file_grp,
|
file_grp=self.output_file_grp,
|
||||||
pageId=page_id,
|
pageId=page_id,
|
||||||
mimetype=MIMETYPE_PAGE,
|
mimetype=MIMETYPE_PAGE,
|
||||||
local_filename=join(self.output_file_grp, file_id) + '.xml',
|
local_filename=join(self.output_file_grp, file_id) + '.xml',
|
||||||
content=to_xml(pcgts_out))
|
content=to_xml(pcgts))
|
||||||
|
|
|
@ -141,7 +141,7 @@ class EynollahXmlWriter():
|
||||||
self.logger.debug('enter build_pagexml_no_full_layout')
|
self.logger.debug('enter build_pagexml_no_full_layout')
|
||||||
|
|
||||||
# create the file structure
|
# create the file structure
|
||||||
pcgts = create_page_xml(self.image_filename, self.height_org, self.width_org)
|
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
|
||||||
page = pcgts.get_Page()
|
page = pcgts.get_Page()
|
||||||
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
|
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
|
||||||
|
|
||||||
|
@ -181,7 +181,7 @@ class EynollahXmlWriter():
|
||||||
self.logger.debug('enter build_pagexml_full_layout')
|
self.logger.debug('enter build_pagexml_full_layout')
|
||||||
|
|
||||||
# create the file structure
|
# create the file structure
|
||||||
pcgts = create_page_xml(self.image_filename, self.height_org, self.width_org)
|
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
|
||||||
page = pcgts.get_Page()
|
page = pcgts.get_Page()
|
||||||
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
|
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue