Mirror of https://github.com/qurator-spk/eynollah.git (synced 2025-07-01 06:59:54 +02:00)
Merge pull request #129 from qurator-spk/resolving_issue_106
fix OCR-D regression
This commit is contained in:
commit
78bfa97c06
3 changed files with 14 additions and 8 deletions
|
@@ -198,9 +198,11 @@ def main(
|
|||
light_version=light_version,
|
||||
ignore_page_extraction=ignore_page_extraction,
|
||||
)
|
||||
eynollah.run()
|
||||
#pcgts = eynollah.run()
|
||||
##eynollah.writer.write_pagexml(pcgts)
|
||||
if dir_in:
|
||||
eynollah.run()
|
||||
else:
|
||||
pcgts = eynollah.run()
|
||||
eynollah.writer.write_pagexml(pcgts)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@@ -3091,7 +3091,8 @@ class Eynollah:
|
|||
|
||||
pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml)
|
||||
self.logger.info("Job done in %.1fs", time.time() - t0)
|
||||
##return pcgts
|
||||
if not self.dir_in:
|
||||
return pcgts
|
||||
else:
|
||||
contours_only_text_parent_h = None
|
||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||
|
@@ -3101,8 +3102,11 @@ class Eynollah:
|
|||
order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)
|
||||
pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables)
|
||||
self.logger.info("Job done in %.1fs", time.time() - t0)
|
||||
##return pcgts
|
||||
self.writer.write_pagexml(pcgts)
|
||||
#self.logger.info("Job done in %.1fs", time.time() - t0)
|
||||
if not self.dir_in:
|
||||
return pcgts
|
||||
|
||||
if self.dir_in:
|
||||
self.writer.write_pagexml(pcgts)
|
||||
#self.logger.info("Job done in %.1fs", time.time() - t0)
|
||||
if self.dir_in:
|
||||
self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)
|
||||
|
|
|
@@ -42,7 +42,7 @@ class EynollahProcessor(Processor):
|
|||
page = pcgts.get_Page()
|
||||
# XXX loses DPI information
|
||||
# page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized')
|
||||
image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(url=page.imageFilename))).local_filename
|
||||
image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(local_filename=page.imageFilename))).local_filename
|
||||
eynollah_kwargs = {
|
||||
'dir_models': self.resolve_resource(self.parameter['models']),
|
||||
'allow_enhancement': False,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue