diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index a2a2ad0..822db18 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -198,9 +198,11 @@ def main( light_version=light_version, ignore_page_extraction=ignore_page_extraction, ) - eynollah.run() - #pcgts = eynollah.run() - ##eynollah.writer.write_pagexml(pcgts) + if dir_in: + eynollah.run() + else: + pcgts = eynollah.run() + eynollah.writer.write_pagexml(pcgts) if __name__ == "__main__": main() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c162af7..7f5561c 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3091,7 +3091,8 @@ class Eynollah: pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts + if not self.dir_in: + return pcgts else: contours_only_text_parent_h = None if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3101,8 +3102,11 @@ class Eynollah: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts - self.writer.write_pagexml(pcgts) - #self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + + if self.dir_in: + self.writer.write_pagexml(pcgts) + #self.logger.info("Job done in %.1fs", time.time() - t0) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/processor.py b/qurator/eynollah/processor.py index ccec456..1bd190e 100644 --- a/qurator/eynollah/processor.py +++ b/qurator/eynollah/processor.py @@ -42,7 +42,7 @@ class EynollahProcessor(Processor): page = pcgts.get_Page() # XXX loses DPI information # page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized') - image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(url=page.imageFilename))).local_filename + image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(local_filename=page.imageFilename))).local_filename eynollah_kwargs = { 'dir_models': self.resolve_resource(self.parameter['models']), 'allow_enhancement': False,