diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index a2a2ad0..a422df9 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -133,6 +133,12 @@ from qurator.eynollah.eynollah import Eynollah is_flag=True, help="if this parameter set to true, this tool would ignore page extraction", ) +@click.option( + "--reading_order_machine_based/--heuristic_reading_order", + "-romb/-hro", + is_flag=True, + help="if this parameter set to true, this tool would apply machine based reading order detection", +) @click.option( "--log-level", "-l", @@ -160,6 +166,7 @@ def main( allow_scaling, headers_off, light_version, + reading_order_machine_based, ignore_page_extraction, log_level ): @@ -197,6 +204,7 @@ def main( headers_off=headers_off, light_version=light_version, ignore_page_extraction=ignore_page_extraction, + reading_order_machine_based=reading_order_machine_based, ) eynollah.run() #pcgts = eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c008476..5e06734 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -165,6 +165,7 @@ class Eynollah: headers_off=False, light_version=False, ignore_page_extraction=False, + reading_order_machine_based=False, override_dpi=None, logger=None, pcgts=None, @@ -181,6 +182,7 @@ class Eynollah: self.dir_in = dir_in self.dir_of_all = dir_of_all self.dir_save_page = dir_save_page + self.reading_order_machine_based = reading_order_machine_based self.dir_of_deskewed = dir_of_deskewed self.dir_of_deskewed = dir_of_deskewed self.dir_of_cropped_images=dir_of_cropped_images @@ -226,7 +228,7 @@ class Eynollah: self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" - self.model_reading_order_machine_dir = dir_models + "/model_6_reading_order_machine_based" + self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" if self.textline_light: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: @@ -3139,8 +3141,6 @@ class Eynollah: Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - - self.reading_order_machine_based = True#False#True#True t0_tot = time.time()