From 960b11f51f98518feaa5b1989a71bc368e6c9fa4 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 24 Sep 2025 22:58:57 +0200 Subject: [PATCH 01/15] machine-based-reading-order CLI: no foreign logger, add --log-level --- src/eynollah/cli.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 1170465..420373a 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -37,14 +37,22 @@ def main(): type=click.Path(exists=True, file_okay=False), required=True, ) +@click.option( + "--log_level", + "-l", + type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), + help="Override log level globally to this", +) + +def machine_based_reading_order(dir_xml, xml_file, dir_out, model, log_level): + orderer = machine_based_reading_order_on_layout(model, dir_out=dir_out) + if log_level: + orderer.logger.setLevel(getLevelName(log_level)) -def machine_based_reading_order(dir_xml, xml_file, dir_out, model): - raedingorder_object = machine_based_reading_order_on_layout(model, dir_out=dir_out, logger=getLogger('enhancement')) - if dir_xml: - raedingorder_object.run(dir_in=dir_xml) + orderer.run(dir_in=dir_xml) else: - raedingorder_object.run(xml_filename=xml_file) + orderer.run(xml_filename=xml_file) @main.command() From 8a1e5a895057aac0d0dd878e58c0ce3e70c891fe Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 24 Sep 2025 23:03:11 +0200 Subject: [PATCH 02/15] enhancement / layout CLI: do not override logger name --- src/eynollah/cli.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 420373a..ab157d1 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -137,21 +137,20 @@ def binarization(patches, model_dir, input_image, dir_in, output): def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, save_org_scale, log_level): initLogging() - if log_level: - 
getLogger('enhancement').setLevel(getLevelName(log_level)) assert image or dir_in, "Either a single image -i or a dir_in -di is required" - enhancer_object = Enhancer( + enhancer = Enhancer( model, - logger=getLogger('enhancement'), dir_out=out, num_col_upper=num_col_upper, num_col_lower=num_col_lower, save_org_scale=save_org_scale, ) + if log_level: + enhancer.logger.setLevel(getLevelName(log_level)) if dir_in: - enhancer_object.run(dir_in=dir_in, overwrite=overwrite) + enhancer.run(dir_in=dir_in, overwrite=overwrite) else: - enhancer_object.run(image_filename=image, overwrite=overwrite) + enhancer.run(image_filename=image, overwrite=overwrite) @main.command() @click.option( @@ -368,8 +367,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ getLogger('eynollah').setLevel(logging.INFO) else: initLogging() - if log_level: - getLogger('eynollah').setLevel(getLevelName(log_level)) assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep" assert enable_plotting or not save_deskewed, "Plotting with -sd also requires -ep" assert enable_plotting or not save_all, "Plotting with -sa also requires -ep" @@ -420,6 +417,8 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ threshold_art_class_textline=threshold_art_class_textline, threshold_art_class_layout=threshold_art_class_layout, ) + if log_level: + eynollah.logger.setLevel(getLevelName(log_level)) if dir_in: eynollah.run(dir_in=dir_in, overwrite=overwrite) else: @@ -529,8 +528,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level): initLogging() - if log_level: - getLogger('eynollah').setLevel(getLevelName(log_level)) 
assert not model or not model_name, "model directory -m can not be set alongside specific model name --model_name" assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr" @@ -557,6 +554,8 @@ def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, pref_of_dataset=dataset_abbrevation, min_conf_value_of_textline_text=min_conf_value_of_textline_text, ) + if log_level: + eynollah_ocr.logger.setLevel(getLevelName(log_level)) eynollah_ocr.run(overwrite=overwrite) if __name__ == "__main__": From 93f7588bfa3787679fd5bb843176ea453c303f44 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 24 Sep 2025 23:08:50 +0200 Subject: [PATCH 03/15] binarizer CLI: add --log-level --- src/eynollah/cli.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index ab157d1..19beab5 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -71,10 +71,18 @@ def machine_based_reading_order(dir_xml, xml_file, dir_out, model, log_level): help="output image (if using -i) or output image directory (if using -di)", type=click.Path(file_okay=True, dir_okay=True), ) -def binarization(patches, model_dir, input_image, dir_in, output): +@click.option( + "--log_level", + "-l", + type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), + help="Override log level globally to this", +) +def binarization(patches, model_dir, input_image, dir_in, output, log_level): assert (dir_in is None) != (input_image is None), "Specify either -di and or -i not both" - SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, output=output, dir_in=dir_in) - + binarizer = SbbBinarizer(model_dir) + if log_level: + binarizer.log.setLevel(getLevelName(log_level)) + binarizer.run(image_path=input_image, use_patches=patches, output=output, dir_in=dir_in) @main.command() From 96a0d22496eca2497abac64dcb931d9d45d3394c Mon 
Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 24 Sep 2025 23:52:35 +0200 Subject: [PATCH 04/15] mbreorder CLI: change options to mimic other commands --- src/eynollah/cli.py | 49 +++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 19beab5..71958df 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -13,20 +13,20 @@ def main(): @main.command() @click.option( - "--dir_xml", - "-dx", - help="directory of page-xml files", + "--dir_in", + "-di", + help="directory of PAGE-XML input files", type=click.Path(exists=True, file_okay=False), ) @click.option( - "--xml_file", - "-xml", - help="xml filename", + "--input", + "-i", + help="PAGE-XML input filename", type=click.Path(exists=True, dir_okay=False), ) @click.option( - "--dir_out", - "-do", + "--out", + "-o", help="directory for output images", type=click.Path(exists=True, file_okay=False), ) @@ -44,21 +44,26 @@ def main(): help="Override log level globally to this", ) -def machine_based_reading_order(dir_xml, xml_file, dir_out, model, log_level): - orderer = machine_based_reading_order_on_layout(model, dir_out=dir_out) +def machine_based_reading_order(dir_in, input, out, model, log_level): + orderer = machine_based_reading_order_on_layout(model, dir_out=out) if log_level: orderer.logger.setLevel(getLevelName(log_level)) - if dir_xml: - orderer.run(dir_in=dir_xml) + if dir_in: + orderer.run(dir_in=dir_in) else: - orderer.run(xml_filename=xml_file) + orderer.run(xml_filename=input) @main.command() @click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') @click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction') -@click.option("--input-image", "-i", help="input image", type=click.Path(exists=True, dir_okay=False)) +@click.option( + 
"--input-image", "--image", + "-i", + help="input image filename", + type=click.Path(exists=True, dir_okay=False) +) @click.option( "--dir_in", "-di", @@ -89,14 +94,14 @@ def binarization(patches, model_dir, input_image, dir_in, output, log_level): @click.option( "--image", "-i", - help="image filename", + help="input image filename", type=click.Path(exists=True, dir_okay=False), ) @click.option( "--out", "-o", - help="directory to write output xml data", + help="directory for output PAGE-XML files", type=click.Path(exists=True, file_okay=False), required=True, ) @@ -109,7 +114,7 @@ def binarization(patches, model_dir, input_image, dir_in, output, log_level): @click.option( "--dir_in", "-di", - help="directory of images", + help="directory of input images", type=click.Path(exists=True, file_okay=False), ) @click.option( @@ -164,14 +169,14 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low @click.option( "--image", "-i", - help="image filename", + help="input image filename", type=click.Path(exists=True, dir_okay=False), ) @click.option( "--out", "-o", - help="directory to write output xml data", + help="directory for output PAGE-XML files", type=click.Path(exists=True, file_okay=False), required=True, ) @@ -184,7 +189,7 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low @click.option( "--dir_in", "-di", - help="directory of images", + help="directory of input images", type=click.Path(exists=True, file_okay=False), ) @click.option( @@ -437,7 +442,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ @click.option( "--image", "-i", - help="image filename", + help="input image filename", type=click.Path(exists=True, dir_okay=False), ) @click.option( @@ -449,7 +454,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ @click.option( "--dir_in", "-di", - help="directory of images", + help="directory of input images", 
type=click.Path(exists=True, file_okay=False), ) @click.option( From d6cdb69acbd1770c080ede18f52ed05c608a3693 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Sep 2025 00:11:23 +0200 Subject: [PATCH 05/15] binarize/enhance/layout/ocr ls_imgs: use the same file name suffix filter for dir-in mode --- src/eynollah/eynollah.py | 11 +++++------ src/eynollah/image_enhancer.py | 7 ++++--- src/eynollah/sbb_binarize.py | 3 ++- src/eynollah/utils/__init__.py | 8 ++++++++ 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 27277ee..9071f7a 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -107,6 +107,7 @@ from .utils.drop_capitals import ( from .utils.marginals import get_marginals from .utils.resize import resize_image from .utils import ( + is_image_filename, boosting_headers_by_longshot_region_segmentation, crop_image_inside_box, find_num_col, @@ -4547,14 +4548,13 @@ class Eynollah: self.logger.info("Enabled modes: " + ", ".join(enabled_modes)) if dir_in: - self.ls_imgs = os.listdir(dir_in) - self.ls_imgs = [ind_img for ind_img in self.ls_imgs if ind_img.endswith('.jpg') or ind_img.endswith('.jpeg') or ind_img.endswith('.png') or ind_img.endswith('.tif') or ind_img.endswith('.tiff') or ind_img.endswith('.JPG') or ind_img.endswith('.JPEG') or ind_img.endswith('.TIF') or ind_img.endswith('.TIFF') or ind_img.endswith('.PNG')] + ls_imgs = list(filter(is_image_filename, os.listdir(self.dir_in))) elif image_filename: - self.ls_imgs = [image_filename] + ls_imgs = [image_filename] else: raise ValueError("run requires either a single image filename or a directory") - for img_filename in self.ls_imgs: + for img_filename in ls_imgs: self.logger.info(img_filename) t0 = time.time() @@ -5394,8 +5394,7 @@ class Eynollah_ocr: def run(self, overwrite : bool = False): if self.dir_in: - ls_imgs = os.listdir(self.dir_in) - ls_imgs = [ind_img for ind_img in ls_imgs if 
ind_img.endswith('.jpg') or ind_img.endswith('.jpeg') or ind_img.endswith('.png') or ind_img.endswith('.tif') or ind_img.endswith('.tiff') or ind_img.endswith('.JPG') or ind_img.endswith('.JPEG') or ind_img.endswith('.TIF') or ind_img.endswith('.TIFF') or ind_img.endswith('.PNG')] + ls_imgs = list(filter(is_image_filename, os.listdir(self.dir_in))) else: ls_imgs = [self.image_filename] diff --git a/src/eynollah/image_enhancer.py b/src/eynollah/image_enhancer.py index f577e52..5a06d59 100644 --- a/src/eynollah/image_enhancer.py +++ b/src/eynollah/image_enhancer.py @@ -21,6 +21,7 @@ from tensorflow.keras.models import load_model from .utils.resize import resize_image from .utils.pil_cv2 import pil2cv from .utils import ( + is_image_filename, crop_image_inside_box ) @@ -701,13 +702,13 @@ class Enhancer: t0_tot = time.time() if dir_in: - self.ls_imgs = os.listdir(dir_in) + ls_imgs = list(filter(is_image_filename, os.listdir(dir_in))) elif image_filename: - self.ls_imgs = [image_filename] + ls_imgs = [image_filename] else: raise ValueError("run requires either a single image filename or a directory") - for img_filename in self.ls_imgs: + for img_filename in ls_imgs: self.logger.info(img_filename) t0 = time.time() diff --git a/src/eynollah/sbb_binarize.py b/src/eynollah/sbb_binarize.py index 2d5035f..3716987 100644 --- a/src/eynollah/sbb_binarize.py +++ b/src/eynollah/sbb_binarize.py @@ -16,6 +16,7 @@ import tensorflow as tf from tensorflow.keras.models import load_model from tensorflow.python.keras import backend as tensorflow_backend +from .utils import is_image_filename def resize_image(img_in, input_height, input_width): return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) @@ -347,7 +348,7 @@ class SbbBinarizer: cv2.imwrite(output, img_last) return img_last else: - ls_imgs = os.listdir(dir_in) + ls_imgs = list(filter(is_image_filename, os.listdir(dir_in))) for image_name in ls_imgs: image_stem = image_name.split('.')[0] 
print(image_name,'image_name') diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index ca86047..c154fe4 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -2194,3 +2194,11 @@ def return_boxes_of_images_by_order_of_reading_new( return boxes, peaks_neg_tot_tables_new else: return boxes, peaks_neg_tot_tables + +def is_image_filename(fname: str) -> bool: + return fname.lower().endswith(('.jpg', + '.jpeg', + '.png', + '.tif', + '.tiff', + )) From b094a6b77feb4e86f1ae07c1a5c96e5e88068523 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Sep 2025 00:51:45 +0200 Subject: [PATCH 06/15] mbreorder: avoid spaces in logger name --- src/eynollah/mb_ro_on_layout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eynollah/mb_ro_on_layout.py b/src/eynollah/mb_ro_on_layout.py index c6c02cf..70f1402 100644 --- a/src/eynollah/mb_ro_on_layout.py +++ b/src/eynollah/mb_ro_on_layout.py @@ -39,7 +39,7 @@ class machine_based_reading_order_on_layout: ): self.dir_out = dir_out - self.logger = logger if logger else getLogger('mbro on layout') + self.logger = logger if logger else getLogger('mbreorder') # for parallelization of CPU-intensive tasks: self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) atexit.register(self.executor.shutdown) From 9967510327d33a49aa619ceba7a36f414fdc09e7 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Sep 2025 00:52:16 +0200 Subject: [PATCH 07/15] mbreorder: filter by .xml suffix in dir-in mode --- src/eynollah/mb_ro_on_layout.py | 7 ++++--- src/eynollah/utils/__init__.py | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/eynollah/mb_ro_on_layout.py b/src/eynollah/mb_ro_on_layout.py index 70f1402..6d72614 100644 --- a/src/eynollah/mb_ro_on_layout.py +++ b/src/eynollah/mb_ro_on_layout.py @@ -25,6 +25,7 @@ from .utils.contour import ( return_contours_of_image, return_parent_contours, ) +from .utils import 
is_xml_filename DPI_THRESHOLD = 298 KERNEL = np.ones((5, 5), np.uint8) @@ -751,13 +752,13 @@ class machine_based_reading_order_on_layout: t0_tot = time.time() if dir_in: - self.ls_xmls = os.listdir(dir_in) + ls_xmls = list(filter(is_xml_filename, os.listdir(dir_in))) elif xml_filename: - self.ls_xmls = [xml_filename] + ls_xmls = [xml_filename] else: raise ValueError("run requires either a single image filename or a directory") - for xml_filename in self.ls_xmls: + for xml_filename in ls_xmls: self.logger.info(xml_filename) t0 = time.time() diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index c154fe4..6eeabd0 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -2202,3 +2202,6 @@ def is_image_filename(fname: str) -> bool: '.tif', '.tiff', )) + +def is_xml_filename(fname: str) -> bool: + return fname.lower().endswith('.xml') From f07df080f08d93564eafa96c9d6299f181857fbe Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Sep 2025 01:13:48 +0200 Subject: [PATCH 08/15] add tests for enhancement and mbreorder --- .../euler_rechenkunst01_1738_0025.xml | 1626 +++++++++++++ .../resources/kant_aufklaerung_1784_0020.xml | 2129 +++++++++++++++++ tests/test_run.py | 129 +- 3 files changed, 3875 insertions(+), 9 deletions(-) create mode 100644 tests/resources/euler_rechenkunst01_1738_0025.xml create mode 100644 tests/resources/kant_aufklaerung_1784_0020.xml diff --git a/tests/resources/euler_rechenkunst01_1738_0025.xml b/tests/resources/euler_rechenkunst01_1738_0025.xml new file mode 100644 index 0000000..1a92f73 --- /dev/null +++ b/tests/resources/euler_rechenkunst01_1738_0025.xml @@ -0,0 +1,1626 @@ + + + OCR-D + 2016-09-29T14:32:09 + 2018-04-25T08:56:33 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 9 + + + 9 + + + 9 + + + + + + + + + der + + + + + rechten + + + + + gegen + + + + + der + + + + + lincken + + + + + Hand + + + + + bedeutet + + + der rechten gegen der lincken Hand 
bedeutet + + + + + + + + wie + + + + + folget: + + + wie folget: + + + der rechten gegen der lincken Hand bedeutet +wie folget: + + + + + + + + + I. + + + I. + + + I. + + + + + + + + + 0 + + + + + - + + + + + nichts + + + 0 - nichts + + + + + + + + 1 + + + + + - + + + + + eins + + + 1 - eins + + + + + + + + 2 + + + + + - + + + + + zwey + + + 2 - zwey + + + + + + + + 3 + + + + + - + + + + + drey + + + 3 - drey + + + + + + + + 4 + + + + + - + + + + + vier + + + 4 - vier + + + 0 - nichts +1 - eins +2 - zwey +3 - drey +4 - vier + + + + + + + + + 5 + + + + + - + + + + + fuͤnf + + + 5 - fuͤnf + + + + + + + + 6 + + + + + - + + + + + ſechs + + + 6 - ſechs + + + + + + + 7 + + + + + - + + + + + ſieben + + + 7 - ſieben + + + + + + + + 8 + + + + + - + + + + + acht + + + 8 - acht + + + + + + + + 9 + + + + + - + + + + + neun + + + 9 - neun + + + 5 - fuͤnf +6 - ſechs +7 - ſieben +8 - acht +9 - neun + + + + + + + + + Auf + + + + + der + + + + + zweyten + + + + + Stelle + + + + + aber + + + + + bedeutet. + + + Auf der zweyten Stelle aber bedeutet. + + + Auf der zweyten Stelle aber bedeutet. + + + + + + + + + II. + + + II. + + + II. 
+ + + + + + + + + 0 + + + + + - + + + + + nichts + + + 0 - nichts + + + + + + + + 1 + + + + + - + + + + + zehen + + + 1 - zehen + + + + + + + + 2 + + + + + - + + + + + zwanzig + + + 2 - zwanzig + + + + + + + 3 + + + + + - + + + + + dreyßig + + + 3 - dreyßig + + + + + + + 4 + + + + + - + + + + + vierzig + + + 4 - vierzig + + 0 - nichts +1 - zehen +2 - zwanzig +3 - dreyßig +4 - vierzig + + + + + + + + + 5 + + + + + - + + + + + fuͤnfzig + + + 5 - fuͤnfzig + + + + + + + + 6 + + + + + - + + + + + ſechzig + + + 6 - ſechzig + + + + + + + 7 + + + + + - + + + + + ſiebenzig + + + 7 - ſiebenzig + + + + + + + 8 + + + + + - + + + + + achtzig + + + 8 - achtzig + + + + + + + 9 + + + + + - + + + + + neunzig + + + 9 - neunzig + + 5 - fuͤnfzig +6 - ſechzig +7 - ſiebenzig +8 - achtzig +9 - neunzig + + + + + + + + + Auf + + + + + der + + + + + dritten + + + + + Stelle + + + + + bedeutet. + + + Auf der dritten Stelle bedeutet. + + + Auf der dritten Stelle bedeutet. + + + + + + + + + III. + + + III. + + + III. 
+ + + + + + + + + 0 + + + + + - + + + + + nichts + + + 0 - nichts + + + + + + + + 1 + + + + + - + + + + + hundert + + + 1 - hundert + + + + + + + + 2 + + + + + - + + + + + zwey + + + + + hundert + + + 2 - zwey hundert + + + + + + + + 3 + + + + + - + + + + + drey + + + + + hundert + + + 3 - drey hundert + + + + + + + + 4 + + + + + - + + + + + vier + + + + + hundert + + + 4 - vier hundert + + + 0 - nichts +1 - hundert +2 - zwey hundert +3 - drey hundert +4 - vier hundert + + + + + + + + + 5 + + + + + - + + + + + fuͤnf + + + + + hundert + + + 5 - fuͤnf hundert + + + + + + + + 6 + + + + + - + + + + + ſechs + + + + + hundert + + + 6 - ſechs hundert + + + + + + + 7 + + + + + - + + + + + ſieben + + + + + hundert + + + 7 - ſieben hundert + + + + + + + + 8 + + + + + - + + + + + acht + + + + + hundert + + + 8 - acht hundert + + + + + + + 9 + + + + + - + + + + + neun + + + + + hundert + + + 9 - neun hundert + + + 5 - fuͤnf hundert +6 - ſechs hundert +7 - ſieben hundert +8 - acht hundert +9 - neun hundert + + + + + + + + + Auf + + + + + der + + + + + vierten + + + + + Stelle + + + + + bedeutet. + + + Auf der vierten Stelle bedeutet. + + + Auf der vierten Stelle bedeutet. + + + + + + + + + IV. + + + IV. + + + IV. 
+ + + + + + + + + 0 + + + + + - + + + + + nichts + + + 0 - nichts + + + + + + + + 1 + + + + + - + + + + + tauſend + + + 1 - tauſend + + + + + + + + 2 + + + + + - + + + + + zwey + + + + + tauſend + + + 2 - zwey tauſend + + + + + + + + 3 + + + + + - + + + + + drey + + + + + tauſend + + + 3 - drey tauſend + + + + + + + + 4 + + + + + - + + + + + vier + + + + + tauſend + + + 4 - vier tauſend + + + 0 - nichts +1 - tauſend +2 - zwey tauſend +3 - drey tauſend +4 - vier tauſend + + + + + + + + + 5 + + + + + - + + + + + fuͤnf + + + + + tauſend + + + 5 - fuͤnf tauſend + + + + + + + + 6 + + + + + - + + + + + ſechs + + + + + tauſend + + + 6 - ſechs tauſend + + + + + + + 7 + + + + + - + + + + + ſieben + + + + + tauſend + + + 7 - ſieben tauſend + + + + + + + + 8 + + + + + - + + + + + acht + + + + + tauſend + + + 8 - acht tauſend + + + + + + + 9 + + + + + - + + + + + neun + + + + + tauſend + + + 9 - neun tauſend + + 5 - fuͤnf tauſend +6 - ſechs tauſend +7 - ſieben tauſend +8 - acht tauſend +9 - neun tauſend + + + + + + + + + Auf + + + + + der + + + + + fuͤnften + + + + + Stelle + + + + + bedeutet. + + + Auf der fuͤnften Stelle bedeutet. + + + Auf der fuͤnften Stelle bedeutet. + + + + + + + + + V. + + + V. + + + V. 
+ + + + + + + + + 0 + + + + + - + + + + + nichts + + + 0 - nichts + + + + + + + + 1 + + + + + - + + + + + zehen + + + + + tauſend + + + 1 - zehen tauſend + + + + + + + + 2 + + + + + - + + + + + zwanzig + + + + + tauſend + + + 2 - zwanzig tauſend + + + + + + + + 3 + + + + + - + + + + + dreyßig + + + + + tauſend + + + 3 - dreyßig tauſend + + + + + + + + 4 + + + + + - + + + + + vierzig + + + + + tauſend + + + 4 - vierzig tauſend + + + 0 - nichts +1 - zehen tauſend +2 - zwanzig tauſend +3 - dreyßig tauſend +4 - vierzig tauſend + + + + + + + + + 5 + + + + + - + + + + + fuͤnfzig + + + + + tauſend + + + 5 - fuͤnfzig tauſend + + + + + + + + 6 + + + + + - + + + + + ſechzig + + + + + tauſend + + + 6 - ſechzig tauſend + + + + + + + 7 + + + + + - + + + + + ſiebenzig + + + + + tauſend + + + 7 - ſiebenzig tauſend + + + + + + + + 8 + + + + + - + + + + + achtzig + + + + + tauſend + + + 8 - achtzig tauſend + + + + + + + 9 + + + + + - + + + + + neunzig + + + + + tauſend + + + 9 - neunzig tauſend + + + 5 - fuͤnfzig tauſend +6 - ſechzig tauſend +7 - ſiebenzig tauſend +8 - achtzig tauſend +9 - neunzig tauſend + + + + + + + + A + + + + + 5 + + + A 5 + + A 5 + + + + + + + + + Anf + + + Anf + + Anf + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/resources/kant_aufklaerung_1784_0020.xml b/tests/resources/kant_aufklaerung_1784_0020.xml new file mode 100644 index 0000000..47484cd --- /dev/null +++ b/tests/resources/kant_aufklaerung_1784_0020.xml @@ -0,0 +1,2129 @@ + + + OCR-D + 2016-09-20T11:09:27.431+02:00 + 2018-04-24T17:44:49.605+01:00 + + + + + + + + + + + + + + + + + + + + + + + ( + + + + + + + 484 + + + + + + + ) + + + + + ( 484 ) + + + + ( 484 ) + + + + + + + + + + + gewiegelt + + + + + + + worden + + + + + + + ; + + + + + + + ſo + + + + + + + ſchaͤdlich + + + + + + + iſt + + + + + + + es + + + + + + + Vorurtheile + + + + + + + zu + + + + + gewiegelt worden; ſo ſchaͤdlich iſt es Vorurtheile zu + + + + + + + + + + pflanzen + + + + + + + , 
+ + + + + + + weil + + + + + + + ſie + + + + + + + ſich + + + + + + + zuletzt + + + + + + + an + + + + + + + denen + + + + + + + ſelbſt + + + + + + + raͤchen + + + + + + + , + + + + + pflanzen, weil ſie ſich zuletzt an denen ſelbſt raͤchen, + + + + + + + + + + die + + + + + + + , + + + + + + + oder + + + + + + + deren + + + + + + + Vorgaͤnger + + + + + + + , + + + + + + + ihre + + + + + + + Urheber + + + + + + + geweſen + + + + + die, oder deren Vorgaͤnger, ihre Urheber geweſen + + + + + + + + + + ſind + + + + + + + . + + + + + + + Daher + + + + + + + kann + + + + + + + ein + + + + + + + Publikum + + + + + + + nur + + + + + + + langſam + + + + + + + zur + + + + + ſind. Daher kann ein Publikum nur langſam zur + + + + + + + + + + Aufklaͤrung + + + + + + + gelangen + + + + + + + . + + + + + + + Durch + + + + + + + eine + + + + + + + Revolution + + + + + + + wird + + + + + Aufklaͤrung gelangen. Durch eine Revolution wird + + + + + + + + + + vielleicht + + + + + + + wohl + + + + + + + ein + + + + + + + Abfall + + + + + + + von + + + + + + + perſoͤnlichem + + + + + + + Despo- + + + + + vielleicht wohl ein Abfall von perſoͤnlichem Despo- + + + + + + + + + + tism + + + + + + + und + + + + + + + gewinnſuͤchtiger + + + + + + + oder + + + + + + + herrſchſüchtiger + + + + + + + Be + + + + + + + - + + + + + tism und gewinnſuͤchtiger oder herrſchſüchtiger Be- + + + + + + + + + + druͤkkung + + + + + + + , + + + + + + + aber + + + + + + + niemals + + + + + + + wahre + + + + + + + Reform + + + + + + + der + + + + + + + Den + + + + + + + - + + + + + druͤkkung, aber niemals wahre Reform der Den- + + + + + + + + + + kungsart + + + + + + + zu + + + + + + + Stande + + + + + + + kommen + + + + + + + ; + + + + + + + ſondern + + + + + + + neue + + + + + + + Vor + + + + + + + - + + + + + kungsart zu Stande kommen; ſondern neue Vor- + + + + + + + + + + urtheile + + + + + + + werden + + + + + + + , + + + + + + + eben + + + + + + + ſowohl + + + + + + + als + + + + + + + die + + + + + + + alten 
+ + + + + + + , + + + + + + + zum + + + + + urtheile werden, eben ſowohl als die alten, zum + + + + + + + + + + Leitbande + + + + + + + des + + + + + + + gedankenloſen + + + + + + + großen + + + + + + + Haufens + + + + + Leitbande des gedankenloſen großen Haufens + + + + + + + + + + dienen + + + + + + + . + + + + + dienen. + + + + gewiegelt worden; ſo ſchaͤdlich iſt es Vorurtheile zu +pflanzen, weil ſie ſich zuletzt an denen ſelbſt raͤchen, +die, oder deren Vorgaͤnger, ihre Urheber geweſen +ſind. Daher kann ein Publikum nur langſam zur +Aufklaͤrung gelangen. Durch eine Revolution wird +vielleicht wohl ein Abfall von perſoͤnlichem Despo- +tism und gewinnſuͤchtiger oder herrſchſüchtiger Be- +druͤkkung, aber niemals wahre Reform der Den- +kungsart zu Stande kommen; ſondern neue Vor- +urtheile werden, eben ſowohl als die alten, zum +Leitbande des gedankenloſen großen Haufens +dienen. + + + + + + + + + + + Zu + + + + + + + dieſer + + + + + + + Aufklaͤrung + + + + + + + aber + + + + + + + wird + + + + + + + nichts + + + + + + + erfordert + + + + + Zu dieſer Aufklaͤrung aber wird nichts erfordert + + + + + + + + + + als + + + + + + + Freiheit + + + + + + + ; + + + + + + + und + + + + + + + zwar + + + + + + + die + + + + + + + unſchaͤdlichſte + + + + + + + unter + + + + + als Freiheit; und zwar die unſchaͤdlichſte unter + + + + + + + + + allem + + + + + + + , + + + + + + + was + + + + + + + nur + + + + + + + Freiheit + + + + + + + heißen + + + + + + + mag + + + + + + + , + + + + + + + naͤmlich + + + + + + + die + + + + + + + : + + + + + allem, was nur Freiheit heißen mag, naͤmlich die: + + + + + + + + + + von + + + + + + + ſeiner + + + + + + + Vernunft + + + + + + + in + + + + + + + allen + + + + + + + Stuͤkken + + + + + + + oͤffentlichen + + + + + von ſeiner Vernunft in allen Stuͤkken oͤffentlichen + + + + + + + + + + Gebrauch + + + + + + + zu + + + + + + + machen + + + + + + + . 
+ + + + + + + Nun + + + + + + + hoͤre + + + + + + + ich + + + + + + + aber + + + + + + + von + + + + + + + al + + + + + + + - + + + + + Gebrauch zu machen. Nun hoͤre ich aber von al- + + + + + + + + + + len + + + + + + + Seiten + + + + + + + rufen + + + + + + + : + + + + + + + raͤſonnirt + + + + + + + nicht + + + + + + + ! + + + + + + + Der + + + + + + + Offi + + + + + + + - + + + + + len Seiten rufen: raͤſonnirt nicht! Der Offi- + + + + + + + + + + zier + + + + + + + ſagt + + + + + + + : + + + + + + + raͤſonnirt + + + + + + + nicht + + + + + + + , + + + + + + + ſondern + + + + + + + exercirt + + + + + + + ! + + + + + + + Der + + + + + zier ſagt: raͤſonnirt nicht, ſondern exercirt! Der + + + + + + + + + + Finanzrath + + + + + + + : + + + + + + + raͤſonnirt + + + + + + + nicht + + + + + + + , + + + + + + + ſondern + + + + + + + bezahlt + + + + + + + ! + + + + + + + Der + + + + + Finanzrath: raͤſonnirt nicht, ſondern bezahlt! Der + + + + + + + + + + Geiſtliche + + + + + + + : + + + + + + + raͤſonnirt + + + + + + + nicht + + + + + + + , + + + + + + + ſondern + + + + + + + glaubt + + + + + + + ! + + + + + + + ( + + + + + + + Nur + + + + + Geiſtliche: raͤſonnirt nicht, ſondern glaubt! (Nur + + + + + + + + + + ein + + + + + + + einziger + + + + + + + Herr + + + + + + + in + + + + + + + der + + + + + + + Welt + + + + + + + ſagt + + + + + + + : + + + + + + + raͤſonnirt + + + + + + + , + + + + + + + ſo + + + + + ein einziger Herr in der Welt ſagt: raͤſonnirt, ſo + + + + + + + + + + viel + + + + + + + ihr + + + + + + + wollt + + + + + + + , + + + + + + + und + + + + + + + woruͤber + + + + + + + ihr + + + + + + + wollt + + + + + + + ; + + + + + + + aber + + + + + + + ge + + + + + + + - + + + + + viel ihr wollt, und woruͤber ihr wollt; aber ge- + + + + + + + + + + horcht + + + + + + + ! + + + + + + + ) + + + + + + + Hier + + + + + + + iſt + + + + + + + uͤberall + + + + + + + Einſchraͤnkung + + + + + + + der + + + + + + + Frei + + + + + + + - + + + + + horcht!) 
Hier iſt uͤberall Einſchraͤnkung der Frei- + + + + + + + + + + heit + + + + + + + . + + + + + + + Welche + + + + + + + Einſchraͤnkung + + + + + + + aber + + + + + + + iſt + + + + + + + der + + + + + + + Aufklaͤ + + + + + + + - + + + + + heit. Welche Einſchraͤnkung aber iſt der Aufklaͤ- + + + + + + + + + + rung + + + + + + + hinderlich + + + + + + + ? + + + + + + + welche + + + + + + + nicht + + + + + + + , + + + + + + + ſondern + + + + + + + ihr + + + + + + + wohl + + + + + + + gar + + + + + rung hinderlich? welche nicht, ſondern ihr wohl gar + + + + + + + + + + befoͤrderlich + + + + + + + ? + + + + + + + + + + + + + + Ich + + + + + + + antworte + + + + + + + : + + + + + + + der + + + + + + + oͤffentliche + + + + + befoͤrderlich? — Ich antworte: der oͤffentliche + + + + + + + + + + Gebrauch + + + + + + + ſeiner + + + + + + + Vernunft + + + + + + + muß + + + + + + + jederzeit + + + + + + + frei + + + + + + + ſein + + + + + + + , + + + + + Gebrauch ſeiner Vernunft muß jederzeit frei ſein, + + + + + + + + + + und + + + + + + + der + + + + + + + allein + + + + + + + kann + + + + + + + Aufklaͤrung + + + + + + + unter + + + + + + + Menſchen + + + + + + + zu + + + + + und der allein kann Aufklaͤrung unter Menſchen zu + + + + + Zu dieſer Aufklaͤrung aber wird nichts erfordert +als Freiheit; und zwar die unſchaͤdlichſte unter +allem, was nur Freiheit heißen mag, naͤmlich die: +von ſeiner Vernunft in allen Stuͤkken oͤffentlichen +Gebrauch zu machen. Nun hoͤre ich aber von al- +len Seiten rufen: raͤſonnirt nicht! Der Offi- +zier ſagt: raͤſonnirt nicht, ſondern exercirt! Der +Finanzrath: raͤſonnirt nicht, ſondern bezahlt! Der +Geiſtliche: raͤſonnirt nicht, ſondern glaubt! (Nur +ein einziger Herr in der Welt ſagt: raͤſonnirt, ſo +viel ihr wollt, und woruͤber ihr wollt; aber ge- +horcht!) Hier iſt uͤberall Einſchraͤnkung der Frei- +heit. Welche Einſchraͤnkung aber iſt der Aufklaͤ- +rung hinderlich? welche nicht, ſondern ihr wohl gar +befoͤrderlich? 
— Ich antworte: der oͤffentliche +Gebrauch ſeiner Vernunft muß jederzeit frei ſein, +und der allein kann Aufklaͤrung unter Menſchen zu + + + + + + + + + + + Stan + + + + + + + - + + + + + Stan- + + + + + Stan- + + + + + + + + + + \ No newline at end of file diff --git a/tests/test_run.py b/tests/test_run.py index b4e2dbd..370deef 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -2,7 +2,12 @@ from os import environ from pathlib import Path import logging from PIL import Image -from eynollah.cli import layout as layout_cli, binarization as binarization_cli +from eynollah.cli import ( + layout as layout_cli, + binarization as binarization_cli, + enhancement as enhancement_cli, + machine_based_reading_order as mbreorder_cli, +) from click.testing import CliRunner from ocrd_modelfactory import page_from_file from ocrd_models.constants import NAMESPACES as NS @@ -44,8 +49,7 @@ def test_run_eynollah_layout_filename(tmp_path, subtests, pytestconfig, caplog): options=options): with caplog.filtering(only_eynollah): result = runner.invoke(layout_cli, args + options, catch_exceptions=False) - print(result) - assert result.exit_code == 0 + assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert str(infile) in logmsgs assert outfile.exists() @@ -73,8 +77,7 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog): runner = CliRunner() with caplog.filtering(only_eynollah): result = runner.invoke(layout_cli, args) - print(result) - assert result.exit_code == 0 + assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Job done in')]) == 2 assert any(logmsg for logmsg in logmsgs if logmsg.startswith('All jobs done in')) @@ -88,6 +91,8 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca '-i', str(infile), '-o', str(outfile), ] + if pytestconfig.getoption('verbose') > 0: + 
args.extend(['-l', 'DEBUG']) caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'SbbBinarizer' @@ -100,8 +105,7 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca options=options): with caplog.filtering(only_eynollah): result = runner.invoke(binarization_cli, args + options) - print(result) - assert result.exit_code == 0 + assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting')) assert outfile.exists() @@ -119,14 +123,121 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c '-di', str(indir), '-o', str(outdir), ] + if pytestconfig.getoption('verbose') > 0: + args.extend(['-l', 'DEBUG']) caplog.set_level(logging.INFO) def only_eynollah(logrec): return logrec.name == 'SbbBinarizer' runner = CliRunner() with caplog.filtering(only_eynollah): result = runner.invoke(binarization_cli, args) - print(result) - assert result.exit_code == 0 + assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2 assert len(list(outdir.iterdir())) == 2 + +def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, caplog): + infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') + outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') + args = [ + '-m', EYNOLLAH_MODELS, + '-i', str(infile), + '-o', str(outfile.parent), + # subtests write to same location + '--overwrite', + ] + if pytestconfig.getoption('verbose') > 0: + args.extend(['-l', 'DEBUG']) + caplog.set_level(logging.INFO) + def only_eynollah(logrec): + return logrec.name == 'enhancement' + runner = CliRunner() + for options in [ + [], # defaults + ["-sos"], + ]: + with subtests.test(#msg="test CLI", + options=options): + with caplog.filtering(only_eynollah): + 
result = runner.invoke(enhancement_cli, args + options) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs + assert outfile.exists() + with Image.open(infile) as original_img: + original_size = original_img.size + with Image.open(outfile) as enhanced_img: + enhanced_size = enhanced_img.size + assert (original_size == enhanced_size) == ("-sos" in options) + +def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, caplog): + indir = testdir.joinpath('resources') + outdir = tmp_path + args = [ + '-m', EYNOLLAH_MODELS, + '-di', str(indir), + '-o', str(outdir), + ] + if pytestconfig.getoption('verbose') > 0: + args.extend(['-l', 'DEBUG']) + caplog.set_level(logging.INFO) + def only_eynollah(logrec): + return logrec.name == 'enhancement' + runner = CliRunner() + with caplog.filtering(only_eynollah): + result = runner.invoke(enhancement_cli, args) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2 + assert len(list(outdir.iterdir())) == 2 + +def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplog): + infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.xml') + outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml') + args = [ + '-m', EYNOLLAH_MODELS, + '-i', str(infile), + '-o', str(outfile.parent), + ] + if pytestconfig.getoption('verbose') > 0: + args.extend(['-l', 'DEBUG']) + caplog.set_level(logging.INFO) + def only_eynollah(logrec): + return logrec.name == 'mbreorder' + runner = CliRunner() + with caplog.filtering(only_eynollah): + result = runner.invoke(mbreorder_cli, args) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + # FIXME: mbreorder has no logging! 
+ #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs + assert outfile.exists() + #in_tree = page_from_file(str(infile)).etree + #in_order = in_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS) + out_tree = page_from_file(str(outfile)).etree + out_order = out_tree.xpath("//page:OrderedGroup//@regionRef", namespaces=NS) + #assert len(out_order) >= 2, "result is inaccurate" + #assert in_order != out_order + assert out_order == ['r_1_1', 'r_2_1', 'r_2_2', 'r_2_3'] + +def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, caplog): + indir = testdir.joinpath('resources') + outdir = tmp_path + args = [ + '-m', EYNOLLAH_MODELS, + '-di', str(indir), + '-o', str(outdir), + ] + if pytestconfig.getoption('verbose') > 0: + args.extend(['-l', 'DEBUG']) + caplog.set_level(logging.INFO) + def only_eynollah(logrec): + return logrec.name == 'mbreorder' + runner = CliRunner() + with caplog.filtering(only_eynollah): + result = runner.invoke(mbreorder_cli, args) + assert result.exit_code == 0, result.stdout + logmsgs = [logrec.message for logrec in caplog.records] + # FIXME: mbreorder has no logging! 
+ #assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2 + assert len(list(outdir.iterdir())) == 2 From 58dd192fad4dedb4161e2ee9a695039c5d4db964 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Sep 2025 16:05:45 +0200 Subject: [PATCH 09/15] smoke-test: also add enhancement and mbreorder here --- Makefile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 177e87c..f78d7d1 100644 --- a/Makefile +++ b/Makefile @@ -82,13 +82,21 @@ smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_eynollah fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $( Date: Thu, 25 Sep 2025 16:08:40 +0200 Subject: [PATCH 10/15] CLIs: add required=True where missing --- src/eynollah/cli.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 71958df..9744ecb 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -29,6 +29,7 @@ def main(): "-o", help="directory for output images", type=click.Path(exists=True, file_okay=False), + required=True, ) @click.option( "--model", @@ -75,6 +76,7 @@ def machine_based_reading_order(dir_in, input, out, model, log_level): "-o", help="output image (if using -i) or output image directory (if using -di)", type=click.Path(file_okay=True, dir_okay=True), + required=True, ) @click.option( "--log_level", @@ -475,6 +477,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ "-dx", help="directory of xmls", type=click.Path(exists=True, file_okay=False), + required=True, ) @click.option( "--dir_out_image_text", @@ -492,6 +495,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ "--model_name", help="Specific model file path to use for OCR", type=click.Path(exists=True, file_okay=False), + required=True, ) @click.option( "--tr_ocr", From 
ef1304a764530802b34c54b8e2a53fbe8a6809d9 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Sep 2025 16:11:39 +0200 Subject: [PATCH 11/15] CLIs: reorder options, explain -i vs -di --- src/eynollah/cli.py | 72 +++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 9744ecb..3e9fbe4 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -12,18 +12,18 @@ def main(): pass @main.command() -@click.option( - "--dir_in", - "-di", - help="directory of PAGE-XML input files", - type=click.Path(exists=True, file_okay=False), -) @click.option( "--input", "-i", help="PAGE-XML input filename", type=click.Path(exists=True, dir_okay=False), ) +@click.option( + "--dir_in", + "-di", + help="directory of PAGE-XML input files (instead of --input)", + type=click.Path(exists=True, file_okay=False), +) @click.option( "--out", "-o", @@ -45,7 +45,8 @@ def main(): help="Override log level globally to this", ) -def machine_based_reading_order(dir_in, input, out, model, log_level): +def machine_based_reading_order(input, dir_in, out, model, log_level): + assert bool(input) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." 
orderer = machine_based_reading_order_on_layout(model, dir_out=out) if log_level: orderer.logger.setLevel(getLevelName(log_level)) @@ -68,7 +69,7 @@ def machine_based_reading_order(dir_in, input, out, model, log_level): @click.option( "--dir_in", "-di", - help="directory of input images", + help="directory of input images (instead of --image)", type=click.Path(exists=True, file_okay=False), ) @click.option( @@ -85,7 +86,7 @@ def machine_based_reading_order(dir_in, input, out, model, log_level): help="Override log level globally to this", ) def binarization(patches, model_dir, input_image, dir_in, output, log_level): - assert (dir_in is None) != (input_image is None), "Specify either -di and or -i not both" + assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." binarizer = SbbBinarizer(model_dir) if log_level: binarizer.log.setLevel(getLevelName(log_level)) @@ -116,7 +117,7 @@ def binarization(patches, model_dir, input_image, dir_in, output, log_level): @click.option( "--dir_in", "-di", - help="directory of input images", + help="directory of input images (instead of --image)", type=click.Path(exists=True, file_okay=False), ) @click.option( @@ -151,8 +152,8 @@ def binarization(patches, model_dir, input_image, dir_in, output, log_level): ) def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, save_org_scale, log_level): + assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." 
initLogging() - assert image or dir_in, "Either a single image -i or a dir_in -di is required" enhancer = Enhancer( model, dir_out=out, @@ -191,7 +192,7 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low @click.option( "--dir_in", "-di", - help="directory of input images", + help="directory of input images (instead of --image)", type=click.Path(exists=True, file_okay=False), ) @click.option( @@ -400,7 +401,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ assert not extract_only_images or not tables, "Image extraction -eoi can not be set alongside tables -tab" assert not extract_only_images or not right2left, "Image extraction -eoi can not be set alongside right2left -r2l" assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho" - assert image or dir_in, "Either a single image -i or a dir_in -di is required" + assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." eynollah = Eynollah( model, dir_out=out, @@ -447,44 +448,44 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ help="input image filename", type=click.Path(exists=True, dir_okay=False), ) -@click.option( - "--overwrite", - "-O", - help="overwrite (instead of skipping) if output xml exists", - is_flag=True, -) @click.option( "--dir_in", "-di", - help="directory of input images", + help="directory of input images (instead of --image)", type=click.Path(exists=True, file_okay=False), ) @click.option( "--dir_in_bin", "-dib", - help="directory of binarized images. This should be given if you want to do prediction based on both rgb and bin images. And all bin images are png files", + help="directory of binarized images (in addition to --dir_in for RGB images; filename stems must match the RGB image files, with '.png' suffix).\nPerform prediction using both RGB and binary images. 
(This does not necessarily improve results, however it may be beneficial for certain document images.)", type=click.Path(exists=True, file_okay=False), ) -@click.option( - "--out", - "-o", - help="directory to write output xml data", - type=click.Path(exists=True, file_okay=False), - required=True, -) @click.option( "--dir_xmls", "-dx", - help="directory of xmls", + help="directory of input PAGE-XML files (in addition to --dir_in; filename stems must match the image files, with '.xml' suffix).", + type=click.Path(exists=True, file_okay=False), + required=True, +) +@click.option( + "--out", + "-o", + help="directory for output PAGE-XML files", type=click.Path(exists=True, file_okay=False), required=True, ) @click.option( "--dir_out_image_text", "-doit", - help="directory of images with predicted text", + help="directory for output images, newly rendered with predicted text", type=click.Path(exists=True, file_okay=False), ) +@click.option( + "--overwrite", + "-O", + help="overwrite (instead of skipping) if output xml exists", + is_flag=True, +) @click.option( "--model", "-m", @@ -515,12 +516,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ is_flag=True, help="if this parameter set to true, cropped textline images will not be masked with textline contour.", ) -@click.option( - "--prediction_with_both_of_rgb_and_bin", - "-brb/-nbrb", - is_flag=True, - help="If this parameter is set to True, the prediction will be performed using both RGB and binary images. 
However, this does not necessarily improve results; it may be beneficial for certain document images.", -) @click.option( "--batch_size", "-bs", @@ -543,7 +538,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ help="Override log level globally to this", ) -def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level): +def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level): initLogging() assert not model or not model_name, "model directory -m can not be set alongside specific model name --model_name" @@ -552,8 +547,7 @@ def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs" assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib" assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit" - assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text -etit can not be set alongside prediction with both rgb and bin -brb" - assert (bool(image) ^ bool(dir_in)), "Either -i (single image) or -di (directory) must be provided, but not both." + assert bool(image) != bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both." 
eynollah_ocr = Eynollah_ocr( image_filename=image, dir_xmls=dir_xmls, From 5b1e0c13276db179f74770408fb805f9a7b84d87 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Sep 2025 16:26:31 +0200 Subject: [PATCH 12/15] layout/ocr: make all path options kwargs to run() instead of attributes; ocr: drop redundant prediction_with_both_of_rgb_and_bin in favour of just bool(dir_in_bin) --- src/eynollah/cli.py | 37 +++-- src/eynollah/eynollah.py | 233 ++++++++++++-------------------- src/eynollah/utils/utils_ocr.py | 4 +- 3 files changed, 110 insertions(+), 164 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 3e9fbe4..a0608f9 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -404,13 +404,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." eynollah = Eynollah( model, - dir_out=out, - dir_of_cropped_images=save_images, extract_only_images=extract_only_images, - dir_of_layout=save_layout, - dir_of_deskewed=save_deskewed, - dir_of_all=save_all, - dir_save_page=save_page, enable_plotting=enable_plotting, allow_enhancement=allow_enhancement, curved_line=curved_line, @@ -435,11 +429,16 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ ) if log_level: eynollah.logger.setLevel(getLevelName(log_level)) - if dir_in: - eynollah.run(dir_in=dir_in, overwrite=overwrite) - else: - eynollah.run(image_filename=image, overwrite=overwrite) - + eynollah.run(overwrite=overwrite, + image_filename=image, + dir_in=dir_in, + dir_out=out, + dir_of_cropped_images=save_images, + dir_of_layout=save_layout, + dir_of_deskewed=save_deskewed, + dir_of_all=save_all, + dir_save_page=save_page, + ) @main.command() @click.option( @@ -549,25 +548,25 @@ def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, assert not export_textline_images_and_text or not 
dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit" assert bool(image) != bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both." eynollah_ocr = Eynollah_ocr( - image_filename=image, - dir_xmls=dir_xmls, - dir_out_image_text=dir_out_image_text, - dir_in=dir_in, - dir_in_bin=dir_in_bin, - dir_out=out, dir_models=model, model_name=model_name, tr_ocr=tr_ocr, export_textline_images_and_text=export_textline_images_and_text, do_not_mask_with_textline_contour=do_not_mask_with_textline_contour, - prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin, batch_size=batch_size, pref_of_dataset=dataset_abbrevation, min_conf_value_of_textline_text=min_conf_value_of_textline_text, ) if log_level: eynollah_ocr.logger.setLevel(getLevelName(log_level)) - eynollah_ocr.run(overwrite=overwrite) + eynollah_ocr.run(overwrite=overwrite, + dir_in=dir_in, + dir_in_bin=dir_in_bin, + image_filename=image, + dir_xmls=dir_xmls, + dir_out_image_text=dir_out_image_text, + dir_out=out, + ) if __name__ == "__main__": main() diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 9071f7a..533b38f 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -191,13 +191,7 @@ class Eynollah: def __init__( self, dir_models : str, - dir_out : Optional[str] = None, - dir_of_cropped_images : Optional[str] = None, extract_only_images : bool =False, - dir_of_layout : Optional[str] = None, - dir_of_deskewed : Optional[str] = None, - dir_of_all : Optional[str] = None, - dir_save_page : Optional[str] = None, enable_plotting : bool = False, allow_enhancement : bool = False, curved_line : bool = False, @@ -221,18 +215,12 @@ class Eynollah: skip_layout_and_reading_order : bool = False, ): self.logger = getLogger('eynollah') - + self.plotter = None + if skip_layout_and_reading_order: textline_light = True self.light_version = light_version - self.dir_out = 
dir_out - self.dir_of_all = dir_of_all - self.dir_save_page = dir_save_page self.reading_order_machine_based = reading_order_machine_based - self.dir_of_deskewed = dir_of_deskewed - self.dir_of_deskewed = dir_of_deskewed - self.dir_of_cropped_images=dir_of_cropped_images - self.dir_of_layout=dir_of_layout self.enable_plotting = enable_plotting self.allow_enhancement = allow_enhancement self.curved_line = curved_line @@ -423,21 +411,11 @@ class Eynollah: if dpi is not None: self.dpi = dpi - def reset_file_name_dir(self, image_filename): + def reset_file_name_dir(self, image_filename, dir_out): t_c = time.time() self.cache_images(image_filename=image_filename) - - self.plotter = None if not self.enable_plotting else EynollahPlotter( - dir_out=self.dir_out, - dir_of_all=self.dir_of_all, - dir_save_page=self.dir_save_page, - dir_of_deskewed=self.dir_of_deskewed, - dir_of_cropped_images=self.dir_of_cropped_images, - dir_of_layout=self.dir_of_layout, - image_filename_stem=Path(Path(image_filename).name).stem) - self.writer = EynollahXmlWriter( - dir_out=self.dir_out, + dir_out=dir_out, image_filename=image_filename, curved_line=self.curved_line, textline_light = self.textline_light) @@ -4525,7 +4503,17 @@ class Eynollah: return ordered_left_marginals, ordered_right_marginals, ordered_left_marginals_textline, ordered_right_marginals_textline, ordered_left_marginals_bbox, ordered_right_marginals_bbox, ordered_left_slopes_marginals, ordered_right_slopes_marginals - def run(self, image_filename : Optional[str] = None, dir_in : Optional[str] = None, overwrite : bool = False): + def run(self, + overwrite: bool = False, + image_filename: Optional[str] = None, + dir_in: Optional[str] = None, + dir_out: Optional[str] = None, + dir_of_cropped_images: Optional[str] = None, + dir_of_layout: Optional[str] = None, + dir_of_deskewed: Optional[str] = None, + dir_of_all: Optional[str] = None, + dir_save_page: Optional[str] = None, + ): """ Get image and scales, then extract the page of 
scanned image """ @@ -4546,9 +4534,19 @@ class Eynollah: enabled_modes.append("Table detection") if enabled_modes: self.logger.info("Enabled modes: " + ", ".join(enabled_modes)) + if self.enable_plotting: + self.logger.info("Saving debug plots") + if dir_of_cropped_images: + self.logger.info(f"Saving cropped images to: {dir_of_cropped_images}") + if dir_of_layout: + self.logger.info(f"Saving layout plots to: {dir_of_layout}") + if dir_of_deskewed: + self.logger.info(f"Saving deskewed images to: {dir_of_deskewed}") if dir_in: - ls_imgs = list(filter(is_image_filename, os.listdir(self.dir_in))) + ls_imgs = [os.path.join(dir_in, image_filename) + for image_filename in filter(is_image_filename, + os.listdir(dir_in))] elif image_filename: ls_imgs = [image_filename] else: @@ -4558,7 +4556,15 @@ class Eynollah: self.logger.info(img_filename) t0 = time.time() - self.reset_file_name_dir(os.path.join(dir_in or "", img_filename)) + self.reset_file_name_dir(img_filename, dir_out) + if self.enable_plotting: + self.plotter = EynollahPlotter(dir_out=dir_out, + dir_of_all=dir_of_all, + dir_save_page=dir_save_page, + dir_of_deskewed=dir_of_deskewed, + dir_of_cropped_images=dir_of_cropped_images, + dir_of_layout=dir_of_layout, + image_filename_stem=Path(image_filename).stem) #print("text region early -11 in %.1fs", time.time() - t0) if os.path.exists(self.writer.output_filename): if overwrite: @@ -5151,19 +5157,6 @@ class Eynollah: self.logger.info("Step 5/5: Output Generation") - output_config = [] - if self.enable_plotting: - output_config.append("Saving debug plots") - if self.dir_of_cropped_images: - output_config.append(f"Saving cropped images to: {self.dir_of_cropped_images}") - if self.dir_of_layout: - output_config.append(f"Saving layout plots to: {self.dir_of_layout}") - if self.dir_of_deskewed: - output_config.append(f"Saving deskewed images to: {self.dir_of_deskewed}") - - if output_config: - self.logger.info("Output configuration:\n * %s", "\n * ".join(output_config)) - 
pcgts = self.writer.build_pagexml_full_layout( contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, @@ -5283,21 +5276,8 @@ class Eynollah: self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s") self.logger.info("Step 5/5: Output Generation") - self.logger.info("Generating PAGE-XML output") - if self.enable_plotting: - self.logger.info("Saving debug plots") - - if self.dir_of_cropped_images: - self.logger.info(f"Saving cropped images to: {self.dir_of_cropped_images}") - - if self.dir_of_layout: - self.logger.info(f"Saving layout plots to: {self.dir_of_layout}") - - if self.dir_of_deskewed: - self.logger.info(f"Saving deskewed images to: {self.dir_of_deskewed}") - pcgts = self.writer.build_pagexml_no_full_layout( txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right, @@ -5315,32 +5295,19 @@ class Eynollah_ocr: dir_models, model_name=None, dir_xmls=None, - dir_in=None, - image_filename=None, - dir_in_bin=None, - dir_out=None, - dir_out_image_text=None, tr_ocr=False, batch_size=None, export_textline_images_and_text=False, do_not_mask_with_textline_contour=False, - prediction_with_both_of_rgb_and_bin=False, pref_of_dataset=None, min_conf_value_of_textline_text : Optional[float]=None, logger=None, ): - self.dir_in = dir_in - self.image_filename = image_filename - self.dir_in_bin = dir_in_bin - self.dir_out = dir_out - self.dir_xmls = dir_xmls self.dir_models = dir_models self.model_name = model_name self.tr_ocr = tr_ocr self.export_textline_images_and_text = export_textline_images_and_text self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour - self.dir_out_image_text = dir_out_image_text - self.prediction_with_both_of_rgb_and_bin = 
prediction_with_both_of_rgb_and_bin self.pref_of_dataset = pref_of_dataset self.logger = logger if logger else getLogger('eynollah') @@ -5392,23 +5359,27 @@ class Eynollah_ocr: ) self.end_character = len(characters) + 2 - def run(self, overwrite : bool = False): - if self.dir_in: - ls_imgs = list(filter(is_image_filename, os.listdir(self.dir_in))) + def run(self, overwrite: bool = False, + dir_in: Optional[str] = None, + dir_in_bin: Optional[str] = None, + image_filename: Optional[str] = None, + dir_xmls: Optional[str] = None, + dir_out_image_text: Optional[str] = None, + dir_out: Optional[str] = None, + ): + if dir_in: + ls_imgs = [os.path.join(dir_in, image_filename) + for image_filename in filter(is_image_filename, + os.listdir(dir_in))] else: - ls_imgs = [self.image_filename] - + ls_imgs = [image_filename] + if self.tr_ocr: tr_ocr_input_height_and_width = 384 - for ind_img in ls_imgs: - if self.dir_in: - file_name = Path(ind_img).stem - dir_img = os.path.join(self.dir_in, ind_img) - else: - file_name = Path(self.image_filename).stem - dir_img = self.image_filename - dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') - out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') + for dir_img in ls_imgs: + file_name = Path(dir_img).stem + dir_xml = os.path.join(dir_xmls, file_name+'.xml') + out_file_ocr = os.path.join(dir_out, file_name+'.xml') if os.path.exists(out_file_ocr): if overwrite: @@ -5419,8 +5390,8 @@ class Eynollah_ocr: img = cv2.imread(dir_img) - if self.dir_out_image_text: - out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png') + if dir_out_image_text: + out_image_with_text = os.path.join(dir_out_image_text, file_name+'.png') image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white") draw = ImageDraw.Draw(image_text) total_bb_coordinates = [] @@ -5458,7 +5429,7 @@ class Eynollah_ocr: textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) x,y,w,h = 
cv2.boundingRect(textline_coords) - if self.dir_out_image_text: + if dir_out_image_text: total_bb_coordinates.append([x,y,w,h]) h2w_ratio = h/float(w) @@ -5580,7 +5551,7 @@ class Eynollah_ocr: unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) - if self.dir_out_image_text: + if dir_out_image_text: font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists! font = ImageFont.truetype(font_path, 40) @@ -5708,18 +5679,10 @@ class Eynollah_ocr: img_size=(image_width, image_height) - for ind_img in ls_imgs: - if self.dir_in: - file_name = Path(ind_img).stem - dir_img = os.path.join(self.dir_in, ind_img) - else: - file_name = Path(self.image_filename).stem - dir_img = self.image_filename - - #file_name = Path(ind_img).stem - #dir_img = os.path.join(self.dir_in, ind_img) - dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') - out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') + for dir_img in ls_imgs: + file_name = Path(dir_img).stem + dir_xml = os.path.join(dir_xmls, file_name+'.xml') + out_file_ocr = os.path.join(dir_out, file_name+'.xml') if os.path.exists(out_file_ocr): if overwrite: @@ -5729,13 +5692,13 @@ class Eynollah_ocr: continue img = cv2.imread(dir_img) - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: cropped_lines_bin = [] - dir_img_bin = os.path.join(self.dir_in_bin, file_name+'.png') + dir_img_bin = os.path.join(dir_in_bin, file_name+'.png') img_bin = cv2.imread(dir_img_bin) - if self.dir_out_image_text: - out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png') + if dir_out_image_text: + out_image_with_text = os.path.join(dir_out_image_text, file_name+'.png') image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white") draw = ImageDraw.Draw(image_text) total_bb_coordinates = [] @@ -5779,13 +5742,13 @@ class Eynollah_ocr: if type_textregion=='drop-capital': angle_degrees = 0 - if self.dir_out_image_text: + if dir_out_image_text: 
total_bb_coordinates.append([x,y,w,h]) w_scaled = w * image_height/float(h) img_poly_on_img = np.copy(img) - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: img_poly_on_img_bin = np.copy(img_bin) img_crop_bin = img_poly_on_img_bin[y:y+h, x:x+w, :] @@ -5808,7 +5771,7 @@ class Eynollah_ocr: img_crop = rotate_image_with_padding(img_crop, better_des_slope ) - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope ) mask_poly = rotate_image_with_padding(mask_poly, better_des_slope ) @@ -5823,13 +5786,13 @@ class Eynollah_ocr: if not self.do_not_mask_with_textline_contour: img_crop[mask_poly==0] = 255 - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :] if not self.do_not_mask_with_textline_contour: img_crop_bin[mask_poly==0] = 255 if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90: - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) else: img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) @@ -5839,14 +5802,14 @@ class Eynollah_ocr: better_des_slope = 0 if not self.do_not_mask_with_textline_contour: img_crop[mask_poly==0] = 255 - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: if not self.do_not_mask_with_textline_contour: img_crop_bin[mask_poly==0] = 255 if type_textregion=='drop-capital': pass else: if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90: - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) else: img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) @@ -5861,14 +5824,12 @@ class 
Eynollah_ocr: cropped_lines_ver_index.append(0) cropped_lines_meging_indexing.append(0) - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) else: - if self.prediction_with_both_of_rgb_and_bin: - splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin, prediction_with_both_of_rgb_and_bin=self.prediction_with_both_of_rgb_and_bin) - else: - splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) + splited_images, splited_images_bin = return_textlines_split_if_needed( + img_crop, img_crop_bin if dir_in_bin is not None else None) if splited_images: img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) cropped_lines.append(img_fin) @@ -5889,7 +5850,7 @@ class Eynollah_ocr: else: cropped_lines_ver_index.append(0) - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width) cropped_lines_bin.append(img_fin) img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[1], image_height, image_width) @@ -5905,7 +5866,7 @@ class Eynollah_ocr: else: cropped_lines_ver_index.append(0) - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) @@ -5918,29 +5879,15 @@ class Eynollah_ocr: if cheild_text.tag.endswith("Unicode"): textline_text = cheild_text.text if textline_text: - if self.do_not_mask_with_textline_contour: - if self.pref_of_dataset: - with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.txt'), 'w') as text_file: - text_file.write(textline_text) - - 
cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.png'), img_crop ) - else: - with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file: - text_file.write(textline_text) - - cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop ) - else: - if self.pref_of_dataset: - with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.txt'), 'w') as text_file: - text_file.write(textline_text) - - cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.png'), img_crop ) - else: - with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.txt'), 'w') as text_file: - text_file.write(textline_text) - - cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.png'), img_crop ) + base_name = os.path.join(dir_out, file_name + '_line_' + str(indexer_textlines)) + if self.pref_of_dataset: + base_name += '_' + self.pref_of_dataset + if not self.do_not_mask_with_textline_contour: + base_name += '_masked' + with open(base_name + '.txt', 'w') as text_file: + text_file.write(textline_text) + cv2.imwrite(base_name + '.png', img_crop) indexer_textlines+=1 if not self.export_textline_images_and_text: @@ -5971,7 +5918,7 @@ class Eynollah_ocr: else: imgs_ver_flipped = None - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: imgs_bin = cropped_lines_bin[n_start:] imgs_bin = np.array(imgs_bin) imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3) @@ -6001,7 +5948,7 @@ class Eynollah_ocr: imgs_ver_flipped = None - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: imgs_bin = cropped_lines_bin[n_start:n_end] imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3) @@ -6040,7 +5987,7 @@ 
class Eynollah_ocr: if len(indices_where_flipped_conf_value_is_higher)>0: indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher] preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) if len(indices_ver)>0: @@ -6087,7 +6034,7 @@ class Eynollah_ocr: extracted_texts.append("") extracted_conf_value.append(0) del cropped_lines - if self.prediction_with_both_of_rgb_and_bin: + if dir_in_bin is not None: del cropped_lines_bin gc.collect() @@ -6100,7 +6047,7 @@ class Eynollah_ocr: unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) - if self.dir_out_image_text: + if dir_out_image_text: font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists! font = ImageFont.truetype(font_path, 40) diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index d974650..4fa99f7 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -109,13 +109,13 @@ def fit_text_single_line(draw, text, font_path, max_width, max_height): return ImageFont.truetype(font_path, 10) # Smallest font fallback -def return_textlines_split_if_needed(textline_image, textline_image_bin, prediction_with_both_of_rgb_and_bin=False): +def return_textlines_split_if_needed(textline_image, textline_image_bin=None): split_point = return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image) if split_point: image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) - if prediction_with_both_of_rgb_and_bin: + if textline_image_bin is not None: image1_bin = textline_image_bin[:, :split_point,:]# image.crop((0, 0, width2, height)) image2_bin = textline_image_bin[:, split_point:,:]#image.crop((width1, 
0, width, height)) return [image1, image2], [image1_bin, image2_bin] From 1dcc7b5795d92619cd87699e6030cea088441f3c Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Sep 2025 16:38:43 +0200 Subject: [PATCH 13/15] ocr CLI: make --model vs --model_name xor --- src/eynollah/cli.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index a0608f9..3436250 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -495,7 +495,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ "--model_name", help="Specific model file path to use for OCR", type=click.Path(exists=True, file_okay=False), - required=True, ) @click.option( "--tr_ocr", @@ -540,7 +539,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ def ocr(image, dir_in, dir_in_bin, dir_xmls, out, dir_out_image_text, overwrite, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level): initLogging() - assert not model or not model_name, "model directory -m can not be set alongside specific model name --model_name" + assert bool(model) != bool(model_name), "Either -m (model directory) or --model_name (specific model name) must be provided." 
assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr" assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m" assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs" From 2d14d57e4f42988e19cbc976e8b5174dec671b1b Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Sep 2025 19:52:50 +0200 Subject: [PATCH 14/15] ocr: minimal debug logging --- src/eynollah/eynollah.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 533b38f..6191b8e 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5442,7 +5442,7 @@ class Eynollah_ocr: img_crop = img_poly_on_img[y:y+h, x:x+w, :] img_crop[mask_poly==0] = 255 - + self.logger.debug("processing %d lines for '%s'", len(cropped_lines), nn.attrib['id']) if h2w_ratio > 0.1: cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width) ) cropped_lines_meging_indexing.append(0) @@ -5961,6 +5961,7 @@ class Eynollah_ocr: imgs_bin_ver_flipped = None + self.logger.debug("processing next %d lines", len(imgs)) preds = self.prediction_model.predict(imgs, verbose=0) if len(indices_ver)>0: From 5c7e1f21fb5c36c4012eb8b7231af47166da2820 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Sep 2025 19:53:19 +0200 Subject: [PATCH 15/15] test_run: add tests for ocr --- tests/test_run.py | 80 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 7 deletions(-) diff --git a/tests/test_run.py b/tests/test_run.py index 370deef..cd24225 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -7,6 +7,7 @@ from eynollah.cli import ( binarization as binarization_cli, enhancement as enhancement_cli, machine_based_reading_order as 
mbreorder_cli, + ocr as ocr_cli, ) from click.testing import CliRunner from ocrd_modelfactory import page_from_file @@ -76,7 +77,7 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog): return logrec.name == 'eynollah' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(layout_cli, args) + result = runner.invoke(layout_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Job done in')]) == 2 @@ -104,7 +105,7 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca with subtests.test(#msg="test CLI", options=options): with caplog.filtering(only_eynollah): - result = runner.invoke(binarization_cli, args + options) + result = runner.invoke(binarization_cli, args + options, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert any(True for logmsg in logmsgs if logmsg.startswith('Predicting')) @@ -130,7 +131,7 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c return logrec.name == 'SbbBinarizer' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(binarization_cli, args) + result = runner.invoke(binarization_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Predicting')]) == 2 @@ -159,7 +160,7 @@ def test_run_eynollah_enhancement_filename(tmp_path, subtests, pytestconfig, cap with subtests.test(#msg="test CLI", options=options): with caplog.filtering(only_eynollah): - result = runner.invoke(enhancement_cli, args + options) + result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = 
[logrec.message for logrec in caplog.records] assert any(True for logmsg in logmsgs if logmsg.startswith('Image was enhanced')), logmsgs @@ -185,7 +186,7 @@ def test_run_eynollah_enhancement_directory(tmp_path, subtests, pytestconfig, ca return logrec.name == 'enhancement' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(enhancement_cli, args) + result = runner.invoke(enhancement_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] assert len([logmsg for logmsg in logmsgs if logmsg.startswith('Image was enhanced')]) == 2 @@ -206,7 +207,7 @@ def test_run_eynollah_mbreorder_filename(tmp_path, subtests, pytestconfig, caplo return logrec.name == 'mbreorder' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(mbreorder_cli, args) + result = runner.invoke(mbreorder_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] # FIXME: mbreorder has no logging! @@ -235,9 +236,74 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, capl return logrec.name == 'mbreorder' runner = CliRunner() with caplog.filtering(only_eynollah): - result = runner.invoke(mbreorder_cli, args) + result = runner.invoke(mbreorder_cli, args, catch_exceptions=False) assert result.exit_code == 0, result.stdout logmsgs = [logrec.message for logrec in caplog.records] # FIXME: mbreorder has no logging! 
# Trailing diff context of the preceding hunk (tail of
# test_run_eynollah_mbreorder_directory), kept verbatim:
#     #assert len([logmsg for logmsg in logmsgs if logmsg.startswith('???')]) == 2
#     assert len(list(outdir.iterdir())) == 2

def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
    """Run the ``ocr`` CLI on a single image with several option sets.

    Each subtest invokes the CLI with the shared base arguments plus one
    extra option list and checks that the command exits with 0, that the
    PAGE-XML result exists and that its text lines carry a plausible
    amount of text. For the ``-doit`` subtest the rendered text image
    must exist as well.
    """
    infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
    outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
    # The OCR run builds the render path as `<stem>.png` inside the
    # directory passed via -doit, so that is the file to look for
    # (the PAGE-XML result above is the only `.xml` output).
    outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png')
    outrenderfile.parent.mkdir()
    args = [
        '-m', EYNOLLAH_MODELS,
        '-i', str(infile),
        '-dx', str(infile.parent),
        '-o', str(outfile.parent),
        # subtests write to same location
        '--overwrite',
    ]
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.DEBUG)
    def only_eynollah(logrec):
        return logrec.name == 'eynollah'
    runner = CliRunner()
    for options in [
            [], # defaults
            ["-doit", str(outrenderfile.parent)],
            ["-trocr"],
    ]:
        with subtests.test(#msg="test CLI",
                           options=options):
            with caplog.filtering(only_eynollah):
                # catch_exceptions=False: let errors propagate with their
                # traceback instead of being folded into the exit code
                result = runner.invoke(ocr_cli, args + options, catch_exceptions=False)
            assert result.exit_code == 0, result.stdout
            logmsgs = [logrec.message for logrec in caplog.records]
            # FIXME: ocr has no logging!
            #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
            assert outfile.exists()
            if "-doit" in options:
                assert outrenderfile.exists()
            out_tree = page_from_file(str(outfile)).etree
            # only the last TextEquiv of each line is taken into account
            out_texts = out_tree.xpath("//page:TextLine/page:TextEquiv[last()]/page:Unicode/text()", namespaces=NS)
            assert len(out_texts) >= 2, ("result is inaccurate", out_texts)
            assert sum(map(len, out_texts)) > 100, ("result is inaccurate", out_texts)

def test_run_eynollah_ocr_directory(tmp_path, subtests, pytestconfig, caplog):
    """Run the ``ocr`` CLI in directory mode.

    The test resources directory serves both as image input (``-di``) and
    as PAGE-XML input (``-dx``); the run must exit with 0 and leave
    exactly two entries in the output directory.
    """
    indir = testdir.joinpath('resources')
    outdir = tmp_path
    args = [
        '-m', EYNOLLAH_MODELS,
        '-di', str(indir),
        '-dx', str(indir),
        '-o', str(outdir),
    ]
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
    def only_eynollah(logrec):
        return logrec.name == 'eynollah'
    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(ocr_cli, args, catch_exceptions=False)
    assert result.exit_code == 0, result.stdout
    logmsgs = [logrec.message for logrec in caplog.records]
    # FIXME: ocr has no logging!
    #assert any(True for logmsg in logmsgs if logmsg.startswith('???')), logmsgs
    assert len(list(outdir.iterdir())) == 2