diff --git a/Makefile b/Makefile index 73d4d34..177e87c 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif eynollah layout -di $( | -di \ -o \ -m \ @@ -66,6 +77,7 @@ The following options can be used to further configure the processing: |-------------------|:-------------------------------------------------------------------------------| | `-fl` | full layout analysis including all steps and segmentation classes | | `-light` | lighter and faster but simpler method for main region detection and deskewing | +| `-tll` | use light textline detection; should be passed together with the light version (`-light`) | | `-tab` | apply table detection | | `-ae` | apply enhancement (the resulting image is saved to the output directory) | | `-as` | apply scaling | @@ -80,11 +92,57 @@ The following options can be used to further configure the processing: | `-sp ` | save cropped page image to this directory | | `-sa ` | save all (plot, enhanced/binary image, layout) to this directory | -If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals). +If no option is set, the tool performs layout detection of main regions (background, text, images, separators +and marginals). The best output quality is produced when RGB images are used as input rather than greyscale or binarized images. -#### Use as OCR-D processor +### Binarization +The binarization module performs document image binarization using pretrained pixelwise segmentation models. +The command-line interface for binarization of a single image can be called like this: + +```sh +eynollah binarization \ + -m \ + -i \ + -o +``` + +and for processing all images in a directory like this: + +```sh +eynollah binarization \ + -m \ + -di \ + -o +``` + +### OCR +The OCR module performs text recognition from images using two main families of pretrained models: CNN-RNN–based OCR and Transformer-based OCR. 
+ +The command-line interface for OCR can be called like this: + +```sh +eynollah ocr \ + -m | --model_name \ + -i | -di \ + -dx \ + -o +``` + +### Machine-based-reading-order +The machine-based reading-order module employs a pretrained model to identify the reading order from layouts represented in PAGE-XML files. + +The command-line interface for machine-based reading order can be called like this: + +```sh +eynollah machine-based-reading-order \ + -m \ + -xml | -dx \ + -o +``` + +#### Use as OCR-D processor Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) [processor](https://ocr-d.de/en/spec/cli), formally described in [`ocrd-tool.json`](https://github.com/qurator-spk/eynollah/tree/main/src/eynollah/ocrd-tool.json). @@ -92,7 +150,6 @@ In this case, the source image file group with (preferably) RGB images should be ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models 2022-04-05 - If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows: - existing regions are kept and ignored (i.e. in effect they might overlap segments from Eynollah results) - existing annotation (and respective `AlternativeImage`s) are partially _ignored_: @@ -103,7 +160,6 @@ If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynol (because some other preprocessing step was in effect like `denoised`), then the output PAGE-XML will be based on that as new top-level (`@imageFilename`) - ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models 2022-04-05 Still, in general, it makes more sense to add other workflow steps **after** Eynollah. 
diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 9dc326d..b5c2a47 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -49,8 +49,7 @@ def machine_based_reading_order(dir_xml, xml_file, dir_out, model): @main.command() @click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') @click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction') -@click.argument('input_image', required=False) -@click.argument('output_image', required=False) +@click.option("--input-image", "-i", help="input image", type=click.Path(exists=True, dir_okay=False)) @click.option( "--dir_in", "-di", @@ -58,16 +57,14 @@ def machine_based_reading_order(dir_xml, xml_file, dir_out, model): type=click.Path(exists=True, file_okay=False), ) @click.option( - "--dir_out", - "-do", - help="directory for output images", - type=click.Path(exists=True, file_okay=False), + "--output", + "-o", + help="output image (if using -i) or output image directory (if using -di)", + type=click.Path(file_okay=True, dir_okay=True), ) -def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out): - assert (dir_out is None) == (dir_in is None), "Options -di and -do are mutually dependent" - assert (input_image is None) == (output_image is None), "INPUT_IMAGE and OUTPUT_IMAGE are mutually dependent" - assert (dir_in is None) != (input_image is None), "Specify either -di and -do options, or INPUT_IMAGE and OUTPUT_IMAGE" - SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, dir_out=dir_out) +def binarization(patches, model_dir, input_image, dir_in, output): + assert (dir_in is None) != (input_image is None), "Specify either -di and or -i not both" + SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, output=output, dir_in=dir_in) diff --git 
a/src/eynollah/sbb_binarize.py b/src/eynollah/sbb_binarize.py index f43b6ba..2d5035f 100644 --- a/src/eynollah/sbb_binarize.py +++ b/src/eynollah/sbb_binarize.py @@ -314,8 +314,8 @@ class SbbBinarizer: prediction_true = prediction_true.astype(np.uint8) return prediction_true[:,:,0] - def run(self, image=None, image_path=None, save=None, use_patches=False, dir_in=None, dir_out=None): - print(dir_in,'dir_in') + def run(self, image=None, image_path=None, output=None, use_patches=False, dir_in=None): + # print(dir_in,'dir_in') if not dir_in: if (image is not None and image_path is not None) or \ (image is None and image_path is None): @@ -343,8 +343,8 @@ class SbbBinarizer: kernel = np.ones((5, 5), np.uint8) img_last[:, :][img_last[:, :] > 0] = 255 img_last = (img_last[:, :] == 0) * 255 - if save: - cv2.imwrite(save, img_last) + if output: + cv2.imwrite(output, img_last) return img_last else: ls_imgs = os.listdir(dir_in) @@ -374,4 +374,4 @@ class SbbBinarizer: img_last[:, :][img_last[:, :] > 0] = 255 img_last = (img_last[:, :] == 0) * 255 - cv2.imwrite(os.path.join(dir_out,image_stem+'.png'), img_last) + cv2.imwrite(os.path.join(output, image_stem + '.png'), img_last) diff --git a/tests/test_run.py b/tests/test_run.py index 607140e..b4e2dbd 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -85,8 +85,8 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') args = [ '-m', SBBBIN_MODELS, - str(infile), - str(outfile), + '-i', str(infile), + '-o', str(outfile), ] caplog.set_level(logging.INFO) def only_eynollah(logrec): @@ -117,7 +117,7 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c args = [ '-m', SBBBIN_MODELS, '-di', str(indir), - '-do', str(outdir), + '-o', str(outdir), ] caplog.set_level(logging.INFO) def only_eynollah(logrec):