diff --git a/Makefile b/Makefile index 73d4d34..177e87c 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif eynollah layout -di $( | -di \ -o \ -m \ @@ -66,6 +77,7 @@ The following options can be used to further configure the processing: |-------------------|:-------------------------------------------------------------------------------| | `-fl` | full layout analysis including all steps and segmentation classes | | `-light` | lighter and faster but simpler method for main region detection and deskewing | +| `-tll` | use the light textline detection method; must be passed together with `-light` | | `-tab` | apply table detection | | `-ae` | apply enhancement (the resulting image is saved to the output directory) | | `-as` | apply scaling | @@ -80,11 +92,38 @@ The following options can be used to further configure the processing: | `-sp ` | save cropped page image to this directory | | `-sa ` | save all (plot, enhanced/binary image, layout) to this directory | -If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals). +If no option is set, the tool performs layout detection of main regions (background, text, images, separators +and marginals). The best output quality is produced when RGB images are used as input rather than greyscale or binarized images. -#### Use as OCR-D processor +### Binarization +The binarization module performs document image binarization using pretrained pixelwise segmentation models. 
+The command-line interface for binarization of a single image can be called like this: + +```sh +eynollah binarization \ + -m <directory containing model files> \ + -i <single image file> \ + -o <output image> +``` + +and for processing all images in a directory like this: + +```sh +eynollah binarization \ + -m <directory containing model files> \ + -di <directory containing image files> \ + -o <output directory> +``` + +### OCR +Under development + +### Machine-based-reading-order +Under development + +#### Use as OCR-D processor Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) [processor](https://ocr-d.de/en/spec/cli), formally described in [`ocrd-tool.json`](https://github.com/qurator-spk/eynollah/tree/main/src/eynollah/ocrd-tool.json). @@ -92,7 +131,6 @@ In this case, the source image file group with (preferably) RGB images should be ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models 2022-04-05 - If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows: - existing regions are kept and ignored (i.e. in effect they might overlap segments from Eynollah results) - existing annotation (and respective `AlternativeImage`s) are partially _ignored_: @@ -103,7 +141,6 @@ If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynol (because some other preprocessing step was in effect like `denoised`), then the output PAGE-XML will be based on that as new top-level (`@imageFilename`) - ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models 2022-04-05 Still, in general, it makes more sense to add other workflow steps **after** Eynollah. 
diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index c189aca..42f9bca 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -48,8 +48,7 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i @main.command() @click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') @click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction') -@click.argument('input_image', required=False) -@click.argument('output_image', required=False) +@click.option("--input-image", "-i", help="input image", type=click.Path(exists=True, dir_okay=False)) @click.option( "--dir_in", "-di", @@ -57,16 +56,14 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i type=click.Path(exists=True, file_okay=False), ) @click.option( - "--dir_out", - "-do", - help="directory for output images", - type=click.Path(exists=True, file_okay=False), + "--output", + "-o", + help="output image (if using -i) or output image directory (if using -di)", + type=click.Path(file_okay=True, dir_okay=True), ) -def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out): - assert (dir_out is None) == (dir_in is None), "Options -di and -do are mutually dependent" - assert (input_image is None) == (output_image is None), "INPUT_IMAGE and OUTPUT_IMAGE are mutually dependent" - assert (dir_in is None) != (input_image is None), "Specify either -di and -do options, or INPUT_IMAGE and OUTPUT_IMAGE" - SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, dir_out=dir_out) +def binarization(patches, model_dir, input_image, dir_in, output): + assert (dir_in is None) != (input_image is None), "Specify either -di and or -i not both" + SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, output=output, dir_in=dir_in) 
diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d47016b..5875440 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4343,12 +4343,12 @@ class Eynollah: polygons_lines_xml = [] contours_tables = [] ocr_all_textlines = None - conf_contours_textregions =None + conf_contours_textregions = [0] pcgts = self.writer.build_pagexml_no_full_layout( cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions) + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions, self.skip_layout_and_reading_order) return pcgts #print("text region early -1 in %.1fs", time.time() - t0) diff --git a/src/eynollah/sbb_binarize.py b/src/eynollah/sbb_binarize.py index f43b6ba..2d5035f 100644 --- a/src/eynollah/sbb_binarize.py +++ b/src/eynollah/sbb_binarize.py @@ -314,8 +314,8 @@ class SbbBinarizer: prediction_true = prediction_true.astype(np.uint8) return prediction_true[:,:,0] - def run(self, image=None, image_path=None, save=None, use_patches=False, dir_in=None, dir_out=None): - print(dir_in,'dir_in') + def run(self, image=None, image_path=None, output=None, use_patches=False, dir_in=None): + # print(dir_in,'dir_in') if not dir_in: if (image is not None and image_path is not None) or \ (image is None and image_path is None): @@ -343,8 +343,8 @@ class SbbBinarizer: kernel = np.ones((5, 5), np.uint8) img_last[:, :][img_last[:, :] > 0] = 255 img_last = (img_last[:, :] == 0) * 255 - if save: - cv2.imwrite(save, img_last) + if output: + cv2.imwrite(output, img_last) return img_last else: ls_imgs = os.listdir(dir_in) @@ -374,4 +374,4 @@ class SbbBinarizer: img_last[:, :][img_last[:, :] > 0] = 255 img_last = (img_last[:, :] == 0) * 255 - 
cv2.imwrite(os.path.join(dir_out,image_stem+'.png'), img_last) + cv2.imwrite(os.path.join(output, image_stem + '.png'), img_last) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 92e353f..e589fd4 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -168,7 +168,7 @@ class EynollahXmlWriter(): with open(self.output_filename, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion): + def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion, skip_layout_reading_order=False): self.logger.debug('enter build_pagexml_no_full_layout') # create the file structure @@ -184,7 +184,7 @@ class EynollahXmlWriter(): for mm in range(len(found_polygons_text_region)): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm]), + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]), ) #textregion.set_conf(conf_contours_textregion[mm]) page.add_TextRegion(textregion) @@ -303,18 +303,28 @@ class EynollahXmlWriter(): return pcgts - def 
calculate_polygon_coords(self, contour, page_coord): + def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False): self.logger.debug('enter calculate_polygon_coords') coords = '' for value_bbox in contour: - if len(value_bbox) == 2: - coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x)) - coords += ',' - coords += str(int((value_bbox[1] + page_coord[0]) / self.scale_y)) + if skip_layout_reading_order: + if len(value_bbox) == 2: + coords += str(int((value_bbox[0]) / self.scale_x)) + coords += ',' + coords += str(int((value_bbox[1]) / self.scale_y)) + else: + coords += str(int((value_bbox[0][0]) / self.scale_x)) + coords += ',' + coords += str(int((value_bbox[0][1]) / self.scale_y)) else: - coords += str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x)) - coords += ',' - coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y)) + if len(value_bbox) == 2: + coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x)) + coords += ',' + coords += str(int((value_bbox[1] + page_coord[0]) / self.scale_y)) + else: + coords += str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x)) + coords += ',' + coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y)) coords=coords + ' ' return coords[:-1] diff --git a/tests/test_run.py b/tests/test_run.py index 607140e..b4e2dbd 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -85,8 +85,8 @@ def test_run_eynollah_binarization_filename(tmp_path, subtests, pytestconfig, ca outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') args = [ '-m', SBBBIN_MODELS, - str(infile), - str(outfile), + '-i', str(infile), + '-o', str(outfile), ] caplog.set_level(logging.INFO) def only_eynollah(logrec): @@ -117,7 +117,7 @@ def test_run_eynollah_binarization_directory(tmp_path, subtests, pytestconfig, c args = [ '-m', SBBBIN_MODELS, '-di', str(indir), - '-do', str(outdir), + '-o', str(outdir), ] caplog.set_level(logging.INFO) def only_eynollah(logrec):