diff --git a/.circleci/config.yml b/.circleci/config.yml
index 751ea54..a782d8f 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -5,6 +5,7 @@ jobs:
   build-python37:
     machine:
       - image: ubuntu-2004:2023.02.1
+
     steps:
       - checkout
       - restore_cache:
diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml
index e06cb35..de742f1 100644
--- a/.github/workflows/test-eynollah.yml
+++ b/.github/workflows/test-eynollah.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.7', '3.8']
+        python-version: ['3.7'] # '3.8'
    steps:
    - uses: actions/checkout@v2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e8815d6..9f6ceff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,30 @@ Versioned according to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+## [0.2.0] - 2023-03-24
+
+Changed:
+
+ * Convert default model from HDF5 to TF SavedModel, #91
+
+Added:
+
+ * parameter `tables` to toggle table detection, #91
+ * default model described in ocrd-tool.json, #91
+
+## [0.1.0] - 2023-03-22
+
+Fixed:
+
+ * Do not produce spurious `TextEquiv`, #68
+ * Less spammy logging, #64, #65, #71
+
+Changed:
+
+ * Upgrade to tensorflow 2.4.0, #74
+ * Improved README
+ * CI: test for python 3.7+, #90
+
 ## [0.0.11] - 2022-02-02
 
 Fixed:
@@ -72,6 +96,8 @@ Fixed:
 
 Initial release
 
+[0.2.0]: ../../compare/v0.2.0...v0.1.0
+[0.1.0]: ../../compare/v0.1.0...v0.0.11
 [0.0.11]: ../../compare/v0.0.11...v0.0.10
 [0.0.10]: ../../compare/v0.0.10...v0.0.9
 [0.0.9]: ../../compare/v0.0.9...v0.0.8
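The HDF5-to-SavedModel switch recorded in the changelog amounts, in TensorFlow 2.x, to a load-and-resave round trip. A minimal sketch, assuming a Keras model stored in HDF5; the filename is hypothetical, not one of eynollah's actual model names:

```python
# Hypothetical one-off conversion from HDF5 to TF SavedModel (TensorFlow 2.x).
# "some_model.h5" is a placeholder name, not a confirmed eynollah file.
from tensorflow.keras.models import load_model

model = load_model("some_model.h5", compile=False)  # read the HDF5 file
model.save("some_model")  # a path without .h5 is written as a SavedModel directory
```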
diff --git a/README.md b/README.md
index 4b7be73..1b3a589 100644
--- a/README.md
+++ b/README.md
@@ -38,11 +38,15 @@ cd eynollah; pip install -e .
 
 Alternatively, you can run `make install` or `make install-dev` for editable installation.
 
-### Models
+<details>
+  <summary>click to expand/collapse</summary>
+
+First, this tool makes use of up to 9 trained models which are responsible for different operations like size detection, column classification, image enhancement, page extraction, main layout detection, full layout detection and textline detection. That does not mean that all 9 models are always required for every document; based on the document characteristics and the parameters specified, different scenarios can be applied.
 
 Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/).
 
-Alternatively, running `make models` will download and extract models to `$(PWD)/models_eynollah`.
+
+* If you set the `-ae` (**a**llow image **e**nhancement) parameter to `true`, the tool will first check the ppi (pixels per inch) of the image and, when it is less than 300, resize the image; only then will image enhancement take place. Image enhancement can also happen without this option, but with `-ae` set to `true`, the layout XML data (e.g. coordinates) will be based on the resized and enhanced image instead of the original image.
 
 In case you want to train your own model to use with Eynollah, have a look at [sbb_pixelwise_segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation).
@@ -99,3 +103,4 @@ ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models
 ```
 
 still uses the original (RGB) image despite any binarization that may have occurred in previous OCR-D processing steps
+</details>
diff --git a/qurator/__init__.py b/qurator/__init__.py
index 5284146..e69de29 100644
--- a/qurator/__init__.py
+++ b/qurator/__init__.py
@@ -1 +0,0 @@
-__import__("pkg_resources").declare_namespace(__name__)
diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py
index 8c42f64..4bbd3f2 100644
--- a/qurator/eynollah/cli.py
+++ b/qurator/eynollah/cli.py
@@ -29,6 +29,7 @@ from qurator.eynollah.eynollah import Eynollah
     "-m",
     help="directory of models",
     type=click.Path(exists=True, file_okay=False),
+    required=True,
 )
 @click.option(
     "--save_images",
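With `required=True` on `-m`, Click itself rejects an invocation that omits the models directory before any processing starts. A quick sketch using Click's test runner; the command object `main` and the `-i`/`-o` options are assumptions based on the package's CLI, not shown in this diff:

```python
# Sketch: exercising the now-required -m option via Click's test runner.
# Assumes qurator.eynollah.cli exposes the command as `main` and that the
# CLI takes -i (input image) and -o (output directory) options.
from click.testing import CliRunner
from qurator.eynollah.cli import main

runner = CliRunner()
result = runner.invoke(main, ["-i", "page.tif", "-o", "out"])  # no -m passed
print(result.exit_code)  # non-zero; Click reports the missing '-m' option
```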
self.logger.debug("exit resize_and_enhance_image_with_column_classifier") return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin @@ -655,10 +653,6 @@ class Eynollah: except: self.logger.warning("no GPU device available") - # try: - # model = load_model(model_dir, compile=False) - # except: - # model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) if model_dir.endswith('.h5') and Path(model_dir[:-3]).exists(): # prefer SavedModel over HDF5 format if it exists model_dir = model_dir[:-3] @@ -672,6 +666,7 @@ class Eynollah: model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) self.models[model_dir] = model + return model, None def do_prediction(self, patches, img, model, marginal_of_patch_percent=0.1): @@ -808,6 +803,7 @@ class Eynollah: label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) @@ -858,7 +854,8 @@ class Eynollah: index_y_d = img_h - img_height_model img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), + verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] @@ -957,8 +954,6 @@ class Eynollah: prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color prediction_true = prediction_true.astype(np.uint8) - ##del model - ##gc.collect() return prediction_true def extract_page(self): @@ -1044,7 +1039,6 @@ class Eynollah: croped_page, page_coord = crop_image_inside_box(box, img) return croped_page, page_coord - def extract_text_regions(self, img, patches, cols): self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] @@ -1138,7 +1132,6 @@ class Eynollah: marginal_of_patch_percent = 0.1 prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) - self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 @@ -1541,6 +1534,7 @@ class Eynollah: else: return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] + def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): self.logger.debug('enter do_work_of_slopes') slope_biggest = 0 @@ -1713,6 +1707,7 @@ class Eynollah: if not self.dir_in: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2) + img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) if self.dir_in: @@ -1761,8 +1756,6 @@ class Eynollah: prediction_bin = prediction_bin*255 prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - if not self.dir_in: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) @@ -1781,7 +1774,6 @@ class Eynollah: mask_lines_only=(prediction_regions_org[:,:]==3)*1 - mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1 @@ -1799,7 +1791,6 @@ class Eynollah: text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, 
diff --git a/qurator/eynollah/ocrd-tool.json b/qurator/eynollah/ocrd-tool.json
index 1291979..fc9ee72 100644
--- a/qurator/eynollah/ocrd-tool.json
+++ b/qurator/eynollah/ocrd-tool.json
@@ -1,5 +1,5 @@
 {
-  "version": "0.0.11",
+  "version": "0.2.0",
   "git_url": "https://github.com/qurator-spk/eynollah",
   "tools": {
     "ocrd-eynollah-segment": {
@@ -49,7 +49,17 @@
           "default": false,
           "description": "ignore the special role of headings during reading order detection"
         }
-      }
+      },
+      "resources": [
+        {
+          "description": "models for eynollah (TensorFlow format)",
+          "url": "https://qurator-data.de/eynollah/2021-04-25/SavedModel.tar.gz",
+          "name": "default",
+          "size": 1483106598,
+          "type": "archive",
+          "path_in_archive": "default"
+        }
+      ]
     }
   }
 }
diff --git a/qurator/eynollah/utils/contour.py b/qurator/eynollah/utils/contour.py
index b29b5b6..bac8235 100644
--- a/qurator/eynollah/utils/contour.py
+++ b/qurator/eynollah/utils/contour.py
@@ -20,7 +20,7 @@ def contours_in_same_horizon(cy_main_hor):
             list_h.append(i)
         if len(list_h) > 1:
             all_args.append(list(set(list_h)))
-    return np.unique(all_args)
+    return np.unique(np.array(all_args, dtype=object))
 
 def find_contours_mean_y_diff(contours_main):
     M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
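The `contour.py` change is a NumPy compatibility fix: `all_args` is a ragged list of lists, and newer NumPy releases (1.24 and later) raise `ValueError` when asked to build an array from ragged input unless `dtype=object` is explicit. A small demonstration with made-up data:

```python
import numpy as np

all_args = [[0, 1], [2, 3, 4]]          # ragged: sublists of unequal length
# np.unique(all_args)                   # raises ValueError on NumPy >= 1.24
arr = np.array(all_args, dtype=object)  # explicit object dtype is accepted
print(np.unique(arr))                   # same call as the patched function
```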
diff --git a/setup.py b/setup.py
index f4dc6b1..807eae7 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,6 @@ setup(
     author='Vahid Rezanezhad',
     url='https://github.com/qurator-spk/eynollah',
     license='Apache License 2.0',
-    namespace_packages=['qurator'],
     packages=find_packages(exclude=['tests']),
     install_requires=install_requires,
     package_data={
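Finally, dropping `namespace_packages` here, together with emptying `qurator/__init__.py` above, turns `qurator` from a pkg_resources-style namespace package (long deprecated by setuptools) into a regular package; import paths do not change. A quick sanity check, assuming an installed copy of the package:

```python
# Sketch: qurator behaves as a regular package after this change.
import qurator
from qurator.eynollah.eynollah import Eynollah  # same import path as in cli.py

print(qurator.__file__)  # resolves to the now-empty qurator/__init__.py
```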