Merge branch 'main' into eynollah_light

vahidrezanezhad 2 years ago committed by GitHub
commit 1621532092

@ -5,6 +5,7 @@ jobs:
build-python37:
machine:
- image: ubuntu-2004:2023.02.1
steps:
- checkout
- restore_cache:

@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ['3.7', '3.8']
+ python-version: ['3.7'] # '3.8'
steps:
- uses: actions/checkout@v2

@ -5,6 +5,30 @@ Versioned according to [Semantic Versioning](http://semver.org/).
## Unreleased
## [0.2.0] - 2023-03-24
Changed:
* Convert default model from HDF5 to TF SavedModel, #91
Added:
* parameter `tables` to toggle table detection, #91
* default model described in ocrd-tool.json, #91
## [0.1.0] - 2023-03-22
Fixed:
* Do not produce spurious `TextEquiv`, #68
* Less spammy logging, #64, #65, #71
Changed:
* Upgrade to tensorflow 2.4.0, #74
* Improved README
* CI: test for python 3.7+, #90
## [0.0.11] - 2022-02-02
Fixed:
@ -72,6 +96,8 @@ Fixed:
Initial release
<!-- link-labels -->
[0.2.0]: ../../compare/v0.2.0...v0.1.0
[0.1.0]: ../../compare/v0.1.0...v0.0.11
[0.0.11]: ../../compare/v0.0.11...v0.0.10
[0.0.10]: ../../compare/v0.0.10...v0.0.9
[0.0.9]: ../../compare/v0.0.9...v0.0.8

@ -38,11 +38,15 @@ cd eynollah; pip install -e .
Alternatively, you can run `make install` or `make install-dev` for editable installation.
### Models
<details>
<summary>click to expand/collapse</summary><br/>
Eynollah makes use of up to 9 trained models, which are responsible for different operations such as size detection, column classification, image enhancement, page extraction, main layout detection, full layout detection and textline detection. That does not mean that all 9 models are always required for every document: based on the document characteristics and the parameters specified, different scenarios can be applied.
Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/).
Alternatively, running `make models` will download and extract models to `$(PWD)/models_eynollah`.
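For reference, a minimal Python sketch of what the download step amounts to, assuming the SavedModel archive URL listed in the `resources` entry of `ocrd-tool.json` further below and an assumed destination directory; `make models` remains the canonical way:

```python
import tarfile
import urllib.request

# Hypothetical stand-in for `make models`: fetch the SavedModel archive
# referenced in ocrd-tool.json and unpack it into a local models directory.
MODELS_URL = "https://qurator-data.de/eynollah/2021-04-25/SavedModel.tar.gz"

archive_path, _ = urllib.request.urlretrieve(MODELS_URL, "SavedModel.tar.gz")
with tarfile.open(archive_path) as archive:
    archive.extractall("models_eynollah")  # assumed destination directory
```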
* If you set the `-ae` (**a**llow image **e**nhancement) parameter to `true`, the tool will first check the ppi (pixels per inch) of the image and, if it is less than 300, resize the image; only then will image enhancement take place. Image enhancement can also occur without this option, but by setting it to `true` the layout XML data (e.g. coordinates) will be based on the resized and enhanced image instead of the original image.
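A minimal sketch of that gating logic, with illustrative names only (the actual resizing and enhancement are performed by eynollah's own models):

```python
import cv2  # OpenCV, already a dependency of eynollah


def maybe_resize_for_enhancement(image, dpi, allow_enhancement, target_dpi=300):
    """Illustrative only: upscale a low-resolution page before enhancement
    when the -ae / allow_enhancement option is requested."""
    if allow_enhancement and dpi < target_dpi:
        scale = target_dpi / dpi
        new_size = (int(image.shape[1] * scale), int(image.shape[0] * scale))
        image = cv2.resize(image, new_size, interpolation=cv2.INTER_CUBIC)
    return image
```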
In case you want to train your own model to use with Eynollah, have a look at [sbb_pixelwise_segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation).
@ -99,3 +103,4 @@ ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models
```
still uses the original (RGB) image despite any binarization that may have occurred in previous OCR-D processing steps
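A hedged sketch of how an OCR-D processor can request the non-binarized image, assuming the `ocrd` `Workspace.image_from_page` API with its `feature_filter` argument; this illustrates the behaviour described above and is not necessarily the exact code in eynollah's OCR-D processor:

```python
def get_raw_page_image(workspace, page, page_id):
    """Illustrative helper: request a page image whose derivation does NOT
    include binarization, i.e. the original (RGB) image, even if a previous
    OCR-D step attached a binarized AlternativeImage."""
    page_image, page_coords, image_info = workspace.image_from_page(
        page, page_id, feature_filter='binarized')
    return page_image
```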

@ -1 +0,0 @@
__import__("pkg_resources").declare_namespace(__name__)

@ -29,6 +29,7 @@ from qurator.eynollah.eynollah import Eynollah
"-m",
help="directory of models",
type=click.Path(exists=True, file_okay=False),
required=True,
)
@click.option(
"--save_images",

@ -393,7 +393,6 @@ class Eynollah:
prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg
prediction_true = prediction_true.astype(int)
return prediction_true
def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred):
@ -495,12 +494,11 @@ class Eynollah:
label_p_pred = model_num_classifier.predict(img_in, verbose=0)
else:
label_p_pred = self.model_classifier.predict(img_in, verbose=0)
num_col = np.argmax(label_p_pred[0]) + 1
self.logger.info("Found %s columns (%s)", num_col, label_p_pred)
img_new, _ = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred)
if img_new.shape[1] > img.shape[1]:
@ -535,6 +533,7 @@ class Eynollah:
img = self.imread()
img_bin = None
t1 = time.time()
_, page_coord = self.early_page_for_num_of_column_classification(img_bin)
if not self.dir_in:
model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier)
@ -578,7 +577,6 @@ class Eynollah:
image_res = np.copy(img)
is_image_enhanced = False
self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin
@ -655,10 +653,6 @@ class Eynollah:
except:
self.logger.warning("no GPU device available")
# try:
# model = load_model(model_dir, compile=False)
# except:
# model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
if model_dir.endswith('.h5') and Path(model_dir[:-3]).exists():
# prefer SavedModel over HDF5 format if it exists
model_dir = model_dir[:-3]
@ -672,6 +666,7 @@ class Eynollah:
model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
self.models[model_dir] = model
return model, None
def do_prediction(self, patches, img, model, marginal_of_patch_percent=0.1):
@ -808,6 +803,7 @@ class Eynollah:
label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]))
seg = np.argmax(label_p_pred, axis=3)[0]
seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
prediction_true = resize_image(seg_color, img_h_page, img_w_page)
@ -858,7 +854,8 @@ class Eynollah:
index_y_d = img_h - img_height_model
img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
- label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
+ label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]),
+                              verbose=0)
seg = np.argmax(label_p_pred, axis=3)[0]
@ -957,8 +954,6 @@ class Eynollah:
prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color
prediction_true = prediction_true.astype(np.uint8)
##del model
##gc.collect()
return prediction_true
def extract_page(self):
@ -1044,7 +1039,6 @@ class Eynollah:
croped_page, page_coord = crop_image_inside_box(box, img)
return croped_page, page_coord
def extract_text_regions(self, img, patches, cols):
self.logger.debug("enter extract_text_regions")
img_height_h = img.shape[0]
@ -1138,7 +1132,6 @@ class Eynollah:
marginal_of_patch_percent = 0.1
prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent)
prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
self.logger.debug("exit extract_text_regions")
return prediction_regions, prediction_regions2
@ -1541,6 +1534,7 @@ class Eynollah:
else:
return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0]
def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process):
self.logger.debug('enter do_work_of_slopes')
slope_biggest = 0
@ -1713,6 +1707,7 @@ class Eynollah:
if not self.dir_in:
model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2)
img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]))
if self.dir_in:
@ -1762,8 +1757,6 @@ class Eynollah:
prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
if not self.dir_in:
model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens)
ratio_y=1
@ -1781,7 +1774,6 @@ class Eynollah:
mask_lines_only=(prediction_regions_org[:,:]==3)*1
mask_texts_only=(prediction_regions_org[:,:]==1)*1
mask_images_only=(prediction_regions_org[:,:]==2)*1
@ -1799,7 +1791,6 @@ class Eynollah:
text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))
return text_regions_p_true, erosion_hurts, polygons_lines_xml
except:
@ -1821,7 +1812,6 @@ class Eynollah:
prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
if not self.dir_in:
model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens)
@ -1840,7 +1830,6 @@ class Eynollah:
prediction_regions_org=prediction_regions_org[:,:,0]
#mask_lines_only=(prediction_regions_org[:,:]==3)*1
#img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
#prediction_regions_org = self.do_prediction(True, img, model_region)
@ -2369,7 +2358,6 @@ class Eynollah:
img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:]
prediction_ext = self.do_prediction(patches, img_new, model_region)
pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), model_region)
pre_updown = cv2.flip(pre_updown, -1)
@ -2392,7 +2380,6 @@ class Eynollah:
img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:]
prediction_ext = self.do_prediction(patches, img_new, model_region)
pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), model_region)
pre_updown = cv2.flip(pre_updown, -1)
@ -2408,9 +2395,7 @@ class Eynollah:
pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], model_region)
pre2 = self.do_prediction(patches, img[:,img_w_half:,:], model_region)
pre_full = self.do_prediction(patches, img[:,:,:], model_region)
pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), model_region)
pre_updown = cv2.flip(pre_updown, -1)
@ -2432,8 +2417,6 @@ class Eynollah:
prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20)
prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20)
return prediction_table_erode.astype(np.int16)
def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts):
img_g = self.imread(grayscale=True, uint8=True)
@ -2558,6 +2541,7 @@ class Eynollah:
textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline)
if self.textline_light:
textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16)
if self.plotter:
self.plotter.save_plot_of_textlines(textline_mask_tot_ea, image_page)
return textline_mask_tot_ea
@ -2787,7 +2771,6 @@ class Eynollah:
regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4
regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully)
regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier)
if num_col_classifier > 2:
regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0
@ -2836,6 +2819,7 @@ class Eynollah:
"""
self.logger.debug("enter run")
t0_tot = time.time()
if not self.dir_in:
@ -3023,6 +3007,7 @@ class Eynollah:
else:
slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
else:
scale_param = 1

@ -1,5 +1,5 @@
{
"version": "0.0.11",
"version": "0.2.0",
"git_url": "https://github.com/qurator-spk/eynollah",
"tools": {
"ocrd-eynollah-segment": {
@ -49,7 +49,17 @@
"default": false,
"description": "ignore the special role of headings during reading order detection"
}
},
"resources": [
{
"description": "models for eynollah (TensorFlow format)",
"url": "https://qurator-data.de/eynollah/2021-04-25/SavedModel.tar.gz",
"name": "default",
"size": 1483106598,
"type": "archive",
"path_in_archive": "default"
}
]
}
}
}

@ -20,7 +20,7 @@ def contours_in_same_horizon(cy_main_hor):
list_h.append(i)
if len(list_h) > 1:
all_args.append(list(set(list_h)))
- return np.unique(all_args)
+ return np.unique(np.array(all_args, dtype=object))
def find_contours_mean_y_diff(contours_main):
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
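For context on the `contours_in_same_horizon` change above: `all_args` holds index lists of unequal length, and newer NumPy refuses to build a regular array from such ragged input (a deprecation warning, and a `ValueError` from NumPy 1.24 on), hence the explicit `dtype=object` wrapper. A small sketch with assumed example data:

```python
import numpy as np

# Hypothetical grouping result with unequal-length inner lists ("ragged").
all_args = [[0, 1, 2], [3, 4]]

# np.array(all_args) without dtype=object cannot form a rectangular array;
# dtype=object keeps each inner list as a single array element instead.
arr = np.array(all_args, dtype=object)
print(np.unique(arr))  # deduplicates the grouped index lists
```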

@ -13,7 +13,6 @@ setup(
author='Vahid Rezanezhad',
url='https://github.com/qurator-spk/eynollah',
license='Apache License 2.0',
namespace_packages=['qurator'],
packages=find_packages(exclude=['tests']),
install_requires=install_requires,
package_data={
