💀 remove dead code from eynollah.py

2026-03-13 02:31:56 +01:00 · 2025-12-10 13:14:32 +01:00 · 2025-12-10 13:14:32 +01:00 · fcd87fc3cf
commit fcd87fc3cf
parent 1eef5514d7
4 changed files with 5 additions and 339 deletions
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -83,9 +83,7 @@ from .utils.rotate import (
 from .utils.separate_lines import (
    separate_lines_new2,
    return_deskew_slop,
-    do_work_of_slopes_new,
    do_work_of_slopes_new_curved,
-    do_work_of_slopes_new_light,
 )
 from .utils.drop_capitals import (
    adhere_drop_capital_region_into_corresponding_textline,
@@ -96,7 +94,7 @@ from .utils.resize import resize_image
 from .utils.shm import share_ndarray
 from .utils import (
    is_image_filename,
-    boosting_headers_by_longshot_region_segmentation,
+    isNaN,
    crop_image_inside_box,
    box2rect,
    box2slice,
@@ -290,9 +288,6 @@ class Eynollah:
            key += '_uint8'
        return self._imgs[key].copy()

-    def isNaN(self, num):
-        return num != num
-
    def predict_enhancement(self, img):
        self.logger.debug("enter predict_enhancement")

@@ -1637,42 +1632,6 @@ class Eynollah:
                all_box_coord,
                slopes)

-    def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
-        if not len(contours):
-            return [], [], []
-        self.logger.debug("enter get_slopes_and_deskew_new_light")
-        with share_ndarray(textline_mask_tot) as textline_mask_tot_shared:
-            assert self.executor
-            results = self.executor.map(partial(do_work_of_slopes_new_light,
-                                                textline_mask_tot_ea=textline_mask_tot_shared,
-                                                slope_deskew=slope_deskew,
-                                                textline_light=True,
-                                                logger=self.logger,),
-                                        boxes, contours, contours_par)
-            results = list(results) # exhaust prior to release
-        #textline_polygons, box_coord, slopes = zip(*results)
-        self.logger.debug("exit get_slopes_and_deskew_new_light")
-        return tuple(zip(*results))
-
-    def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
-        if not len(contours):
-            return [], [], []
-        self.logger.debug("enter get_slopes_and_deskew_new")
-        with share_ndarray(textline_mask_tot) as textline_mask_tot_shared:
-            assert self.executor
-            results = self.executor.map(partial(do_work_of_slopes_new,
-                                                textline_mask_tot_ea=textline_mask_tot_shared,
-                                                slope_deskew=slope_deskew,
-                                                MAX_SLOPE=MAX_SLOPE,
-                                                KERNEL=KERNEL,
-                                                logger=self.logger,
-                                                plotter=self.plotter,),
-                                        boxes, contours, contours_par)
-            results = list(results) # exhaust prior to release
-        #textline_polygons, box_coord, slopes = zip(*results)
-        self.logger.debug("exit get_slopes_and_deskew_new")
-        return tuple(zip(*results))
-
    def get_slopes_and_deskew_new_curved(self, contours_par, textline_mask_tot, boxes,
                                         mask_texts_only, num_col, scale_par, slope_deskew):
        if not len(contours_par):
@@ -1959,145 +1918,6 @@ class Eynollah:
                img_bin,
                confidence_matrix)

-    def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier):
-        self.logger.debug("enter get_regions_from_xy_2models")
-        erosion_hurts = False
-        img_org = np.copy(img)
-        img_height_h = img_org.shape[0]
-        img_width_h = img_org.shape[1]
-
-        ratio_y=1.3
-        ratio_x=1
-
-        img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
-        prediction_regions_org_y = self.do_prediction(True, img, self.model_zoo.get("region"))
-        prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h )
-
-        #plt.imshow(prediction_regions_org_y[:,:,0])
-        #plt.show()
-        prediction_regions_org_y = prediction_regions_org_y[:,:,0]
-        mask_zeros_y = (prediction_regions_org_y[:,:]==0)*1
-
-        ##img_only_regions_with_sep = ( (prediction_regions_org_y[:,:] != 3) & (prediction_regions_org_y[:,:] != 0) )*1
-        img_only_regions_with_sep = (prediction_regions_org_y == 1).astype(np.uint8)
-        try:
-            img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=20)
-            _, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
-            img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1)))
-
-            prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
-            prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
-
-            prediction_regions_org=prediction_regions_org[:,:,0]
-            prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros_y[:,:]==1)]=0
-
-            img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]))
-
-            prediction_regions_org2 = self.do_prediction(True, img, self.model_zoo.get("region_p2"), marginal_of_patch_percent=0.2)
-            prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h )
-
-            mask_zeros2 = (prediction_regions_org2[:,:,0] == 0)
-            mask_lines2 = (prediction_regions_org2[:,:,0] == 3)
-            text_sume_early = (prediction_regions_org[:,:] == 1).sum()
-            prediction_regions_org_copy = np.copy(prediction_regions_org)
-            prediction_regions_org_copy[(prediction_regions_org_copy[:,:]==1) & (mask_zeros2[:,:]==1)] = 0
-            text_sume_second = ((prediction_regions_org_copy[:,:]==1)*1).sum()
-            rate_two_models = 100. * text_sume_second / text_sume_early
-
-            self.logger.info("ratio_of_two_models: %s", rate_two_models)
-            if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD):
-                prediction_regions_org = np.copy(prediction_regions_org_copy)
-
-            prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3
-            mask_lines_only=(prediction_regions_org[:,:]==3)*1
-            prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2)
-            prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2)
-
-            if rate_two_models<=40:
-                if self.input_binary:
-                    prediction_bin = np.copy(img_org)
-                else:
-                    prediction_bin = self.do_prediction(True, img_org, self.model_zoo.get("binarization"), n_batch_inference=5)
-                    prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
-                    prediction_bin = 255 * (prediction_bin[:,:,0]==0)
-                    prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
-
-                ratio_y=1
-                ratio_x=1
-
-                img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
-
-                prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
-                prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
-                prediction_regions_org=prediction_regions_org[:,:,0]
-
-                mask_lines_only=(prediction_regions_org[:,:]==3)*1
-
-            mask_texts_only=(prediction_regions_org[:,:]==1)*1
-            mask_images_only=(prediction_regions_org[:,:]==2)*1
-
-            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
-            polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
-
-            polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
-            polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)
-
-            text_regions_p_true = np.zeros(prediction_regions_org.shape)
-            text_regions_p_true = cv2.fillPoly(text_regions_p_true,pts = polygons_of_only_lines, color=(3, 3, 3))
-            text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
-
-            text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))
-
-            self.logger.debug("exit get_regions_from_xy_2models")
-            return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_only_texts
-        except:
-            if self.input_binary:
-                prediction_bin = np.copy(img_org)
-                prediction_bin = self.do_prediction(True, img_org, self.model_zoo.get("binarization"), n_batch_inference=5)
-                prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
-                prediction_bin = 255 * (prediction_bin[:,:,0]==0)
-                prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
-            else:
-                prediction_bin = np.copy(img_org)
-            ratio_y=1
-            ratio_x=1
-
-
-            img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
-            prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
-            prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
-            prediction_regions_org=prediction_regions_org[:,:,0]
-
-            #mask_lines_only=(prediction_regions_org[:,:]==3)*1
-            #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
-
-            #prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get_model("region"))
-            #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
-            #prediction_regions_org = prediction_regions_org[:,:,0]
-            #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0
-
-            mask_lines_only = (prediction_regions_org == 3)*1
-            mask_texts_only = (prediction_regions_org == 1)*1
-            mask_images_only= (prediction_regions_org == 2)*1
-
-            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
-            polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
-
-            polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
-            polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
-
-            text_regions_p_true = np.zeros(prediction_regions_org.shape)
-            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3))
-
-            text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
-            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1))
-
-            erosion_hurts = True
-            self.logger.debug("exit get_regions_from_xy_2models")
-            return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_only_texts
-
    def do_order_of_regions(
            self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):

@@ -2343,7 +2163,7 @@ class Eynollah:

            img_comm = cv2.fillPoly(img_comm, pts=main_contours, color=indiv)

-        if not self.isNaN(slope_mean_hor):
+        if not isNaN(slope_mean_hor):
            image_revised_last = np.zeros(image_regions_eraly_p.shape[:2])
            for i in range(len(boxes)):
                box_ys = slice(*boxes[i][2:4])
@@ -2622,52 +2442,6 @@ class Eynollah:

        return  page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page

-    def run_graphics_and_columns(
-            self, text_regions_p_1,
-            num_col_classifier, num_column_is_classified, erosion_hurts):
-
-        t_in_gr = time.time()
-        img_g = self.imread(grayscale=True, uint8=True)
-
-        img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3))
-        img_g3 = img_g3.astype(np.uint8)
-        img_g3[:, :, 0] = img_g[:, :]
-        img_g3[:, :, 1] = img_g[:, :]
-        img_g3[:, :, 2] = img_g[:, :]
-
-        image_page, page_coord, cont_page = self.extract_page()
-
-        if self.tables:
-            table_prediction = self.get_tables_from_model(image_page, num_col_classifier)
-        else:
-            table_prediction = np.zeros((image_page.shape[0], image_page.shape[1])).astype(np.int16)
-
-        if self.plotter:
-            self.plotter.save_page_image(image_page)
-
-        text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
-        mask_images = (text_regions_p_1[:, :] == 2) * 1
-        mask_images = mask_images.astype(np.uint8)
-        mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10)
-        mask_lines = (text_regions_p_1[:, :] == 3) * 1
-        mask_lines = mask_lines.astype(np.uint8)
-        img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1
-        img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8)
-
-        if erosion_hurts:
-            img_only_regions = np.copy(img_only_regions_with_sep[:,:])
-        else:
-            img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6)
-        try:
-            num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
-            num_col = num_col + 1
-            if not num_column_is_classified:
-                num_col_classifier = num_col + 1
-        except Exception as why:
-            self.logger.error(why)
-            num_col = None
-        return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines,
-                text_regions_p_1, cont_page, table_prediction)

    def run_enhancement(self, light_version):
        t_in = time.time()
@@ -2685,10 +2459,7 @@ class Eynollah:
            else:
                self.get_image_and_scales_after_enhancing(img_org, img_res)
        else:
-            if self.allow_enhancement:
-                self.get_image_and_scales(img_org, img_res, scale)
-            else:
-                self.get_image_and_scales(img_org, img_res, scale)
+            self.get_image_and_scales(img_org, img_res, scale)
            if self.allow_scaling:
                img_org, img_res, is_image_enhanced = \
                    self.resize_image_with_column_classifier(is_image_enhanced, img_bin)
@@ -3341,42 +3112,6 @@ class Eynollah:
            region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))]
            return ordered, region_ids

-
-    
-
-
-    
-
-    
-
-    def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes):
-        return list(np.array(ls_cons)[np.array(sorted_indexes)])
-
-    def return_it_in_two_groups(self, x_differential):
-        split = [ind if x_differential[ind]!=x_differential[ind+1] else -1
-                 for ind in range(len(x_differential)-1)]
-        split_masked = list( np.array(split[:])[np.array(split[:])!=-1] )
-        if 0 not in split_masked:
-            split_masked.insert(0, -1)
-        split_masked.append(len(x_differential)-1)
-
-        split_masked = np.array(split_masked) +1
-
-        sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind+1]])
-                for ind in range(len(split_masked)-1)]
-
-        indexes_to_bec_changed = [ind if (np.abs(sums[ind-1]) > np.abs(sums[ind]) and
-                                          np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1
-                                  for ind in range(1,len(sums)-1)]
-        indexes_to_bec_changed_filtered = np.array(indexes_to_bec_changed)[np.array(indexes_to_bec_changed)!=-1]
-
-        x_differential_new = np.copy(x_differential)
-        for i in indexes_to_bec_changed_filtered:
-            i_slice = slice(split_masked[i], split_masked[i+1])
-            x_differential_new[i_slice] = -1 * np.array(x_differential)[i_slice]
-
-        return x_differential_new
-
    def filter_contours_inside_a_bigger_one(self, contours, contours_d_ordered, image,
                                            marginal_cnts=None, type_contour="textregion"):
        if type_contour == "textregion":
--- a/src/eynollah/image_enhancer.py
+++ b/src/eynollah/image_enhancer.py
@@ -10,14 +10,14 @@ Image enhancer. The output can be written as same scale of input or in new predi
 import logging
 import os
 import time
-from typing import Dict, Optional
+from typing import Optional
 from pathlib import Path
 import gc

 import cv2
 from keras.models import Model
 import numpy as np
-import tensorflow as tf
+import tensorflow as tf # type: ignore
 from skimage.morphology import skeletonize

 from .model_zoo import EynollahModelZoo
@@ -27,7 +27,6 @@ from .utils import (
    is_image_filename,
    crop_image_inside_box
 )
-from .patch_encoder import PatchEncoder, Patches

 DPI_THRESHOLD = 298
 KERNEL = np.ones((5, 5), np.uint8)
@@ -91,9 +90,6 @@ class Enhancer:
            key += '_uint8'
        return self._imgs[key].copy()

-    def isNaN(self, num):
-        return num != num
-    
    def predict_enhancement(self, img):
        self.logger.debug("enter predict_enhancement")

--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -5,8 +5,6 @@ import numpy as np
 import cv2
 from scipy.signal import find_peaks
 from scipy.ndimage import gaussian_filter1d
-from multiprocessing import Process, Queue, cpu_count
-from multiprocessing import Pool
 from .rotate import rotate_image
 from .resize import resize_image
 from .contour import (
@@ -20,9 +18,7 @@ from .contour import (
 from .shm import share_ndarray, wrap_ndarray_shared
 from . import (
    find_num_col_deskew,
-    crop_image_inside_box,
    box2rect,
-    box2slice,
 )

 def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
@@ -1590,65 +1586,6 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map
        var = 0
    return angle, var

-@wrap_ndarray_shared(kw='textline_mask_tot_ea')
-def do_work_of_slopes_new(
-        box_text, contour, contour_par,
-        textline_mask_tot_ea=None, slope_deskew=0.0,
-        logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
-):
-    if KERNEL is None:
-        KERNEL = np.ones((5, 5), np.uint8)
-    if logger is None:
-        logger = getLogger(__package__)
-    logger.debug('enter do_work_of_slopes_new')
-
-    x, y, w, h = box_text
-    crop_coor = box2rect(box_text)
-    mask_textline = np.zeros(textline_mask_tot_ea.shape)
-    mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
-    all_text_region_raw = textline_mask_tot_ea * mask_textline
-    all_text_region_raw = all_text_region_raw[y: y + h, x: x + w].astype(np.uint8)
-    img_int_p = all_text_region_raw[:,:]
-    img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2)
-
-    if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
-        slope = 0
-        slope_for_all = slope_deskew
-        all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w]
-        cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, 0)
-    else:
-        try:
-            textline_con, hierarchy = return_contours_of_image(img_int_p)
-            textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
-                                                             hierarchy,
-                                                             max_area=1, min_area=0.00008)
-            y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN
-            if np.isnan(y_diff_mean):
-                slope_for_all = MAX_SLOPE
-            else:
-                sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0)))
-                img_int_p[img_int_p > 0] = 1
-                slope_for_all = return_deskew_slop(img_int_p, sigma_des, logger=logger, plotter=plotter)
-                if abs(slope_for_all) <= 0.5:
-                    slope_for_all = slope_deskew
-        except:
-            logger.exception("cannot determine angle of contours")
-            slope_for_all = MAX_SLOPE
-
-        if slope_for_all == MAX_SLOPE:
-            slope_for_all = slope_deskew
-        slope = slope_for_all
-        mask_only_con_region = np.zeros(textline_mask_tot_ea.shape)
-        mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contour_par], color=(1, 1, 1))
-
-        all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].copy()
-        mask_only_con_region = mask_only_con_region[y: y + h, x: x + w]
-
-        all_text_region_raw[mask_only_con_region == 0] = 0
-        cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text)
-
-    return cnt_clean_rot, crop_coor, slope
-
 @wrap_ndarray_shared(kw='textline_mask_tot_ea')
 @wrap_ndarray_shared(kw='mask_texts_only')
 def do_work_of_slopes_new_curved(
--- a/src/eynollah/writer.py
+++ b/src/eynollah/writer.py
@@ -4,7 +4,6 @@ from pathlib import Path
 import os.path
 from typing import Optional
 import logging
-import xml.etree.ElementTree as ET
 from .utils.xml import create_page_xml, xml_reading_order
 from .utils.counter import EynollahIdCounter

@@ -19,7 +18,6 @@ from ocrd_models.ocrd_page import (
        SeparatorRegionType,
        to_xml
        )
-import numpy as np

 class EynollahXmlWriter: