From b75ca0d31fc4b8b2806569aebfa38e3203a0e7a0 Mon Sep 17 00:00:00 2001 From: kba Date: Wed, 24 Sep 2025 16:29:05 +0200 Subject: [PATCH] mb_ro_on_layout: remove copy-pasta code not actually used --- src/eynollah/mb_ro_on_layout.py | 333 +------------------------------- 1 file changed, 3 insertions(+), 330 deletions(-) diff --git a/src/eynollah/mb_ro_on_layout.py b/src/eynollah/mb_ro_on_layout.py index c03d831..c6c02cf 100644 --- a/src/eynollah/mb_ro_on_layout.py +++ b/src/eynollah/mb_ro_on_layout.py @@ -3,46 +3,26 @@ Image enhancer. The output can be written as same scale of input or in new predi """ from logging import Logger -from difflib import SequenceMatcher as sq -from PIL import Image, ImageDraw, ImageFont -import math import os -import sys import time from typing import Optional import atexit -import warnings from functools import partial from pathlib import Path from multiprocessing import cpu_count -import gc -import copy from loky import ProcessPoolExecutor import xml.etree.ElementTree as ET import cv2 import numpy as np -from ocrd import OcrdPage -from ocrd_utils import getLogger, tf_disable_interactive_logs +from ocrd_utils import getLogger import statistics +import tensorflow as tf from tensorflow.keras.models import load_model from .utils.resize import resize_image -from .utils import ( - crop_image_inside_box -) from .utils.contour import ( - filter_contours_area_of_image, - filter_contours_area_of_image_tables, - find_contours_mean_y_diff, find_new_features_of_contours, - find_features_of_contours, - get_text_region_boxes_by_given_contours, - get_textregion_contours_in_org_image, - get_textregion_contours_in_org_image_light, return_contours_of_image, - return_contours_of_interested_region, - return_contours_of_interested_region_by_min_size, - return_contours_of_interested_textline, return_parent_contours, ) @@ -64,7 +44,7 @@ class machine_based_reading_order_on_layout: self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) atexit.register(self.executor.shutdown) self.dir_models = dir_models - self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"#"/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824" try: for device in tf.config.list_physical_devices('GPU'): @@ -76,43 +56,7 @@ class machine_based_reading_order_on_layout: self.light_version = True - def cache_images(self, image_filename=None, image_pil=None, dpi=None): - ret = {} - t_c0 = time.time() - if image_filename: - ret['img'] = cv2.imread(image_filename) - if self.light_version: - self.dpi = 100 - else: - self.dpi = 0#check_dpi(image_filename) - else: - ret['img'] = pil2cv(image_pil) - if self.light_version: - self.dpi = 100 - else: - self.dpi = 0#check_dpi(image_pil) - ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY) - for prefix in ('', '_grayscale'): - ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) - self._imgs = ret - if dpi is not None: - self.dpi = dpi - def reset_file_name_dir(self, image_filename): - t_c = time.time() - self.cache_images(image_filename=image_filename) - self.output_filename = os.path.join(self.dir_out, Path(image_filename).stem +'.png') - - def imread(self, grayscale=False, uint8=True): - key = 'img' - if grayscale: - key += '_grayscale' - if uint8: - key += '_uint8' - return self._imgs[key].copy() - - def isNaN(self, num): - return num != num @staticmethod def our_load_model(model_file): @@ -126,278 +70,7 @@ class machine_based_reading_order_on_layout: "PatchEncoder": PatchEncoder, "Patches": Patches}) return model - def predict_enhancement(self, img): - self.logger.debug("enter predict_enhancement") - - img_height_model = self.model_enhancement.layers[-1].output_shape[1] - img_width_model = self.model_enhancement.layers[-1].output_shape[2] - if img.shape[0] < img_height_model: - img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST) - if img.shape[1] < img_width_model: - img = cv2.resize(img, (img_height_model, img.shape[0]), interpolation=cv2.INTER_NEAREST) - margin = int(0.1 * img_width_model) - width_mid = img_width_model - 2 * margin - height_mid = img_height_model - 2 * margin - img = img / 255. - img_h = img.shape[0] - img_w = img.shape[1] - - prediction_true = np.zeros((img_h, img_w, 3)) - nxf = img_w / float(width_mid) - nyf = img_h / float(height_mid) - nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) - nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - - for i in range(nxf): - for j in range(nyf): - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - else: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - if j == 0: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - else: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - - if index_x_u > img_w: - index_x_u = img_w - index_x_d = img_w - img_width_model - if index_y_u > img_h: - index_y_u = img_h - index_y_d = img_h - img_height_model - - img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = self.model_enhancement.predict(img_patch, verbose=0) - seg = label_p_pred[0, :, :, :] * 255 - - if i == 0 and j == 0: - prediction_true[index_y_d + 0:index_y_u - margin, - index_x_d + 0:index_x_u - margin] = \ - seg[0:-margin or None, - 0:-margin or None] - elif i == nxf - 1 and j == nyf - 1: - prediction_true[index_y_d + margin:index_y_u - 0, - index_x_d + margin:index_x_u - 0] = \ - seg[margin:, - margin:] - elif i == 0 and j == nyf - 1: - prediction_true[index_y_d + margin:index_y_u - 0, - index_x_d + 0:index_x_u - margin] = \ - seg[margin:, - 0:-margin or None] - elif i == nxf - 1 and j == 0: - prediction_true[index_y_d + 0:index_y_u - margin, - index_x_d + margin:index_x_u - 0] = \ - seg[0:-margin or None, - margin:] - elif i == 0 and j != 0 and j != nyf - 1: - prediction_true[index_y_d + margin:index_y_u - margin, - index_x_d + 0:index_x_u - margin] = \ - seg[margin:-margin or None, - 0:-margin or None] - elif i == nxf - 1 and j != 0 and j != nyf - 1: - prediction_true[index_y_d + margin:index_y_u - margin, - index_x_d + margin:index_x_u - 0] = \ - seg[margin:-margin or None, - margin:] - elif i != 0 and i != nxf - 1 and j == 0: - prediction_true[index_y_d + 0:index_y_u - margin, - index_x_d + margin:index_x_u - margin] = \ - seg[0:-margin or None, - margin:-margin or None] - elif i != 0 and i != nxf - 1 and j == nyf - 1: - prediction_true[index_y_d + margin:index_y_u - 0, - index_x_d + margin:index_x_u - margin] = \ - seg[margin:, - margin:-margin or None] - else: - prediction_true[index_y_d + margin:index_y_u - margin, - index_x_d + margin:index_x_u - margin] = \ - seg[margin:-margin or None, - margin:-margin or None] - - prediction_true = prediction_true.astype(int) - return prediction_true - def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred): - self.logger.debug("enter calculate_width_height_by_columns") - if num_col == 1: - img_w_new = 2000 - elif num_col == 2: - img_w_new = 2400 - elif num_col == 3: - img_w_new = 3000 - elif num_col == 4: - img_w_new = 4000 - elif num_col == 5: - img_w_new = 5000 - elif num_col == 6: - img_w_new = 6500 - else: - img_w_new = width_early - img_h_new = img_w_new * img.shape[0] // img.shape[1] - - if img_h_new >= 8000: - img_new = np.copy(img) - num_column_is_classified = False - else: - img_new = resize_image(img, img_h_new, img_w_new) - num_column_is_classified = True - - return img_new, num_column_is_classified - - def early_page_for_num_of_column_classification(self,img_bin): - self.logger.debug("enter early_page_for_num_of_column_classification") - if self.input_binary: - img = np.copy(img_bin).astype(np.uint8) - else: - img = self.imread() - img = cv2.GaussianBlur(img, (5, 5), 0) - img_page_prediction = self.do_prediction(False, img, self.model_page) - - imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) - _, thresh = cv2.threshold(imgray, 0, 255, 0) - thresh = cv2.dilate(thresh, KERNEL, iterations=3) - contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - if len(contours)>0: - cnt_size = np.array([cv2.contourArea(contours[j]) - for j in range(len(contours))]) - cnt = contours[np.argmax(cnt_size)] - box = cv2.boundingRect(cnt) - else: - box = [0, 0, img.shape[1], img.shape[0]] - cropped_page, page_coord = crop_image_inside_box(box, img) - - self.logger.debug("exit early_page_for_num_of_column_classification") - return cropped_page, page_coord - - def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): - self.logger.debug("enter calculate_width_height_by_columns") - if num_col == 1: - img_w_new = 1000 - else: - img_w_new = 1300 - img_h_new = img_w_new * img.shape[0] // img.shape[1] - - if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: - img_new = np.copy(img) - num_column_is_classified = False - #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: - elif img_h_new >= 8000: - img_new = np.copy(img) - num_column_is_classified = False - else: - img_new = resize_image(img, img_h_new, img_w_new) - num_column_is_classified = True - - return img_new, num_column_is_classified - - def resize_and_enhance_image_with_column_classifier(self, light_version): - self.logger.debug("enter resize_and_enhance_image_with_column_classifier") - dpi = 0#self.dpi - self.logger.info("Detected %s DPI", dpi) - if self.input_binary: - img = self.imread() - prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) - prediction_bin = 255 * (prediction_bin[:,:,0]==0) - prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8) - img= np.copy(prediction_bin) - img_bin = prediction_bin - else: - img = self.imread() - self.h_org, self.w_org = img.shape[:2] - img_bin = None - - width_early = img.shape[1] - t1 = time.time() - _, page_coord = self.early_page_for_num_of_column_classification(img_bin) - - self.image_page_org_size = img[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3], :] - self.page_coord = page_coord - - if self.num_col_upper and not self.num_col_lower: - num_col = self.num_col_upper - label_p_pred = [np.ones(6)] - elif self.num_col_lower and not self.num_col_upper: - num_col = self.num_col_lower - label_p_pred = [np.ones(6)] - elif not self.num_col_upper and not self.num_col_lower: - if self.input_binary: - img_in = np.copy(img) - img_in = img_in / 255.0 - img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) - img_in = img_in.reshape(1, 448, 448, 3) - else: - img_1ch = self.imread(grayscale=True) - width_early = img_1ch.shape[1] - img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - - img_1ch = img_1ch / 255.0 - img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) - img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) - img_in[0, :, :, 0] = img_1ch[:, :] - img_in[0, :, :, 1] = img_1ch[:, :] - img_in[0, :, :, 2] = img_1ch[:, :] - - label_p_pred = self.model_classifier.predict(img_in, verbose=0) - num_col = np.argmax(label_p_pred[0]) + 1 - elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): - if self.input_binary: - img_in = np.copy(img) - img_in = img_in / 255.0 - img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) - img_in = img_in.reshape(1, 448, 448, 3) - else: - img_1ch = self.imread(grayscale=True) - width_early = img_1ch.shape[1] - img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - - img_1ch = img_1ch / 255.0 - img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) - img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) - img_in[0, :, :, 0] = img_1ch[:, :] - img_in[0, :, :, 1] = img_1ch[:, :] - img_in[0, :, :, 2] = img_1ch[:, :] - - label_p_pred = self.model_classifier.predict(img_in, verbose=0) - num_col = np.argmax(label_p_pred[0]) + 1 - - if num_col > self.num_col_upper: - num_col = self.num_col_upper - label_p_pred = [np.ones(6)] - if num_col < self.num_col_lower: - num_col = self.num_col_lower - label_p_pred = [np.ones(6)] - else: - num_col = self.num_col_upper - label_p_pred = [np.ones(6)] - - self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) - - if dpi < DPI_THRESHOLD: - if light_version and num_col in (1,2): - img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2( - img, num_col, width_early, label_p_pred) - else: - img_new, num_column_is_classified = self.calculate_width_height_by_columns( - img, num_col, width_early, label_p_pred) - if light_version: - image_res = np.copy(img_new) - else: - image_res = self.predict_enhancement(img_new) - is_image_enhanced = True - - else: - num_column_is_classified = True - image_res = np.copy(img) - is_image_enhanced = False - - self.logger.debug("exit resize_and_enhance_image_with_column_classifier") - return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin def read_xml(self, xml_file): file_name = Path(xml_file).stem tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding='utf-8'))