From 6ee79c7320d11eb93535b886b85f6746b90deb40 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Wed, 17 Dec 2025 13:28:02 +0100
Subject: [PATCH 1/7] evaluation with a given GT is only possible for
 segmentation tasks

---
 src/eynollah/training/inference.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/eynollah/training/inference.py b/src/eynollah/training/inference.py
index 3fa8fd6..f739438 100644
--- a/src/eynollah/training/inference.py
+++ b/src/eynollah/training/inference.py
@@ -576,9 +576,9 @@ class sbb_predict:
             if self.save_layout:
                 cv2.imwrite(self.save_layout, only_layout)
 
-        if self.ground_truth:
-            gt_img=cv2.imread(self.ground_truth)
-            self.IoU(gt_img[:,:,0],res[:,:,0])
+            if self.ground_truth:
+                gt_img=cv2.imread(self.ground_truth)
+                self.IoU(gt_img[:,:,0],res[:,:,0])
 
         else:
             ls_images = os.listdir(self.dir_in)
@@ -599,9 +599,9 @@
                     self.save_layout = os.path.join(self.out, f_name+'_layout.png')
                     cv2.imwrite(self.save_layout, only_layout)
 
-                if self.ground_truth:
-                    gt_img=cv2.imread(self.ground_truth)
-                    self.IoU(gt_img[:,:,0],res[:,:,0])
+                    if self.ground_truth:
+                        gt_img=cv2.imread(self.ground_truth)
+                        self.IoU(gt_img[:,:,0],res[:,:,0])

From 49261fa99b06b6a747e953505894a6394746e695 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Wed, 17 Dec 2025 15:12:39 +0100
Subject: [PATCH 2/7] CNN–RNN–OCR inference and adaptation of the CNN–RNN–OCR
 model to support inference on both CPU and GPU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/eynollah/training/inference.py | 87 ++++++++++++++++++++----------
 src/eynollah/training/models.py    |  2 +-
 2 files changed, 61 insertions(+), 28 deletions(-)

diff --git a/src/eynollah/training/inference.py b/src/eynollah/training/inference.py
index f739438..ef4be28 100644
--- a/src/eynollah/training/inference.py
+++ b/src/eynollah/training/inference.py
@@ -25,6 +25,9 @@ from .models import (
     Patches
 )
 
+from .utils import (scale_padd_image_for_ocr)
+from eynollah.utils.utils_ocr import (decode_batch_predictions)
+
 
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
@@ -34,7 +37,7 @@
 Tool to load model and predict for given image.
""" class sbb_predict: - def __init__(self,image, dir_in, model, task, config_params_model, patches, save, save_layout, ground_truth, xml_file, out, min_area): + def __init__(self,image, dir_in, model, task, config_params_model, patches, save, save_layout, ground_truth, xml_file, cpu, out, min_area): self.image=image self.dir_in=dir_in self.patches=patches @@ -46,6 +49,7 @@ class sbb_predict: self.config_params_model=config_params_model self.xml_file = xml_file self.out = out + self.cpu = cpu if min_area: self.min_area = float(min_area) else: @@ -157,30 +161,26 @@ class sbb_predict: return mIoU def start_new_session_and_model(self): - - config = tf.compat.v1.ConfigProto() - config.gpu_options.allow_growth = True + if self.task == "cnn-rnn-ocr": + if self.cpu: + os.environ['CUDA_VISIBLE_DEVICES']='-1' + self.model = load_model(self.model_dir) + self.model = tf.keras.models.Model( + self.model.get_layer(name = "image").input, + self.model.get_layer(name = "dense2").output) + else: + config = tf.compat.v1.ConfigProto() + config.gpu_options.allow_growth = True - session = tf.compat.v1.Session(config=config) # tf.InteractiveSession() - tensorflow_backend.set_session(session) - #tensorflow.keras.layers.custom_layer = PatchEncoder - #tensorflow.keras.layers.custom_layer = Patches - self.model = load_model(self.model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) - #config = tf.ConfigProto() - #config.gpu_options.allow_growth=True - - #self.session = tf.InteractiveSession() - #keras.losses.custom_loss = self.weighted_categorical_crossentropy - #self.model = load_model(self.model_dir , compile=False) + session = tf.compat.v1.Session(config=config) # tf.InteractiveSession() + tensorflow_backend.set_session(session) - - ##if self.weights_dir!=None: - ##self.model.load_weights(self.weights_dir) - - if self.task != 'classification' and self.task != 'reading_order': - self.img_height=self.model.layers[len(self.model.layers)-1].output_shape[1] - self.img_width=self.model.layers[len(self.model.layers)-1].output_shape[2] - self.n_classes=self.model.layers[len(self.model.layers)-1].output_shape[3] + self.model = load_model(self.model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) + + if self.task != 'classification' and self.task != 'reading_order': + self.img_height=self.model.layers[len(self.model.layers)-1].output_shape[1] + self.img_width=self.model.layers[len(self.model.layers)-1].output_shape[2] + self.n_classes=self.model.layers[len(self.model.layers)-1].output_shape[3] def visualize_model_output(self, prediction, img, task): if task == "binarization": @@ -244,6 +244,30 @@ class sbb_predict: index_class = np.argmax(label_p_pred[0]) print("Predicted Class: {}".format(classes_names[str(int(index_class))])) + elif self.task == "cnn-rnn-ocr": + img=cv2.imread(image_dir) + img = scale_padd_image_for_ocr(img, self.config_params_model['input_height'], self.config_params_model['input_width']) + + img = img / 255. + + with open(os.path.join(self.model_dir, "characters_org.txt"), 'r') as char_txt_f: + characters = json.load(char_txt_f) + + AUTOTUNE = tf.data.AUTOTUNE + + # Mapping characters to integers. + char_to_num = StringLookup(vocabulary=list(characters), mask_token=None) + + # Mapping integers back to original characters. 
+            num_to_char = StringLookup(
+                vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
+            )
+            preds = self.model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0)
+            pred_texts = decode_batch_predictions(preds, num_to_char)
+            pred_texts = pred_texts[0].replace("[UNK]", "")
+            return pred_texts
+
+
         elif self.task == 'reading_order':
             img_height = self.config_params_model['input_height']
             img_width = self.config_params_model['input_width']
@@ -569,6 +593,8 @@
         elif self.task == 'enhancement':
             if self.save:
                 cv2.imwrite(self.save,res)
+        elif self.task == "cnn-rnn-ocr":
+            print(f"Detected text: {res}")
         else:
             img_seg_overlayed, only_layout = self.visualize_model_output(res, self.img_org, self.task)
             if self.save:
@@ -592,6 +618,8 @@
                 elif self.task == 'enhancement':
                     self.save = os.path.join(self.out, f_name+'.png')
                     cv2.imwrite(self.save,res)
+                elif self.task == "cnn-rnn-ocr":
+                    print(f"Detected text for file name {f_name} is: {res}")
                 else:
                     img_seg_overlayed, only_layout = self.visualize_model_output(res, self.img_org, self.task)
                     self.save = os.path.join(self.out, f_name+'_overlayed.png')
@@ -657,24 +685,29 @@
     "-xml",
     help="xml file with layout coordinates that reading order detection will be implemented on. The result will be written in the same xml file.",
 )
-
+@click.option(
+    "--cpu",
+    "-cpu",
+    help="For OCR, the default device is the GPU. If this flag is set, inference will be performed on the CPU.",
+    is_flag=True,
+)
 @click.option(
     "--min_area",
     "-min",
     help="min area size of regions considered for reading order detection. The default value is zero and means that all text regions are considered for reading order.",
 )
-def main(image, dir_in, model, patches, save, save_layout, ground_truth, xml_file, out, min_area):
+def main(image, dir_in, model, patches, save, save_layout, ground_truth, xml_file, cpu, out, min_area):
     assert image or dir_in, "Either a single image -i or a dir_in -di is required"
     with open(os.path.join(model,'config.json')) as f:
         config_params_model = json.load(f)
     task = config_params_model['task']
-    if task != 'classification' and task != 'reading_order':
+    if task != 'classification' and task != 'reading_order' and task != "cnn-rnn-ocr":
         if image and not save:
             print("Error: You used one of segmentation or binarization task with image input but not set -s, you need a filename to save visualized output with -s")
             sys.exit(1)
         if dir_in and not out:
             print("Error: You used one of segmentation or binarization task with dir_in but not set -out")
             sys.exit(1)
 
-    x=sbb_predict(image, dir_in, model, task, config_params_model, patches, save, save_layout, ground_truth, xml_file, out, min_area)
+    x=sbb_predict(image, dir_in, model, task, config_params_model, patches, save, save_layout, ground_truth, xml_file, cpu, out, min_area)
     x.run()
diff --git a/src/eynollah/training/models.py b/src/eynollah/training/models.py
index d1b0aa2..5528761 100644
--- a/src/eynollah/training/models.py
+++ b/src/eynollah/training/models.py
@@ -843,7 +843,7 @@ def cnn_rnn_ocr_model(image_height=None, image_width=None, n_classes=None, max_s
 
     addition_rnn = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(image_width, return_sequences=True, dropout=0.25))(addition)
 
-    out = tf.keras.layers.Conv1D(max_seq, 1, data_format="channels_first")(addition_rnn)
+    out = tf.keras.layers.Conv1D(max_seq, 1, data_format="channels_last")(addition_rnn)
     out = tf.keras.layers.BatchNormalization(name="bn9")(out)
     out = tf.keras.layers.Activation("relu", name="relu9")(out)
     #out = tf.keras.layers.Conv1D(n_classes, 1, activation='relu', data_format="channels_last")(out)

From c8240905a8bf5496eb550c63059fc11c5331c421 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Mon, 26 Jan 2026 13:36:24 +0100
Subject: [PATCH 3/7] Fix label generation by selecting largest contour when
 erosion splits shapes

---
 src/eynollah/training/gt_gen_utils.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/eynollah/training/gt_gen_utils.py b/src/eynollah/training/gt_gen_utils.py
index 2e3428b..1eeb5ad 100644
--- a/src/eynollah/training/gt_gen_utils.py
+++ b/src/eynollah/training/gt_gen_utils.py
@@ -231,7 +231,12 @@ def update_region_contours(co_text, img_boundary, erosion_rate, dilation_rate, y
         con_eroded = return_contours_of_interested_region(img_boundary_in,pixel, min_size )
 
         try:
-            co_text_eroded.append(con_eroded[0])
+            if len(con_eroded)>1:
+                cnt_size = np.array([cv2.contourArea(con_eroded[j]) for j in range(len(con_eroded))])
+                cnt = con_eroded[np.argmax(cnt_size)]
+                co_text_eroded.append(cnt)
+            else:
+                co_text_eroded.append(con_eroded[0])
         except:
             co_text_eroded.append(con)

From 30f39e73837f766d17026b8733ce95ee3faf6b3a Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Mon, 26 Jan 2026 13:56:34 +0100
Subject: [PATCH 4/7] mapregion is added to labels

---
 .../training/generate_gt_for_training.py     |  4 ++--
 src/eynollah/training/gt_gen_utils.py        | 72 ++++++++++++++++++-
 2 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/src/eynollah/training/generate_gt_for_training.py b/src/eynollah/training/generate_gt_for_training.py
index 693cab8..30abd04 100644
--- a/src/eynollah/training/generate_gt_for_training.py
+++ b/src/eynollah/training/generate_gt_for_training.py
@@ -474,7 +474,7 @@ def visualize_layout_segmentation(xml_file, dir_xml, dir_out, dir_imgs):
             img_file_name_with_format = find_format_of_given_filename_in_dir(dir_imgs, f_name)
             img = cv2.imread(os.path.join(dir_imgs, img_file_name_with_format))
 
-            co_text, co_graphic, co_sep, co_img, co_table, co_noise, y_len, x_len = get_layout_contours_for_visualization(xml_file)
+            co_text, co_graphic, co_sep, co_img, co_table, co_map, co_noise, y_len, x_len = get_layout_contours_for_visualization(xml_file)
 
-            added_image = visualize_image_from_contours_layout(co_text['paragraph'], co_text['header']+co_text['heading'], co_text['drop-capital'], co_sep, co_img, co_text['marginalia'], co_table, img)
+            added_image = visualize_image_from_contours_layout(co_text['paragraph'], co_text['header']+co_text['heading'], co_text['drop-capital'], co_sep, co_img, co_text['marginalia'], co_table, co_map, img)
 
diff --git a/src/eynollah/training/gt_gen_utils.py b/src/eynollah/training/gt_gen_utils.py
index 1eeb5ad..62a094a 100644
--- a/src/eynollah/training/gt_gen_utils.py
+++ b/src/eynollah/training/gt_gen_utils.py
@@ -15,7 +15,7 @@
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
 
-def visualize_image_from_contours_layout(co_par, co_header, co_drop, co_sep, co_image, co_marginal, co_table, img):
+def visualize_image_from_contours_layout(co_par, co_header, co_drop, co_sep, co_image, co_marginal, co_table, co_map, img):
     alpha = 0.5
 
     blank_image = np.ones( (img.shape[:]), dtype=np.uint8) * 255
@@ -28,6 +28,7 @@ def visualize_image_from_contours_layout(co_par, co_header, co_drop, co_sep, co_
     col_sep = (255, 0, 0)
     col_marginal = (106, 90, 205)
     col_table = (0, 90, 205)
+    col_map = (90, 90, 205)
 
     if len(co_image)>0:
         cv2.drawContours(blank_image, co_image, -1, col_image, thickness=cv2.FILLED) # Fill the contour
@@ -52,6 +53,9 @@ def visualize_image_from_contours_layout(co_par, co_header, co_drop, co_sep, co_
 
     if len(co_table)>0:
         cv2.drawContours(blank_image, co_table, -1, col_table, thickness=cv2.FILLED) # Fill the contour
+
+    if len(co_map)>0:
+        cv2.drawContours(blank_image, co_map, -1, col_map, thickness=cv2.FILLED) # Fill the contour
 
     img_final =cv2.cvtColor(blank_image, cv2.COLOR_BGR2RGB)
@@ -380,6 +384,7 @@ def get_layout_contours_for_visualization(xml_file):
     co_sep=[]
     co_img=[]
     co_table=[]
+    co_map=[]
     co_noise=[]
 
     types_text = []
@@ -596,6 +601,31 @@ def get_layout_contours_for_visualization(xml_file):
                     elif vv.tag!=link+'Point' and sumi>=1:
                         break
             co_table.append(np.array(c_t_in))
+
+        if tag.endswith('}MapRegion') or tag.endswith('}mapregion'):
+            #print('sth')
+            for nn in root1.iter(tag):
+                c_t_in=[]
+                sumi=0
+                for vv in nn.iter():
+                    # check the format of coords
+                    if vv.tag==link+'Coords':
+                        coords=bool(vv.attrib)
+                        if coords:
+                            p_h=vv.attrib['points'].split(' ')
+                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
+                            break
+                        else:
+                            pass
+
+
+                    if vv.tag==link+'Point':
+                        c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
+                        sumi+=1
+                    #print(vv.tag,'in')
+                    elif vv.tag!=link+'Point' and sumi>=1:
+                        break
+            co_map.append(np.array(c_t_in))
 
 
         if tag.endswith('}NoiseRegion') or tag.endswith('}noiseregion'):
@@ -622,7 +652,7 @@ def get_layout_contours_for_visualization(xml_file):
                     elif vv.tag!=link+'Point' and sumi>=1:
                         break
             co_noise.append(np.array(c_t_in))
-    return co_text, co_graphic, co_sep, co_img, co_table, co_noise, y_len, x_len
+    return co_text, co_graphic, co_sep, co_img, co_table, co_map, co_noise, y_len, x_len
 
 def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_file, config_params, printspace, dir_images, dir_out_images):
     """
@@ -841,7 +871,7 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
 
         types_graphic_label = list(types_graphic_dict.values())
 
-        labels_rgb_color = [ (0,0,0), (255,0,0), (255,125,0), (255,0,125), (125,255,125), (125,125,0), (0,125,255), (0,125,0), (125,125,125), (255,0,255), (125,0,125), (0,255,0),(0,0,255), (0,255,255), (255,125,125), (0,125,125), (0,255,125), (255,125,255), (125,255,0)]
+        labels_rgb_color = [ (0,0,0), (255,0,0), (255,125,0), (255,0,125), (125,255,125), (125,125,0), (0,125,255), (0,125,0), (125,125,125), (255,0,255), (125,0,125), (0,255,0),(0,0,255), (0,255,255), (255,125,125), (0,125,125), (0,255,125), (255,125,255), (125,255,0), (125,255,255)]
 
         region_tags=np.unique([x for x in alltags if x.endswith('Region')])
@@ -852,6 +882,7 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
         co_sep=[]
         co_img=[]
         co_table=[]
+        co_map=[]
         co_noise=[]
 
         for tag in region_tags:
@@ -1062,6 +1093,32 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
                             elif vv.tag!=link+'Point' and sumi>=1:
                                 break
                     co_table.append(np.array(c_t_in))
+
+            if 'mapregion' in keys:
+                if tag.endswith('}MapRegion') or tag.endswith('}mapregion'):
+                    #print('sth')
+                    for nn in root1.iter(tag):
+                        c_t_in=[]
+                        sumi=0
+                        for vv in nn.iter():
+                            # check the format of coords
+                            if vv.tag==link+'Coords':
+                                coords=bool(vv.attrib)
+                                if coords:
+                                    p_h=vv.attrib['points'].split(' ')
+                                    c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
+                                    break
+                                else:
+                                    pass
+
+
+                            if vv.tag==link+'Point':
+                                c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
+                                sumi+=1
+                            #print(vv.tag,'in')
+                            elif vv.tag!=link+'Point' and sumi>=1:
+                                break
+                    co_map.append(np.array(c_t_in))
 
             if 'noiseregion' in keys:
                 if tag.endswith('}NoiseRegion') or tag.endswith('}noiseregion'):
@@ -1135,6 +1192,10 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
             erosion_rate = 0#2
             dilation_rate = 3#4
             co_table, img_boundary = update_region_contours(co_table, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
+        if "mapregion" in elements_with_artificial_class:
+            erosion_rate = 0#2
+            dilation_rate = 3#4
+            co_map, img_boundary = update_region_contours(co_map, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
 
 
@@ -1160,6 +1221,8 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
                     img_poly=cv2.fillPoly(img, pts =co_img, color=labels_rgb_color[ config_params['imageregion']])
                 if 'tableregion' in keys:
                     img_poly=cv2.fillPoly(img, pts =co_table, color=labels_rgb_color[ config_params['tableregion']])
+                if 'mapregion' in keys:
+                    img_poly=cv2.fillPoly(img, pts =co_map, color=labels_rgb_color[ config_params['mapregion']])
                 if 'noiseregion' in keys:
                     img_poly=cv2.fillPoly(img, pts =co_noise, color=labels_rgb_color[ config_params['noiseregion']])
@@ -1220,6 +1283,9 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
                 if 'tableregion' in keys:
                     color_label = config_params['tableregion']
                     img_poly=cv2.fillPoly(img, pts =co_table, color=(color_label,color_label,color_label))
+                if 'mapregion' in keys:
+                    color_label = config_params['mapregion']
+                    img_poly=cv2.fillPoly(img, pts =co_map, color=(color_label,color_label,color_label))
                 if 'noiseregion' in keys:
                     color_label = config_params['noiseregion']
                     img_poly=cv2.fillPoly(img, pts =co_noise, color=(color_label,color_label,color_label))

From 6ae244bf9bf811fd365cb002f4feb338d1df730a Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Mon, 26 Jan 2026 15:03:11 +0100
Subject: [PATCH 5/7] Fix filename stem extraction in binarization. Restore
 the CNN-RNN model to its previous version, as setting channels_last alone
 was insufficient for running on both CPU and GPU. Prevent errors caused by
 null values in image shape elements.
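
For context, the new guard sits in the OCR preprocessing helper
scale_padd_image_for_ocr, which scales a text-line image to the model's input
height and pads it to the target width. A minimal sketch of the intended
behaviour (simplified, with a hypothetical name scale_pad_sketch; the real
helper lives in src/eynollah/training/utils.py):

    import cv2
    import numpy as np

    def scale_pad_sketch(img, height, width):
        # assumes a 3-channel BGR image as returned by cv2.imread
        width_new = int(img.shape[1] * height / float(img.shape[0]))
        if width_new > width:   # wider than the target canvas: clamp
            width_new = width
        if width_new <= 0:      # degenerate shape element: fall back to full width
            width_new = width
        img_res = cv2.resize(img, (width_new, height))
        canvas = np.ones((height, width, 3), dtype=np.uint8) * 255  # white padding
        canvas[:, :width_new] = img_res
        return canvas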
---
 src/eynollah/sbb_binarize.py    | 4 ++--
 src/eynollah/training/models.py | 2 +-
 src/eynollah/training/utils.py  | 7 +++++--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/eynollah/sbb_binarize.py b/src/eynollah/sbb_binarize.py
index 851ac7d..37ac7c3 100644
--- a/src/eynollah/sbb_binarize.py
+++ b/src/eynollah/sbb_binarize.py
@@ -19,7 +19,7 @@ from eynollah.model_zoo import EynollahModelZoo
 tf_disable_interactive_logs()
 import tensorflow as tf
 from tensorflow.python.keras import backend as tensorflow_backend
-
+from pathlib import Path
 from .utils import is_image_filename
 
 def resize_image(img_in, input_height, input_width):
@@ -347,7 +347,7 @@ class SbbBinarizer:
         self.logger.info("Found %d image files to binarize in %s", len(ls_imgs), dir_in)
         for i, image_path in enumerate(ls_imgs):
             self.logger.info('Binarizing [%3d/%d] %s', i + 1, len(ls_imgs), image_path)
-            image_stem = image_path.split('.')[0]
+            image_stem = Path(image_path).stem
             image = cv2.imread(os.path.join(dir_in,image_path) )
             img_last = 0
             model_file, model = self.models
diff --git a/src/eynollah/training/models.py b/src/eynollah/training/models.py
index 5528761..d1b0aa2 100644
--- a/src/eynollah/training/models.py
+++ b/src/eynollah/training/models.py
@@ -843,7 +843,7 @@ def cnn_rnn_ocr_model(image_height=None, image_width=None, n_classes=None, max_s
 
     addition_rnn = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(image_width, return_sequences=True, dropout=0.25))(addition)
 
-    out = tf.keras.layers.Conv1D(max_seq, 1, data_format="channels_last")(addition_rnn)
+    out = tf.keras.layers.Conv1D(max_seq, 1, data_format="channels_first")(addition_rnn)
     out = tf.keras.layers.BatchNormalization(name="bn9")(out)
     out = tf.keras.layers.Activation("relu", name="relu9")(out)
     #out = tf.keras.layers.Conv1D(n_classes, 1, activation='relu', data_format="channels_last")(out)
diff --git a/src/eynollah/training/utils.py b/src/eynollah/training/utils.py
index c589957..3b685f1 100644
--- a/src/eynollah/training/utils.py
+++ b/src/eynollah/training/utils.py
@@ -1,7 +1,7 @@
 import os
 import math
 import random
-
+from pathlib import Path
 import cv2
 import numpy as np
 import seaborn as sns
@@ -32,6 +32,9 @@ def scale_padd_image_for_ocr(img, height, width):
     else:
         width_new = width
 
+    if width_new <= 0:
+        width_new = width
+
     img_res= resize_image (img, height, width_new)
 
     img_fin = np.ones((height, width, 3))*255
@@ -1304,7 +1307,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
     batchcount = 0
     while True:
         for i in ls_files_images:
-            f_name = i.split('.')[0]
+            f_name = Path(i).stem
 
             txt_inp = open(os.path.join(dir_train, "labels/"+f_name+'.txt'),'r').read().split('\n')[0]

From 33f6a231bc5065731b4b92744e95c67c3b13d6e4 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Mon, 26 Jan 2026 17:30:26 +0100
Subject: [PATCH 6/7] fix: prevent crash when printspace is missing in XMLs
 used for label generation

---
 src/eynollah/training/gt_gen_utils.py | 13 ++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/eynollah/training/gt_gen_utils.py b/src/eynollah/training/gt_gen_utils.py
index 62a094a..0f29f9e 100644
--- a/src/eynollah/training/gt_gen_utils.py
+++ b/src/eynollah/training/gt_gen_utils.py
@@ -734,12 +734,15 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
 
         _, thresh = cv2.threshold(imgray, 0, 255, 0)
         contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
-
+
         cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])
-
-        cnt = contours[np.argmax(cnt_size)]
-
-        x, y, w, h = cv2.boundingRect(cnt)
+
+        try:
+            cnt = contours[np.argmax(cnt_size)]
+            x, y, w, h = cv2.boundingRect(cnt)
+        except:
+            x, y, w, h = 0, 0, x_len, y_len
+
         bb_xywh = [x, y, w, h]
 

From 3500167870fa7963e291857031bcab9df0c7fb5c Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Wed, 28 Jan 2026 11:52:12 +0100
Subject: [PATCH 7/7] weights ensembling for tensorflow models is integrated

---
 src/eynollah/training/cli.py                |   2 +
 src/eynollah/training/weights_ensembling.py | 136 ++++++++++++++++++++
 2 files changed, 138 insertions(+)
 create mode 100644 src/eynollah/training/weights_ensembling.py

diff --git a/src/eynollah/training/cli.py b/src/eynollah/training/cli.py
index 65a7a8a..3718275 100644
--- a/src/eynollah/training/cli.py
+++ b/src/eynollah/training/cli.py
@@ -9,6 +9,7 @@ from .generate_gt_for_training import main as generate_gt_cli
 from .inference import main as inference_cli
 from .train import ex
 from .extract_line_gt import linegt_cli
+from .weights_ensembling import main as ensemble_cli
 
 @click.command(context_settings=dict(
     ignore_unknown_options=True,
@@ -26,3 +27,4 @@ main.add_command(generate_gt_cli, 'generate-gt')
 main.add_command(inference_cli, 'inference')
 main.add_command(train_cli, 'train')
 main.add_command(linegt_cli, 'export_textline_images_and_text')
+main.add_command(ensemble_cli, 'ensembling')
diff --git a/src/eynollah/training/weights_ensembling.py b/src/eynollah/training/weights_ensembling.py
new file mode 100644
index 0000000..6dce7fd
--- /dev/null
+++ b/src/eynollah/training/weights_ensembling.py
@@ -0,0 +1,136 @@
+import sys
+from glob import glob
+from os import environ, devnull
+from os.path import join
+from warnings import catch_warnings, simplefilter
+import os
+
+import numpy as np
+from PIL import Image
+import cv2
+environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+stderr = sys.stderr
+sys.stderr = open(devnull, 'w')
+import tensorflow as tf
+from tensorflow.keras.models import load_model
+from tensorflow.python.keras import backend as tensorflow_backend
+sys.stderr = stderr
+from tensorflow.keras import layers
+import tensorflow.keras.losses
+from tensorflow.keras.layers import *
+import click
+import logging
+
+
+class Patches(layers.Layer):
+    def __init__(self, patch_size_x, patch_size_y):
+        super(Patches, self).__init__()
+        self.patch_size_x = patch_size_x
+        self.patch_size_y = patch_size_y
+
+    def call(self, images):
+        #print(tf.shape(images)[1],'images')
+        #print(self.patch_size,'self.patch_size')
+        batch_size = tf.shape(images)[0]
+        patches = tf.image.extract_patches(
+            images=images,
+            sizes=[1, self.patch_size_y, self.patch_size_x, 1],
+            strides=[1, self.patch_size_y, self.patch_size_x, 1],
+            rates=[1, 1, 1, 1],
+            padding="VALID",
+        )
+        #patch_dims = patches.shape[-1]
+        patch_dims = tf.shape(patches)[-1]
+        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
+        return patches
+    def get_config(self):
+
+        config = super().get_config().copy()
+        config.update({
+            'patch_size_x': self.patch_size_x,
+            'patch_size_y': self.patch_size_y,
+        })
+        return config
+
+
+
+class PatchEncoder(layers.Layer):
+    def __init__(self, num_patches, projection_dim, **kwargs):
+        super(PatchEncoder, self).__init__()
+        self.num_patches = num_patches
+        self.projection = layers.Dense(units=projection_dim)
+        self.position_embedding = layers.Embedding(
+            input_dim=num_patches, output_dim=projection_dim
+        )
+
+    def call(self, patch):
+        positions = tf.range(start=0, limit=self.num_patches, delta=1)
+        encoded = self.projection(patch) + self.position_embedding(positions)
+        return encoded
+    def get_config(self):
+
+        config = super().get_config().copy()
+        config.update({
+            'num_patches': self.num_patches,
+            'projection': self.projection,
+            'position_embedding': self.position_embedding,
+        })
+        return config
+
+
+def start_new_session():
+    ###config = tf.compat.v1.ConfigProto()
+    ###config.gpu_options.allow_growth = True
+
+    ###self.session = tf.compat.v1.Session(config=config) # tf.InteractiveSession()
+    ###tensorflow_backend.set_session(self.session)
+
+    config = tf.compat.v1.ConfigProto()
+    config.gpu_options.allow_growth = True
+
+    session = tf.compat.v1.Session(config=config) # tf.InteractiveSession()
+    tensorflow_backend.set_session(session)
+    return session
+
+def run_ensembling(dir_models, out):
+    ls_models = os.listdir(dir_models)
+
+
+    weights=[]
+
+    for model_name in ls_models:
+        model = load_model(os.path.join(dir_models,model_name) , compile=False, custom_objects={'PatchEncoder':PatchEncoder, 'Patches': Patches})
+        weights.append(model.get_weights())
+
+    new_weights = list()
+
+    for weights_list_tuple in zip(*weights):
+        new_weights.append(
+            [np.array(weights_).mean(axis=0)\
+                for weights_ in zip(*weights_list_tuple)])
+
+
+
+    new_weights = [np.array(x) for x in new_weights]
+
+    model.set_weights(new_weights)
+    model.save(out)
+    os.system('cp ' + os.path.join(dir_models, model_name, "config.json") + ' ' + out)
+
+@click.command()
+@click.option(
+    "--dir_models",
+    "-dm",
+    help="directory of models",
+    type=click.Path(exists=True, file_okay=False),
+)
+@click.option(
+    "--out",
+    "-o",
+    help="output directory where ensembled model will be written.",
+    type=click.Path(exists=False, file_okay=False),
+)
+
+def main(dir_models, out):
+    run_ensembling(dir_models, out)
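
Note on the averaging step in run_ensembling: model.get_weights() returns one
numpy array per layer variable, and zip(*weights) lines up the corresponding
arrays from all models so they can be averaged variable by variable. Below is
a minimal self-contained sketch of the same technique; toy_model and the layer
shapes are illustrative only, not part of eynollah, and it uses
np.stack/np.mean, which computes the same elementwise average as the nested
zip in the patch:

    import numpy as np
    import tensorflow as tf

    def toy_model():
        # two identically structured models -> identically shaped weight lists
        return tf.keras.Sequential([
            tf.keras.layers.Dense(4, activation="relu", input_shape=(3,)),
            tf.keras.layers.Dense(2),
        ])

    models = [toy_model(), toy_model()]
    weights = [m.get_weights() for m in models]

    # average corresponding variables across models
    avg = [np.mean(np.stack(arrs, axis=0), axis=0) for arrs in zip(*weights)]

    ensembled = toy_model()
    ensembled.set_weights(avg)  # ensembled now carries the averaged weights

Assuming the package exposes the training CLI as a console script, the new
subcommand registered in cli.py would be invoked along the lines of
`<training-cli> ensembling -dm path/to/models/ -o path/to/ensembled_model`
(the actual executable name depends on the project's entry-point
configuration). Plain weight averaging is only meaningful for models with
identical architectures, e.g. checkpoints from the same training run.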