diff --git a/requirements-ocr.txt b/requirements-ocr.txt
index 9f31ebb..8f3b062 100644
--- a/requirements-ocr.txt
+++ b/requirements-ocr.txt
@@ -1,2 +1,2 @@
-torch <= 2.0.1
+torch
 transformers <= 4.30.2
diff --git a/requirements.txt b/requirements.txt
index db1d7df..5699566 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,9 @@
 # ocrd includes opencv, numpy, shapely, click
 ocrd >= 3.3.0
-numpy <1.24.0
+numpy < 2.0
 scikit-learn >= 0.23.2
-tensorflow < 2.13
+tensorflow
+tf-keras # avoid keras 3 (also needs TF_USE_LEGACY_KERAS=1)
 numba <= 0.58.1
 scikit-image
 biopython
diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index c33b9f8..4a83c0a 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -56,14 +56,12 @@
 except ImportError:
     TrOCRProcessor = VisionEncoderDecoderModel = None
 
 #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
+os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
 tf_disable_interactive_logs()
 import tensorflow as tf
-from tensorflow.python.keras import backend as K
 from tensorflow.keras.models import load_model
 tf.get_logger().setLevel("ERROR")
 warnings.filterwarnings("ignore")
-# use tf1 compatibility for keras backend
-from tensorflow.compat.v1.keras.backend import set_session
 from tensorflow.keras import layers
 from tensorflow.keras.layers import StringLookup
@@ -277,14 +275,6 @@ class Eynollah:
 
         t_start = time.time()
 
-        # #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
-        # #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
-        # #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
-        # config = tf.compat.v1.ConfigProto()
-        # config.gpu_options.allow_growth = True
-        # #session = tf.InteractiveSession()
-        # session = tf.compat.v1.Session(config=config)
-        # set_session(session)
        try:
            for device in tf.config.list_physical_devices('GPU'):
                tf.config.experimental.set_memory_growth(device, True)
diff --git a/src/eynollah/sbb_binarize.py b/src/eynollah/sbb_binarize.py
index b81f45e..2ca4a40 100644
--- a/src/eynollah/sbb_binarize.py
+++ b/src/eynollah/sbb_binarize.py
@@ -2,19 +2,19 @@
 Tool to load model and binarize a given image.
 """
 
-import sys
 from glob import glob
 import os
 import logging
 
+from PIL import Image
 import numpy as np
-from PIL import Image
 import cv2
 from ocrd_utils import tf_disable_interactive_logs
+
+os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
 tf_disable_interactive_logs()
 import tensorflow as tf
 from tensorflow.keras.models import load_model
-from tensorflow.python.keras import backend as tensorflow_backend
 
 from .utils import is_image_filename
@@ -27,26 +27,17 @@ class SbbBinarizer:
         self.model_dir = model_dir
         self.logger = logger if logger else logging.getLogger('SbbBinarizer')
-        self.start_new_session()
-
-        self.model_files = glob(self.model_dir+"/*/", recursive = True)
+        try:
+            for device in tf.config.list_physical_devices('GPU'):
+                tf.config.experimental.set_memory_growth(device, True)
+        except:
+            self.logger.warning("no GPU device available")
+        self.model_files = glob(self.model_dir + "/*/", recursive=True)
         self.models = []
         for model_file in self.model_files:
             self.models.append(self.load_model(model_file))
 
-    def start_new_session(self):
-        config = tf.compat.v1.ConfigProto()
-        config.gpu_options.allow_growth = True
-
-        self.session = tf.compat.v1.Session(config=config) # tf.InteractiveSession()
-        tensorflow_backend.set_session(self.session)
-
-    def end_session(self):
-        tensorflow_backend.clear_session()
-        self.session.close()
-        del self.session
-
     def load_model(self, model_name):
         model = load_model(os.path.join(self.model_dir, model_name), compile=False)
         model_height = model.layers[len(model.layers)-1].output_shape[1]
@@ -55,7 +46,6 @@ class SbbBinarizer:
         return model, model_height, model_width, n_classes
 
     def predict(self, model_in, img, use_patches, n_batch_inference=5):
-        tensorflow_backend.set_session(self.session)
         model, model_height, model_width, n_classes = model_in
 
         img_org_h = img.shape[0]
diff --git a/src/eynollah/training/build_model_load_pretrained_weights_and_save.py b/src/eynollah/training/build_model_load_pretrained_weights_and_save.py
index 40fc1fe..9fba66b 100644
--- a/src/eynollah/training/build_model_load_pretrained_weights_and_save.py
+++ b/src/eynollah/training/build_model_load_pretrained_weights_and_save.py
@@ -1,3 +1,4 @@
+import sys
 import click
 import tensorflow as tf
 
@@ -5,8 +6,11 @@
 from .models import resnet50_unet
 
 def configuration():
-    gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
-    session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
+    try:
+        for device in tf.config.list_physical_devices('GPU'):
+            tf.config.experimental.set_memory_growth(device, True)
+    except:
+        print("no GPU device available", file=sys.stderr)
 
 @click.command()
 def build_model_load_pretrained_weights_and_save():
diff --git a/src/eynollah/training/inference.py b/src/eynollah/training/inference.py
index 3fa8fd6..15d1e6a 100644
--- a/src/eynollah/training/inference.py
+++ b/src/eynollah/training/inference.py
@@ -1,16 +1,19 @@
+"""
+Tool to load model and predict for given image.
+"""
+
 import sys
 import os
 import warnings
 import json
+import click
 
 import numpy as np
 import cv2
-from tensorflow.keras.models import load_model
+
+os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
 import tensorflow as tf
-from tensorflow.keras import backend as K
-from tensorflow.keras.layers import *
-import click
-from tensorflow.python.keras import backend as tensorflow_backend
+from tensorflow.keras.models import load_model
 
 import xml.etree.ElementTree as ET
 from .gt_gen_utils import (
@@ -24,17 +27,29 @@
 from .models import (
     PatchEncoder,
     Patches
 )
+from .metrics import (
+    soft_dice_loss,
+    weighted_categorical_crossentropy,
+)
 
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
 
-__doc__=\
-"""
-Tool to load model and predict for given image.
-"""
+class SBBPredict:
+    def __init__(self,
+                 image,
+                 dir_in,
+                 model,
+                 task,
+                 config_params_model,
+                 patches,
+                 save,
+                 save_layout,
+                 ground_truth,
+                 xml_file,
+                 out,
+                 min_area):
-class sbb_predict:
-    def __init__(self,image, dir_in, model, task, config_params_model, patches, save, save_layout, ground_truth, xml_file, out, min_area):
         self.image=image
         self.dir_in=dir_in
         self.patches=patches
@@ -52,8 +67,9 @@ class sbb_predict:
             self.min_area = 0
 
     def resize_image(self,img_in,input_height,input_width):
-        return cv2.resize( img_in, ( input_width,input_height) ,interpolation=cv2.INTER_NEAREST)
-
+        return cv2.resize(img_in, (input_width,
+                                   input_height),
+                          interpolation=cv2.INTER_NEAREST)
 
     def color_images(self,seg):
         ann_u=range(self.n_classes)
@@ -69,68 +85,6 @@ class sbb_predict:
             seg_img[:,:,2][seg==c]=c
         return seg_img
 
-    def otsu_copy_binary(self,img):
-        img_r=np.zeros((img.shape[0],img.shape[1],3))
-        img1=img[:,:,0]
-
-        #print(img.min())
-        #print(img[:,:,0].min())
-        #blur = cv2.GaussianBlur(img,(5,5))
-        #ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
-        retval1, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
-
-
-
-        img_r[:,:,0]=threshold1
-        img_r[:,:,1]=threshold1
-        img_r[:,:,2]=threshold1
-        #img_r=img_r/float(np.max(img_r))*255
-        return img_r
-
-    def otsu_copy(self,img):
-        img_r=np.zeros((img.shape[0],img.shape[1],3))
-        #img1=img[:,:,0]
-
-        #print(img.min())
-        #print(img[:,:,0].min())
-        #blur = cv2.GaussianBlur(img,(5,5))
-        #ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
-        _, threshold1 = cv2.threshold(img[:,:,0], 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
-        _, threshold2 = cv2.threshold(img[:,:,1], 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
-        _, threshold3 = cv2.threshold(img[:,:,2], 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
-
-
-
-        img_r[:,:,0]=threshold1
-        img_r[:,:,1]=threshold2
-        img_r[:,:,2]=threshold3
-        ###img_r=img_r/float(np.max(img_r))*255
-        return img_r
-
-    def soft_dice_loss(self,y_true, y_pred, epsilon=1e-6):
-
-        axes = tuple(range(1, len(y_pred.shape)-1))
-
-        numerator = 2. * K.sum(y_pred * y_true, axes)
-
-        denominator = K.sum(K.square(y_pred) + K.square(y_true), axes)
-        return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch
-
-    def weighted_categorical_crossentropy(self,weights=None):
-
-        def loss(y_true, y_pred):
-            labels_floats = tf.cast(y_true, tf.float32)
-            per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats,logits=y_pred)
-
-            if weights is not None:
-                weight_mask = tf.maximum(tf.reduce_max(tf.constant(
-                    np.array(weights, dtype=np.float32)[None, None, None])
-                    * labels_floats, axis=-1), 1.0)
-                per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None]
-            return tf.reduce_mean(per_pixel_loss)
-        return self.loss
-
     def IoU(self,Yi,y_predi):
         ## mean Intersection over Union
         ## Mean IoU = TP/(FN + TP + FP)
@@ -157,30 +111,28 @@ class sbb_predict:
         return mIoU
 
     def start_new_session_and_model(self):
-
-        config = tf.compat.v1.ConfigProto()
-        config.gpu_options.allow_growth = True
+        try:
+            for device in tf.config.list_physical_devices('GPU'):
+                tf.config.experimental.set_memory_growth(device, True)
+        except:
+            print("no GPU device available", file=sys.stderr)
 
-        session = tf.compat.v1.Session(config=config) # tf.InteractiveSession()
-        tensorflow_backend.set_session(session)
         #tensorflow.keras.layers.custom_layer = PatchEncoder
         #tensorflow.keras.layers.custom_layer = Patches
-        self.model = load_model(self.model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
-        #config = tf.ConfigProto()
-        #config.gpu_options.allow_growth=True
-
-        #self.session = tf.InteractiveSession()
-        #keras.losses.custom_loss = self.weighted_categorical_crossentropy
-        #self.model = load_model(self.model_dir , compile=False)
+        self.model = load_model(self.model_dir, compile=False,
+                                custom_objects={"PatchEncoder": PatchEncoder,
+                                                "Patches": Patches})
+        #keras.losses.custom_loss = weighted_categorical_crossentropy
+        #self.model = load_model(self.model_dir, compile=False)
 
-        ##if self.weights_dir!=None:
         ##self.model.load_weights(self.weights_dir)
 
         if self.task != 'classification' and self.task != 'reading_order':
-            self.img_height=self.model.layers[len(self.model.layers)-1].output_shape[1]
-            self.img_width=self.model.layers[len(self.model.layers)-1].output_shape[2]
-            self.n_classes=self.model.layers[len(self.model.layers)-1].output_shape[3]
+            last = self.model.layers[-1]
+            self.img_height = last.output_shape[1]
+            self.img_width = last.output_shape[2]
+            self.n_classes = last.output_shape[3]
 
     def visualize_model_output(self, prediction, img, task):
         if task == "binarization":
@@ -208,21 +160,16 @@ class sbb_predict:
             '15' : [255, 0, 255]}
 
         layout_only = np.zeros(prediction.shape)
-
         for unq_class in unique_classes:
+            where = prediction[:,:,0]==unq_class
             rgb_class_unique = rgb_colors[str(int(unq_class))]
-            layout_only[:,:,0][prediction[:,:,0]==unq_class] = rgb_class_unique[0]
-            layout_only[:,:,1][prediction[:,:,0]==unq_class] = rgb_class_unique[1]
-            layout_only[:,:,2][prediction[:,:,0]==unq_class] = rgb_class_unique[2]
-
-
+            layout_only[:,:,0][where] = rgb_class_unique[0]
+            layout_only[:,:,1][where] = rgb_class_unique[1]
+            layout_only[:,:,2][where] = rgb_class_unique[2]
 
+        layout_only = layout_only.astype(np.int32)
         img = self.resize_image(img, layout_only.shape[0], layout_only.shape[1])
-
-        layout_only = layout_only.astype(np.int32)
         img = img.astype(np.int32)
-
-
         added_image = cv2.addWeighted(img,0.5,layout_only,0.1,0)
@@ -231,10 +178,10 @@ class sbb_predict:
 
     def predict(self, image_dir):
        if self.task == 'classification':
            classes_names = self.config_params_model['classification_classes_name']
-            img_1ch = img=cv2.imread(image_dir, 0)
-
-            img_1ch = img_1ch / 255.0
-            img_1ch = cv2.resize(img_1ch, (self.config_params_model['input_height'], self.config_params_model['input_width']), interpolation=cv2.INTER_NEAREST)
+            img_1ch = cv2.imread(image_dir, 0) / 255.0
+            img_1ch = cv2.resize(img_1ch, (self.config_params_model['input_height'],
+                                           self.config_params_model['input_width']),
+                                 interpolation=cv2.INTER_NEAREST)
 
             img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
             img_in[0, :, :, 0] = img_1ch[:, :]
             img_in[0, :, :, 1] = img_1ch[:, :]
@@ -244,23 +191,27 @@ class sbb_predict:
             index_class = np.argmax(label_p_pred[0])
             print("Predicted Class: {}".format(classes_names[str(int(index_class))]))
+
         elif self.task == 'reading_order':
             img_height = self.config_params_model['input_height']
             img_width = self.config_params_model['input_width']
 
-            tree_xml, root_xml, bb_coord_printspace, file_name, id_paragraph, id_header, co_text_paragraph, co_text_header, tot_region_ref, x_len, y_len, index_tot_regions, img_poly = read_xml(self.xml_file)
-            _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(co_text_header)
+            tree_xml, root_xml, bb_coord_printspace, file_name, \
+                id_paragraph, id_header, \
+                co_text_paragraph, co_text_header, \
+                tot_region_ref, x_len, y_len, index_tot_regions, \
+                img_poly = read_xml(self.xml_file)
+            _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = \
+                find_new_features_of_contours(co_text_header)
 
             img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8')
-
             for j in range(len(cy_main)):
-                img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1
-
+                img_header_and_sep[int(y_max_main[j]): int(y_max_main[j]) + 12,
+                                   int(x_min_main[j]): int(x_max_main[j])] = 1
+
             co_text_all = co_text_paragraph + co_text_header
             id_all_text = id_paragraph + id_header
-
             ##texts_corr_order_index = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ]
             ##texts_corr_order_index_int = [int(x) for x in texts_corr_order_index]
             texts_corr_order_index_int = list(np.array(range(len(co_text_all))))
@@ -271,7 +222,8 @@ class sbb_predict:
             #print(np.shape(co_text_all[0]), len( np.shape(co_text_all[0]) ),'co_text_all')
             #co_text_all = filter_contours_area_of_image_tables(img_poly, co_text_all, _, max_area, min_area)
             #print(co_text_all,'co_text_all')
-            co_text_all, texts_corr_order_index_int, _ = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, self.min_area)
+            co_text_all, texts_corr_order_index_int, _ = filter_contours_area_of_image(
+                img_poly, co_text_all, texts_corr_order_index_int, max_area, self.min_area)
 
             #print(texts_corr_order_index_int)
@@ -664,17 +616,15 @@ class sbb_predict:
     help="min area size of regions considered for reading order detection. The default value is zero and means that all text regions are considered for reading order.",
 )
 def main(image, dir_in, model, patches, save, save_layout, ground_truth, xml_file, out, min_area):
-    assert image or dir_in, "Either a single image -i or a dir_in -di is required"
+    assert image or dir_in, "Either a single image -i or a dir_in -di input is required"
     with open(os.path.join(model,'config.json')) as f:
         config_params_model = json.load(f)
     task = config_params_model['task']
     if task != 'classification' and task != 'reading_order':
-        if image and not save:
-            print("Error: You used one of segmentation or binarization task with image input but not set -s, you need a filename to save visualized output with -s")
-            sys.exit(1)
-        if dir_in and not out:
-            print("Error: You used one of segmentation or binarization task with dir_in but not set -out")
-            sys.exit(1)
-    x=sbb_predict(image, dir_in, model, task, config_params_model, patches, save, save_layout, ground_truth, xml_file, out, min_area)
+        assert not image or save, "For segmentation or binarization, an input single image -i also requires an output filename -s"
+        assert not dir_in or out, "For segmentation or binarization, an input directory -di also requires an output directory -o"
+    x = SBBPredict(image, dir_in, model, task, config_params_model,
+                   patches, save, save_layout, ground_truth, xml_file, out,
+                   min_area)
     x.run()
diff --git a/src/eynollah/training/train.py b/src/eynollah/training/train.py
index 97736e0..da901b0 100644
--- a/src/eynollah/training/train.py
+++ b/src/eynollah/training/train.py
@@ -28,14 +28,14 @@
 from eynollah.training.utils import (
 )
 
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
 import tensorflow as tf
-from tensorflow.compat.v1.keras.backend import set_session
 from tensorflow.keras.optimizers import SGD, Adam
-from sacred import Experiment
 from tensorflow.keras.models import load_model
+from tensorflow.keras.callbacks import Callback, TensorBoard
+from sacred import Experiment
 from tqdm import tqdm
 from sklearn.metrics import f1_score
-from tensorflow.keras.callbacks import Callback
 import numpy as np
 import cv2
@@ -63,10 +63,11 @@ class SaveWeightsAfterSteps(Callback):
 
 
 def configuration():
-    config = tf.compat.v1.ConfigProto()
-    config.gpu_options.allow_growth = True
-    session = tf.compat.v1.Session(config=config)
-    set_session(session)
+    try:
+        for device in tf.config.list_physical_devices('GPU'):
+            tf.config.experimental.set_memory_growth(device, True)
+    except:
+        print("no GPU device available", file=sys.stderr)
 
 
 def get_dirs_or_files(input_data):
@@ -171,12 +172,11 @@ def run(_config, n_classes, n_epochs, input_height,
     else:
         list_all_possible_foreground_rgbs = None
 
-    if task == "segmentation" or task == "enhancement" or task == "binarization":
+    if task in ["segmentation", "enhancement", "binarization"]:
         if data_is_provided:
             dir_train_flowing = os.path.join(dir_output, 'train')
             dir_eval_flowing = os.path.join(dir_output, 'eval')
-
             dir_flow_train_imgs = os.path.join(dir_train_flowing, 'images')
             dir_flow_train_labels = os.path.join(dir_train_flowing, 'labels')
@@ -227,176 +227,228 @@ def run(_config, n_classes, n_epochs, input_height,
             segs_list_test=np.array(os.listdir(dir_seg_val))
 
             # writing patches into a sub-folder in order to be flowed from directory.
-            provide_patches(imgs_list, segs_list, dir_img, dir_seg, dir_flow_train_imgs,
-                            dir_flow_train_labels, input_height, input_width, blur_k,
-                            blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background,adding_rgb_foreground, add_red_textlines, channels_shuffling,
-                            scaling, shifting, degrading, brightening, scales, degrade_scales, brightness,
-                            flip_index,shuffle_indexes, scaling_bluring, scaling_brightness, scaling_binarization,
-                            rotation, rotation_not_90, thetha, scaling_flip, task, augmentation=augmentation,
-                            patches=patches, dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds, dir_rgb_foregrounds=dir_rgb_foregrounds,list_all_possible_foreground_rgbs=list_all_possible_foreground_rgbs)
-
-            provide_patches(imgs_list_test, segs_list_test, dir_img_val, dir_seg_val,
-                            dir_flow_eval_imgs, dir_flow_eval_labels, input_height, input_width,
-                            blur_k, blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, channels_shuffling,
-                            scaling, shifting, degrading, brightening, scales, degrade_scales, brightness,
-                            flip_index, shuffle_indexes, scaling_bluring, scaling_brightness, scaling_binarization,
-                            rotation, rotation_not_90, thetha, scaling_flip, task, augmentation=False, patches=patches,dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds,dir_rgb_foregrounds=dir_rgb_foregrounds,list_all_possible_foreground_rgbs=list_all_possible_foreground_rgbs )
+            common_args = [input_height, input_width,
+                           blur_k, blur_aug,
+                           padding_white, padding_black,
+                           flip_aug, binarization,
+                           adding_rgb_background,
+                           adding_rgb_foreground,
+                           add_red_textlines,
+                           channels_shuffling,
+                           scaling, shifting, degrading, brightening,
+                           scales, degrade_scales, brightness,
+                           flip_index, shuffle_indexes,
+                           scaling_bluring, scaling_brightness, scaling_binarization,
+                           rotation, rotation_not_90, thetha,
+                           scaling_flip, task,
+                           ]
+            common_kwargs = dict(patches=patches,
+                                 dir_img_bin=dir_img_bin,
+                                 number_of_backgrounds_per_image=number_of_backgrounds_per_image,
+                                 list_all_possible_background_images=list_all_possible_background_images,
+                                 dir_rgb_backgrounds=dir_rgb_backgrounds,
+                                 dir_rgb_foregrounds=dir_rgb_foregrounds,
+                                 list_all_possible_foreground_rgbs=list_all_possible_foreground_rgbs,
+                                 )
+            provide_patches(imgs_list, segs_list,
+                            dir_img, dir_seg,
+                            dir_flow_train_imgs,
+                            dir_flow_train_labels,
+                            *common_args,
+                            augmentation=augmentation,
+                            **common_kwargs)
+            provide_patches(imgs_list_test, segs_list_test,
+                            dir_img_val, dir_seg_val,
+                            dir_flow_eval_imgs,
+                            dir_flow_eval_labels,
+                            *common_args,
+                            augmentation=False,
+                            **common_kwargs)
 
         if weighted_loss:
             weights = np.zeros(n_classes)
             if data_is_provided:
-                for obj in os.listdir(dir_flow_train_labels):
-                    try:
-                        label_obj = cv2.imread(dir_flow_train_labels + '/' + obj)
-                        label_obj_one_hot = get_one_hot(label_obj, label_obj.shape[0], label_obj.shape[1], n_classes)
-                        weights += (label_obj_one_hot.sum(axis=0)).sum(axis=0)
-                    except:
-                        pass
+                dirs = dir_flow_train_labels
             else:
-                for obj in os.listdir(dir_seg):
-                    try:
-                        label_obj = cv2.imread(dir_seg + '/' + obj)
-                        label_obj_one_hot = get_one_hot(label_obj, label_obj.shape[0], label_obj.shape[1], n_classes)
-                        weights += (label_obj_one_hot.sum(axis=0)).sum(axis=0)
-                    except:
-                        pass
+                dirs = dir_seg
+            for obj in os.listdir(dirs):
+                label_file = os.path.join(dirs, obj)
+                try:
+                    label_obj = cv2.imread(label_file)
+                    label_obj_one_hot = get_one_hot(label_obj, label_obj.shape[0], label_obj.shape[1], n_classes)
+                    weights += (label_obj_one_hot.sum(axis=0)).sum(axis=0)
+                except Exception as e:
+                    print("error reading data file '%s': %s" % (label_file, e), file=sys.stderr)
 
             weights = 1.00 / weights
-            weights = weights / float(np.sum(weights))
             weights = weights / float(np.min(weights))
             weights = weights / float(np.sum(weights))
 
         if continue_training:
-            if backbone_type=='nontransformer':
-                if is_loss_soft_dice and (task == "segmentation" or task == "binarization"):
-                    model = load_model(dir_of_start_model, compile=True, custom_objects={'soft_dice_loss': soft_dice_loss})
-                if weighted_loss and (task == "segmentation" or task == "binarization"):
-                    model = load_model(dir_of_start_model, compile=True, custom_objects={'loss': weighted_categorical_crossentropy(weights)})
-                if not is_loss_soft_dice and not weighted_loss:
+            if backbone_type == 'nontransformer':
+                if is_loss_soft_dice and task in ["segmentation", "binarization"]:
+                    model = load_model(dir_of_start_model, compile=True,
+                                       custom_objects={'soft_dice_loss': soft_dice_loss})
+                elif weighted_loss and task in ["segmentation", "binarization"]:
+                    model = load_model(dir_of_start_model, compile=True,
+                                       custom_objects={'loss': weighted_categorical_crossentropy(weights)})
+                else:
                     model = load_model(dir_of_start_model , compile=True)
-            elif backbone_type=='transformer':
-                if is_loss_soft_dice and (task == "segmentation" or task == "binarization"):
-                    model = load_model(dir_of_start_model, compile=True, custom_objects={"PatchEncoder": PatchEncoder, "Patches": Patches,'soft_dice_loss': soft_dice_loss})
-                if weighted_loss and (task == "segmentation" or task == "binarization"):
-                    model = load_model(dir_of_start_model, compile=True, custom_objects={'loss': weighted_categorical_crossentropy(weights)})
-                if not is_loss_soft_dice and not weighted_loss:
-                    model = load_model(dir_of_start_model , compile=True,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
+
+            elif backbone_type == 'transformer':
+                if is_loss_soft_dice and task in ["segmentation", "binarization"]:
+                    model = load_model(dir_of_start_model, compile=True,
+                                       custom_objects={"PatchEncoder": PatchEncoder,
+                                                       "Patches": Patches,
+                                                       'soft_dice_loss': soft_dice_loss})
+                elif weighted_loss and task in ["segmentation", "binarization"]:
+                    model = load_model(dir_of_start_model, compile=True,
+                                       custom_objects={'loss': weighted_categorical_crossentropy(weights)})
+                else:
+                    model = load_model(dir_of_start_model, compile=True,
+                                       custom_objects = {"PatchEncoder": PatchEncoder,
+                                                         "Patches": Patches})
         else:
             index_start = 0
-            if backbone_type=='nontransformer':
-                model = resnet50_unet(n_classes, input_height, input_width, task, weight_decay, pretraining)
-            elif backbone_type=='transformer':
+            if backbone_type == 'nontransformer':
+                model = resnet50_unet(n_classes,
+                                      input_height,
+                                      input_width,
+                                      task,
+                                      weight_decay,
+                                      pretraining)
+            elif backbone_type == 'transformer':
                 num_patches_x = transformer_num_patches_xy[0]
                 num_patches_y = transformer_num_patches_xy[1]
                 num_patches = num_patches_x * num_patches_y
 
                 if transformer_cnn_first:
-                    if input_height != (num_patches_y * transformer_patchsize_y * 32):
-                        print("Error: transformer_patchsize_y or transformer_num_patches_xy height value error . input_height should be equal to ( transformer_num_patches_xy height value * transformer_patchsize_y * 32)")
-                        sys.exit(1)
-                    if input_width != (num_patches_x * transformer_patchsize_x * 32):
-                        print("Error: transformer_patchsize_x or transformer_num_patches_xy width value error . input_width should be equal to ( transformer_num_patches_xy width value * transformer_patchsize_x * 32)")
-                        sys.exit(1)
-                    if (transformer_projection_dim % (transformer_patchsize_y * transformer_patchsize_x)) != 0:
-                        print("Error: transformer_projection_dim error. The remainder when parameter transformer_projection_dim is divided by (transformer_patchsize_y*transformer_patchsize_x) should be zero")
-                        sys.exit(1)
-
-
-                    model = vit_resnet50_unet(n_classes, transformer_patchsize_x, transformer_patchsize_y, num_patches, transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_projection_dim, input_height, input_width, task, weight_decay, pretraining)
+                    model_builder = vit_resnet50_unet
+                    multiple_of_32 = True
                 else:
-                    if input_height != (num_patches_y * transformer_patchsize_y):
-                        print("Error: transformer_patchsize_y or transformer_num_patches_xy height value error . input_height should be equal to ( transformer_num_patches_xy height value * transformer_patchsize_y)")
-                        sys.exit(1)
-                    if input_width != (num_patches_x * transformer_patchsize_x):
-                        print("Error: transformer_patchsize_x or transformer_num_patches_xy width value error . input_width should be equal to ( transformer_num_patches_xy width value * transformer_patchsize_x)")
-                        sys.exit(1)
-                    if (transformer_projection_dim % (transformer_patchsize_y * transformer_patchsize_x)) != 0:
-                        print("Error: transformer_projection_dim error. The remainder when parameter transformer_projection_dim is divided by (transformer_patchsize_y*transformer_patchsize_x) should be zero")
-                        sys.exit(1)
-                    model = vit_resnet50_unet_transformer_before_cnn(n_classes, transformer_patchsize_x, transformer_patchsize_y, num_patches, transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_projection_dim, input_height, input_width, task, weight_decay, pretraining)
+                    model_builder = vit_resnet50_unet_transformer_before_cnn
+                    multiple_of_32 = False
+
+                assert input_height == num_patches_y * transformer_patchsize_y * (32 if multiple_of_32 else 1), \
+                    "transformer_patchsize_y or transformer_num_patches_xy height value error: " \
+                    "input_height should be equal to " \
+                    "(transformer_num_patches_xy height value * transformer_patchsize_y%s)" % \
+                    (" * 32" if multiple_of_32 else "")
+                assert input_width == num_patches_x * transformer_patchsize_x * (32 if multiple_of_32 else 1), \
+                    "transformer_patchsize_x or transformer_num_patches_xy width value error: " \
+                    "input_width should be equal to " \
+                    "(transformer_num_patches_xy width value * transformer_patchsize_x%s)" % \
+                    (" * 32" if multiple_of_32 else "")
+                assert 0 == transformer_projection_dim % (transformer_patchsize_y * transformer_patchsize_x), \
+                    "transformer_projection_dim error: " \
+                    "The remainder when parameter transformer_projection_dim is divided by " \
+                    "(transformer_patchsize_y*transformer_patchsize_x) should be zero"
+
+                model = model_builder(
+                    n_classes,
+                    transformer_patchsize_x,
+                    transformer_patchsize_y,
+                    num_patches,
+                    transformer_mlp_head_units,
+                    transformer_layers,
+                    transformer_num_heads,
+                    transformer_projection_dim,
+                    input_height,
+                    input_width,
+                    task,
+                    weight_decay,
+                    pretraining)
 
         #if you want to see the model structure just uncomment model summary. model.summary()
-
-        if task == "segmentation" or task == "binarization":
-            if not is_loss_soft_dice and not weighted_loss:
-                model.compile(loss='categorical_crossentropy',
-                              optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy'])
+        if task in ["segmentation", "binarization"]:
             if is_loss_soft_dice:
-                model.compile(loss=soft_dice_loss,
-                              optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy'])
-            if weighted_loss:
-                model.compile(loss=weighted_categorical_crossentropy(weights),
-                              optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy'])
-        elif task == "enhancement":
-            model.compile(loss='mean_squared_error',
-                          optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy'])
-
+                loss = soft_dice_loss
+            elif weighted_loss:
+                loss = weighted_categorical_crossentropy(weights)
+            else:
+                loss = 'categorical_crossentropy'
+        else: # task == "enhancement"
+            loss = 'mean_squared_error'
+        model.compile(loss=loss,
+                      optimizer=Adam(learning_rate=learning_rate),
+                      metrics=['accuracy'])
 
         # generating train and evaluation data
-        train_gen = data_gen(dir_flow_train_imgs, dir_flow_train_labels, batch_size=n_batch,
-                             input_height=input_height, input_width=input_width, n_classes=n_classes, task=task)
-        val_gen = data_gen(dir_flow_eval_imgs, dir_flow_eval_labels, batch_size=n_batch,
-                           input_height=input_height, input_width=input_width, n_classes=n_classes, task=task)
-
+        gen_kwargs = dict(batch_size=n_batch,
+                          input_height=input_height,
+                          input_width=input_width,
+                          n_classes=n_classes,
+                          task=task)
+        train_gen = data_gen(dir_flow_train_imgs, dir_flow_train_labels, **gen_kwargs)
+        val_gen = data_gen(dir_flow_eval_imgs, dir_flow_eval_labels, **gen_kwargs)
+
         ##img_validation_patches = os.listdir(dir_flow_eval_imgs)
         ##score_best=[]
         ##score_best.append(0)
+
+        callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
         if save_interval:
-            save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config)
-
+            callbacks.append(SaveWeightsAfterSteps(save_interval, dir_output, _config))
 
         for i in tqdm(range(index_start, n_epochs + index_start)):
-            if save_interval:
-                model.fit(
-                    train_gen,
-                    steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs)) / n_batch) - 1,
-                    validation_data=val_gen,
-                    validation_steps=1,
-                    epochs=1, callbacks=[save_weights_callback])
-            else:
-                model.fit(
-                    train_gen,
-                    steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs)) / n_batch) - 1,
-                    validation_data=val_gen,
-                    validation_steps=1,
-                    epochs=1)
-
-            model.save(os.path.join(dir_output,'model_'+str(i)))
-
-            with open(os.path.join(os.path.join(dir_output,'model_'+str(i)),"config.json"), "w") as fp:
+            model.fit(
+                train_gen,
+                steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs)) / n_batch) - 1,
+                validation_data=val_gen,
+                validation_steps=1,
+                epochs=1,
+                callbacks=callbacks)
+
+            dir_model = os.path.join(dir_output, 'model_' + str(i))
+            model.save(dir_model)
+            with open(os.path.join(dir_model, "config.json"), "w") as fp:
                 json.dump(_config, fp) # encode dict into JSON
             #os.system('rm -rf '+dir_train_flowing)
             #os.system('rm -rf '+dir_eval_flowing)
             #model.save(dir_output+'/'+'model'+'.h5')
+
    elif task=='classification':
        configuration()
-        model = resnet50_classifier(n_classes, input_height, input_width, weight_decay, pretraining)
+        model = resnet50_classifier(n_classes,
+                                    input_height,
+                                    input_width,
+                                    weight_decay,
+                                    pretraining)
 
-        opt_adam = Adam(learning_rate=0.001)
         model.compile(loss='categorical_crossentropy',
-                      optimizer = opt_adam,metrics=['accuracy'])
-
+                      optimizer=Adam(learning_rate=0.001), # rs: why not learning_rate?
+                      metrics=['accuracy'])
 
         list_classes = list(classification_classes_name.values())
-        testX, testY = generate_data_from_folder_evaluation(dir_eval, input_height, input_width, n_classes, list_classes)
-
-        y_tot=np.zeros((testX.shape[0],n_classes))
+        trainXY = generate_data_from_folder_training(
+            dir_train, n_batch, input_height, input_width, n_classes, list_classes)
+        testX, testY = generate_data_from_folder_evaluation(
+            dir_eval, input_height, input_width, n_classes, list_classes)
+        y_tot = np.zeros((testX.shape[0], n_classes))
 
         score_best= [0]
-
         num_rows = return_number_of_total_training_data(dir_train)
         weights=[]
 
+        callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
         for i in range(n_epochs):
-            history = model.fit( generate_data_from_folder_training(dir_train, n_batch , input_height, input_width, n_classes, list_classes), steps_per_epoch=num_rows / n_batch, verbose=1)#,class_weight=weights)
-
+            history = model.fit(trainXY,
+                                steps_per_epoch=num_rows / n_batch,
+                                #class_weight=weights)
+                                verbose=1,
+                                callbacks=callbacks)
             y_pr_class = []
             for jj in range(testY.shape[0]):
                 y_pr=model.predict(testX[jj,:,:,:].reshape(1,input_height,input_width,3), verbose=0)
@@ -433,7 +485,8 @@ def run(_config, n_classes, n_epochs, input_height,
 
    elif task=='reading_order':
        configuration()
-        model = machine_based_reading_order_model(n_classes,input_height,input_width,weight_decay,pretraining)
+        model = machine_based_reading_order_model(
+            n_classes, input_height, input_width, weight_decay, pretraining)
 
        dir_flow_train_imgs = os.path.join(dir_train, 'images')
        dir_flow_train_labels = os.path.join(dir_train, 'labels')
@@ -447,20 +500,26 @@ def run(_config, n_classes, n_epochs, input_height,
        #f1score_tot = [0]
        indexer_start = 0
-        # opt = SGD(learning_rate=0.01, momentum=0.9)
-        opt_adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
        model.compile(loss="binary_crossentropy",
-                      optimizer = opt_adam,metrics=['accuracy'])
+                      #optimizer=SGD(learning_rate=0.01, momentum=0.9),
+                      optimizer=Adam(learning_rate=0.0001), # rs: why not learning_rate?
+                      metrics=['accuracy'])
 
+        callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
        if save_interval:
-            save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config)
-
+            callbacks.append(SaveWeightsAfterSteps(save_interval, dir_output, _config))
+
+        trainXY = generate_arrays_from_folder_reading_order(
+            dir_flow_train_labels, dir_flow_train_imgs,
+            n_batch, input_height, input_width, n_classes,
+            thetha, augmentation)
+
        for i in range(n_epochs):
-            if save_interval:
-                history = model.fit(generate_arrays_from_folder_reading_order(dir_flow_train_labels, dir_flow_train_imgs, n_batch, input_height, input_width, n_classes, thetha, augmentation), steps_per_epoch=num_rows / n_batch, verbose=1, callbacks=[save_weights_callback])
-            else:
-                history = model.fit(generate_arrays_from_folder_reading_order(dir_flow_train_labels, dir_flow_train_imgs, n_batch, input_height, input_width, n_classes, thetha, augmentation), steps_per_epoch=num_rows / n_batch, verbose=1)
-            model.save( os.path.join(dir_output,'model_'+str(i+indexer_start) ))
+            history = model.fit(trainXY,
+                                steps_per_epoch=num_rows / n_batch,
+                                verbose=1,
+                                callbacks=callbacks)
+            model.save(os.path.join(dir_output, 'model_'+str(i+indexer_start) ))
 
            with open(os.path.join(os.path.join(dir_output,'model_'+str(i)),"config.json"), "w") as fp:
                json.dump(_config, fp) # encode dict into JSON
diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index 7d01e74..c8caca9 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -12,7 +12,6 @@ from shapely import set_precision
 from shapely.ops import unary_union, nearest_points
 
 from .rotate import rotate_image, rotation_image_new
-from . import ensure_array
 
 def contours_in_same_horizon(cy_main_hor):
     """
@@ -249,12 +248,14 @@ def return_contours_of_image(image):
     return contours, hierarchy
 
 def dilate_textline_contours(all_found_textline_polygons):
+    from . import ensure_array
     return [ensure_array(
        [polygon2contour(contour2polygon(contour, dilate=6))
         for contour in region])
            for region in all_found_textline_polygons]
 
 def dilate_textregion_contours(all_found_textregion_polygons):
+    from . import ensure_array
    return ensure_array(
        [polygon2contour(contour2polygon(contour, dilate=6))
         for contour in all_found_textregion_polygons])
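
For reference, the environment/session change applied repeatedly throughout this patch (eynollah.py, sbb_binarize.py, inference.py, build_model_load_pretrained_weights_and_save.py, train.py) condenses to the standalone sketch below. It is not tied to any single file above; the plain print fallback stands in for each module's own logging or stderr handling, and everything else uses the same calls as the patch.

import os

# Must be set before tensorflow is imported; together with the tf-keras
# package from requirements.txt this avoids Keras 3 (TF versions after 2.15).
os.environ['TF_USE_LEGACY_KERAS'] = '1'

import tensorflow as tf

# Replaces the removed TF1 compat idiom:
#     config = tf.compat.v1.ConfigProto()
#     config.gpu_options.allow_growth = True
#     set_session(tf.compat.v1.Session(config=config))
try:
    for device in tf.config.list_physical_devices('GPU'):
        # allocate GPU memory on demand instead of reserving it all at startup
        tf.config.experimental.set_memory_growth(device, True)
except Exception:
    print("no GPU device available")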