drop TF1 vernacular, relax TF/Keras and Torch requirements…

- do not restrict TF version, but depend on tf-keras and
  set `TF_USE_LEGACY_KERAS=1` to avoid Keras 3 behaviour
- relax Numpy version requirement up to v2
- relax Torch version requirement
- drop TF1 session management code
- drop TF1 config in favour of TF2 config code for memory growth
- training.*: also simplify and limit line length
- training.train: always train with TensorBoard callback
This commit is contained in:
Robert Sachunsky 2026-01-20 04:18:55 +01:00
parent e2754da4f5
commit 3c3effcfda
8 changed files with 289 additions and 294 deletions

View file

@@ -1,2 +1,2 @@
torch <= 2.0.1 torch
transformers <= 4.30.2 transformers <= 4.30.2

View file

@@ -1,8 +1,9 @@
# ocrd includes opencv, numpy, shapely, click # ocrd includes opencv, numpy, shapely, click
ocrd >= 3.3.0 ocrd >= 3.3.0
numpy <1.24.0 numpy < 2.0
scikit-learn >= 0.23.2 scikit-learn >= 0.23.2
tensorflow < 2.13 tensorflow
tf-keras # avoid keras 3 (also needs TF_USE_LEGACY_KERAS=1)
numba <= 0.58.1 numba <= 0.58.1
scikit-image scikit-image
biopython biopython

View file

@@ -56,14 +56,12 @@ except ImportError:
TrOCRProcessor = VisionEncoderDecoderModel = None TrOCRProcessor = VisionEncoderDecoderModel = None
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1' #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
tf_disable_interactive_logs() tf_disable_interactive_logs()
import tensorflow as tf import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.keras.models import load_model from tensorflow.keras.models import load_model
tf.get_logger().setLevel("ERROR") tf.get_logger().setLevel("ERROR")
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
# use tf1 compatibility for keras backend
from tensorflow.compat.v1.keras.backend import set_session
from tensorflow.keras import layers from tensorflow.keras import layers
from tensorflow.keras.layers import StringLookup from tensorflow.keras.layers import StringLookup
@@ -277,14 +275,6 @@ class Eynollah:
t_start = time.time() t_start = time.time()
# #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
# #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
# #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
# config = tf.compat.v1.ConfigProto()
# config.gpu_options.allow_growth = True
# #session = tf.InteractiveSession()
# session = tf.compat.v1.Session(config=config)
# set_session(session)
try: try:
for device in tf.config.list_physical_devices('GPU'): for device in tf.config.list_physical_devices('GPU'):
tf.config.experimental.set_memory_growth(device, True) tf.config.experimental.set_memory_growth(device, True)

View file

@@ -2,19 +2,19 @@
Tool to load model and binarize a given image. Tool to load model and binarize a given image.
""" """
import sys
from glob import glob from glob import glob
import os import os
import logging import logging
from PIL import Image
import numpy as np import numpy as np
from PIL import Image
import cv2 import cv2
from ocrd_utils import tf_disable_interactive_logs from ocrd_utils import tf_disable_interactive_logs
os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
tf_disable_interactive_logs() tf_disable_interactive_logs()
import tensorflow as tf import tensorflow as tf
from tensorflow.keras.models import load_model from tensorflow.keras.models import load_model
from tensorflow.python.keras import backend as tensorflow_backend
from .utils import is_image_filename from .utils import is_image_filename
@@ -27,26 +27,17 @@ class SbbBinarizer:
self.model_dir = model_dir self.model_dir = model_dir
self.logger = logger if logger else logging.getLogger('SbbBinarizer') self.logger = logger if logger else logging.getLogger('SbbBinarizer')
self.start_new_session() try:
for device in tf.config.list_physical_devices('GPU'):
self.model_files = glob(self.model_dir+"/*/", recursive = True) tf.config.experimental.set_memory_growth(device, True)
except:
self.logger.warning("no GPU device available")
self.model_files = glob(self.model_dir + "/*/", recursive=True)
self.models = [] self.models = []
for model_file in self.model_files: for model_file in self.model_files:
self.models.append(self.load_model(model_file)) self.models.append(self.load_model(model_file))
def start_new_session(self):
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
self.session = tf.compat.v1.Session(config=config) # tf.InteractiveSession()
tensorflow_backend.set_session(self.session)
def end_session(self):
tensorflow_backend.clear_session()
self.session.close()
del self.session
def load_model(self, model_name): def load_model(self, model_name):
model = load_model(os.path.join(self.model_dir, model_name), compile=False) model = load_model(os.path.join(self.model_dir, model_name), compile=False)
model_height = model.layers[len(model.layers)-1].output_shape[1] model_height = model.layers[len(model.layers)-1].output_shape[1]
@@ -55,7 +46,6 @@ class SbbBinarizer:
return model, model_height, model_width, n_classes return model, model_height, model_width, n_classes
def predict(self, model_in, img, use_patches, n_batch_inference=5): def predict(self, model_in, img, use_patches, n_batch_inference=5):
tensorflow_backend.set_session(self.session)
model, model_height, model_width, n_classes = model_in model, model_height, model_width, n_classes = model_in
img_org_h = img.shape[0] img_org_h = img.shape[0]

View file

@@ -1,3 +1,4 @@
import sys
import click import click
import tensorflow as tf import tensorflow as tf
@@ -5,8 +6,11 @@ from .models import resnet50_unet
def configuration(): def configuration():
gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) try:
session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) for device in tf.config.list_physical_devices('GPU'):
tf.config.experimental.set_memory_growth(device, True)
except:
print("no GPU device available", file=sys.stderr)
@click.command() @click.command()
def build_model_load_pretrained_weights_and_save(): def build_model_load_pretrained_weights_and_save():

View file

@@ -1,16 +1,19 @@
"""
Tool to load model and predict for given image.
"""
import sys import sys
import os import os
import warnings import warnings
import json import json
import click
import numpy as np import numpy as np
import cv2 import cv2
from tensorflow.keras.models import load_model
os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
import tensorflow as tf import tensorflow as tf
from tensorflow.keras import backend as K from tensorflow.keras.models import load_model
from tensorflow.keras.layers import *
import click
from tensorflow.python.keras import backend as tensorflow_backend
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from .gt_gen_utils import ( from .gt_gen_utils import (
@@ -24,17 +27,29 @@ from .models import (
PatchEncoder, PatchEncoder,
Patches Patches
) )
from .metrics import (
soft_dice_loss,
weighted_categorical_crossentropy,
)
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore") warnings.simplefilter("ignore")
__doc__=\ class SBBPredict:
""" def __init__(self,
Tool to load model and predict for given image. image,
""" dir_in,
model,
task,
config_params_model,
patches,
save,
save_layout,
ground_truth,
xml_file,
out,
min_area):
class sbb_predict:
def __init__(self,image, dir_in, model, task, config_params_model, patches, save, save_layout, ground_truth, xml_file, out, min_area):
self.image=image self.image=image
self.dir_in=dir_in self.dir_in=dir_in
self.patches=patches self.patches=patches
@@ -52,8 +67,9 @@ class sbb_predict:
self.min_area = 0 self.min_area = 0
def resize_image(self,img_in,input_height,input_width): def resize_image(self,img_in,input_height,input_width):
return cv2.resize( img_in, ( input_width,input_height) ,interpolation=cv2.INTER_NEAREST) return cv2.resize(img_in, (input_width,
input_height),
interpolation=cv2.INTER_NEAREST)
def color_images(self,seg): def color_images(self,seg):
ann_u=range(self.n_classes) ann_u=range(self.n_classes)
@@ -69,68 +85,6 @@ class sbb_predict:
seg_img[:,:,2][seg==c]=c seg_img[:,:,2][seg==c]=c
return seg_img return seg_img
def otsu_copy_binary(self,img):
img_r=np.zeros((img.shape[0],img.shape[1],3))
img1=img[:,:,0]
#print(img.min())
#print(img[:,:,0].min())
#blur = cv2.GaussianBlur(img,(5,5))
#ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
retval1, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
img_r[:,:,0]=threshold1
img_r[:,:,1]=threshold1
img_r[:,:,2]=threshold1
#img_r=img_r/float(np.max(img_r))*255
return img_r
def otsu_copy(self,img):
img_r=np.zeros((img.shape[0],img.shape[1],3))
#img1=img[:,:,0]
#print(img.min())
#print(img[:,:,0].min())
#blur = cv2.GaussianBlur(img,(5,5))
#ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
_, threshold1 = cv2.threshold(img[:,:,0], 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
_, threshold2 = cv2.threshold(img[:,:,1], 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
_, threshold3 = cv2.threshold(img[:,:,2], 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
img_r[:,:,0]=threshold1
img_r[:,:,1]=threshold2
img_r[:,:,2]=threshold3
###img_r=img_r/float(np.max(img_r))*255
return img_r
def soft_dice_loss(self,y_true, y_pred, epsilon=1e-6):
axes = tuple(range(1, len(y_pred.shape)-1))
numerator = 2. * K.sum(y_pred * y_true, axes)
denominator = K.sum(K.square(y_pred) + K.square(y_true), axes)
return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch
def weighted_categorical_crossentropy(self,weights=None):
def loss(y_true, y_pred):
labels_floats = tf.cast(y_true, tf.float32)
per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats,logits=y_pred)
if weights is not None:
weight_mask = tf.maximum(tf.reduce_max(tf.constant(
np.array(weights, dtype=np.float32)[None, None, None])
* labels_floats, axis=-1), 1.0)
per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None]
return tf.reduce_mean(per_pixel_loss)
return self.loss
def IoU(self,Yi,y_predi): def IoU(self,Yi,y_predi):
## mean Intersection over Union ## mean Intersection over Union
## Mean IoU = TP/(FN + TP + FP) ## Mean IoU = TP/(FN + TP + FP)
@@ -157,30 +111,28 @@ class sbb_predict:
return mIoU return mIoU
def start_new_session_and_model(self): def start_new_session_and_model(self):
try:
config = tf.compat.v1.ConfigProto() for device in tf.config.list_physical_devices('GPU'):
config.gpu_options.allow_growth = True tf.config.experimental.set_memory_growth(device, True)
except:
print("no GPU device available", file=sys.stderr)
session = tf.compat.v1.Session(config=config) # tf.InteractiveSession()
tensorflow_backend.set_session(session)
#tensorflow.keras.layers.custom_layer = PatchEncoder #tensorflow.keras.layers.custom_layer = PatchEncoder
#tensorflow.keras.layers.custom_layer = Patches #tensorflow.keras.layers.custom_layer = Patches
self.model = load_model(self.model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) self.model = load_model(self.model_dir, compile=False,
#config = tf.ConfigProto() custom_objects={"PatchEncoder": PatchEncoder,
#config.gpu_options.allow_growth=True "Patches": Patches})
#keras.losses.custom_loss = weighted_categorical_crossentropy
#self.session = tf.InteractiveSession() #self.model = load_model(self.model_dir, compile=False)
#keras.losses.custom_loss = self.weighted_categorical_crossentropy
#self.model = load_model(self.model_dir , compile=False)
##if self.weights_dir!=None: ##if self.weights_dir!=None:
##self.model.load_weights(self.weights_dir) ##self.model.load_weights(self.weights_dir)
if self.task != 'classification' and self.task != 'reading_order': if self.task != 'classification' and self.task != 'reading_order':
self.img_height=self.model.layers[len(self.model.layers)-1].output_shape[1] last = self.model.layers[-1]
self.img_width=self.model.layers[len(self.model.layers)-1].output_shape[2] self.img_height = last.output_shape[1]
self.n_classes=self.model.layers[len(self.model.layers)-1].output_shape[3] self.img_width = last.output_shape[2]
self.n_classes = last.output_shape[3]
def visualize_model_output(self, prediction, img, task): def visualize_model_output(self, prediction, img, task):
if task == "binarization": if task == "binarization":
@@ -208,21 +160,16 @@ class sbb_predict:
'15' : [255, 0, 255]} '15' : [255, 0, 255]}
layout_only = np.zeros(prediction.shape) layout_only = np.zeros(prediction.shape)
for unq_class in unique_classes: for unq_class in unique_classes:
where = prediction[:,:,0]==unq_class
rgb_class_unique = rgb_colors[str(int(unq_class))] rgb_class_unique = rgb_colors[str(int(unq_class))]
layout_only[:,:,0][prediction[:,:,0]==unq_class] = rgb_class_unique[0] layout_only[:,:,0][where] = rgb_class_unique[0]
layout_only[:,:,1][prediction[:,:,0]==unq_class] = rgb_class_unique[1] layout_only[:,:,1][where] = rgb_class_unique[1]
layout_only[:,:,2][prediction[:,:,0]==unq_class] = rgb_class_unique[2] layout_only[:,:,2][where] = rgb_class_unique[2]
layout_only = layout_only.astype(np.int32)
img = self.resize_image(img, layout_only.shape[0], layout_only.shape[1]) img = self.resize_image(img, layout_only.shape[0], layout_only.shape[1])
layout_only = layout_only.astype(np.int32)
img = img.astype(np.int32) img = img.astype(np.int32)
added_image = cv2.addWeighted(img,0.5,layout_only,0.1,0) added_image = cv2.addWeighted(img,0.5,layout_only,0.1,0)
@@ -231,10 +178,10 @@ class sbb_predict:
def predict(self, image_dir): def predict(self, image_dir):
if self.task == 'classification': if self.task == 'classification':
classes_names = self.config_params_model['classification_classes_name'] classes_names = self.config_params_model['classification_classes_name']
img_1ch = img=cv2.imread(image_dir, 0) img_1ch = cv2.imread(image_dir, 0) / 255.0
img_1ch = cv2.resize(img_1ch, (self.config_params_model['input_height'],
img_1ch = img_1ch / 255.0 self.config_params_model['input_width']),
img_1ch = cv2.resize(img_1ch, (self.config_params_model['input_height'], self.config_params_model['input_width']), interpolation=cv2.INTER_NEAREST) interpolation=cv2.INTER_NEAREST)
img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
img_in[0, :, :, 0] = img_1ch[:, :] img_in[0, :, :, 0] = img_1ch[:, :]
img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 1] = img_1ch[:, :]
@@ -244,23 +191,27 @@ class sbb_predict:
index_class = np.argmax(label_p_pred[0]) index_class = np.argmax(label_p_pred[0])
print("Predicted Class: {}".format(classes_names[str(int(index_class))])) print("Predicted Class: {}".format(classes_names[str(int(index_class))]))
elif self.task == 'reading_order': elif self.task == 'reading_order':
img_height = self.config_params_model['input_height'] img_height = self.config_params_model['input_height']
img_width = self.config_params_model['input_width'] img_width = self.config_params_model['input_width']
tree_xml, root_xml, bb_coord_printspace, file_name, id_paragraph, id_header, co_text_paragraph, co_text_header, tot_region_ref, x_len, y_len, index_tot_regions, img_poly = read_xml(self.xml_file) tree_xml, root_xml, bb_coord_printspace, file_name, \
_, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(co_text_header) id_paragraph, id_header, \
co_text_paragraph, co_text_header, \
tot_region_ref, x_len, y_len, index_tot_regions, \
img_poly = read_xml(self.xml_file)
_, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = \
find_new_features_of_contours(co_text_header)
img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8')
for j in range(len(cy_main)): for j in range(len(cy_main)):
img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 img_header_and_sep[int(y_max_main[j]): int(y_max_main[j]) + 12,
int(x_min_main[j]): int(x_max_main[j])] = 1
co_text_all = co_text_paragraph + co_text_header co_text_all = co_text_paragraph + co_text_header
id_all_text = id_paragraph + id_header id_all_text = id_paragraph + id_header
##texts_corr_order_index = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ] ##texts_corr_order_index = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ]
##texts_corr_order_index_int = [int(x) for x in texts_corr_order_index] ##texts_corr_order_index_int = [int(x) for x in texts_corr_order_index]
texts_corr_order_index_int = list(np.array(range(len(co_text_all)))) texts_corr_order_index_int = list(np.array(range(len(co_text_all))))
@@ -271,7 +222,8 @@ class sbb_predict:
#print(np.shape(co_text_all[0]), len( np.shape(co_text_all[0]) ),'co_text_all') #print(np.shape(co_text_all[0]), len( np.shape(co_text_all[0]) ),'co_text_all')
#co_text_all = filter_contours_area_of_image_tables(img_poly, co_text_all, _, max_area, min_area) #co_text_all = filter_contours_area_of_image_tables(img_poly, co_text_all, _, max_area, min_area)
#print(co_text_all,'co_text_all') #print(co_text_all,'co_text_all')
co_text_all, texts_corr_order_index_int, _ = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, self.min_area) co_text_all, texts_corr_order_index_int, _ = filter_contours_area_of_image(
img_poly, co_text_all, texts_corr_order_index_int, max_area, self.min_area)
#print(texts_corr_order_index_int) #print(texts_corr_order_index_int)
@@ -664,17 +616,15 @@ class sbb_predict:
help="min area size of regions considered for reading order detection. The default value is zero and means that all text regions are considered for reading order.", help="min area size of regions considered for reading order detection. The default value is zero and means that all text regions are considered for reading order.",
) )
def main(image, dir_in, model, patches, save, save_layout, ground_truth, xml_file, out, min_area): def main(image, dir_in, model, patches, save, save_layout, ground_truth, xml_file, out, min_area):
assert image or dir_in, "Either a single image -i or a dir_in -di is required" assert image or dir_in, "Either a single image -i or a dir_in -di input is required"
with open(os.path.join(model,'config.json')) as f: with open(os.path.join(model,'config.json')) as f:
config_params_model = json.load(f) config_params_model = json.load(f)
task = config_params_model['task'] task = config_params_model['task']
if task != 'classification' and task != 'reading_order': if task != 'classification' and task != 'reading_order':
if image and not save: assert not image or save, "For segmentation or binarization, an input single image -i also requires an output filename -s"
print("Error: You used one of segmentation or binarization task with image input but not set -s, you need a filename to save visualized output with -s") assert not dir_in or out, "For segmentation or binarization, an input directory -di also requires an output directory -o"
sys.exit(1) x = SBBPredict(image, dir_in, model, task, config_params_model,
if dir_in and not out: patches, save, save_layout, ground_truth, xml_file, out,
print("Error: You used one of segmentation or binarization task with dir_in but not set -out") min_area)
sys.exit(1)
x=sbb_predict(image, dir_in, model, task, config_params_model, patches, save, save_layout, ground_truth, xml_file, out, min_area)
x.run() x.run()

View file

@@ -28,14 +28,14 @@ from eynollah.training.utils import (
) )
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
import tensorflow as tf import tensorflow as tf
from tensorflow.compat.v1.keras.backend import set_session
from tensorflow.keras.optimizers import SGD, Adam from tensorflow.keras.optimizers import SGD, Adam
from sacred import Experiment
from tensorflow.keras.models import load_model from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import Callback, TensorBoard
from sacred import Experiment
from tqdm import tqdm from tqdm import tqdm
from sklearn.metrics import f1_score from sklearn.metrics import f1_score
from tensorflow.keras.callbacks import Callback
import numpy as np import numpy as np
import cv2 import cv2
@@ -63,10 +63,11 @@ class SaveWeightsAfterSteps(Callback):
def configuration(): def configuration():
config = tf.compat.v1.ConfigProto() try:
config.gpu_options.allow_growth = True for device in tf.config.list_physical_devices('GPU'):
session = tf.compat.v1.Session(config=config) tf.config.experimental.set_memory_growth(device, True)
set_session(session) except:
print("no GPU device available", file=sys.stderr)
def get_dirs_or_files(input_data): def get_dirs_or_files(input_data):
@@ -171,12 +172,11 @@ def run(_config, n_classes, n_epochs, input_height,
else: else:
list_all_possible_foreground_rgbs = None list_all_possible_foreground_rgbs = None
if task == "segmentation" or task == "enhancement" or task == "binarization": if task in ["segmentation", "enhancement", "binarization"]:
if data_is_provided: if data_is_provided:
dir_train_flowing = os.path.join(dir_output, 'train') dir_train_flowing = os.path.join(dir_output, 'train')
dir_eval_flowing = os.path.join(dir_output, 'eval') dir_eval_flowing = os.path.join(dir_output, 'eval')
dir_flow_train_imgs = os.path.join(dir_train_flowing, 'images') dir_flow_train_imgs = os.path.join(dir_train_flowing, 'images')
dir_flow_train_labels = os.path.join(dir_train_flowing, 'labels') dir_flow_train_labels = os.path.join(dir_train_flowing, 'labels')
@@ -227,176 +227,228 @@ def run(_config, n_classes, n_epochs, input_height,
segs_list_test=np.array(os.listdir(dir_seg_val)) segs_list_test=np.array(os.listdir(dir_seg_val))
# writing patches into a sub-folder in order to be flowed from directory. # writing patches into a sub-folder in order to be flowed from directory.
provide_patches(imgs_list, segs_list, dir_img, dir_seg, dir_flow_train_imgs, common_args = [input_height, input_width,
dir_flow_train_labels, input_height, input_width, blur_k, blur_k, blur_aug,
blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background,adding_rgb_foreground, add_red_textlines, channels_shuffling, padding_white, padding_black,
scaling, shifting, degrading, brightening, scales, degrade_scales, brightness, flip_aug, binarization,
flip_index,shuffle_indexes, scaling_bluring, scaling_brightness, scaling_binarization, adding_rgb_background,
rotation, rotation_not_90, thetha, scaling_flip, task, augmentation=augmentation, adding_rgb_foreground,
patches=patches, dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds, dir_rgb_foregrounds=dir_rgb_foregrounds,list_all_possible_foreground_rgbs=list_all_possible_foreground_rgbs) add_red_textlines,
channels_shuffling,
provide_patches(imgs_list_test, segs_list_test, dir_img_val, dir_seg_val, scaling, shifting, degrading, brightening,
dir_flow_eval_imgs, dir_flow_eval_labels, input_height, input_width, scales, degrade_scales, brightness,
blur_k, blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, channels_shuffling, flip_index, shuffle_indexes,
scaling, shifting, degrading, brightening, scales, degrade_scales, brightness, scaling_bluring, scaling_brightness, scaling_binarization,
flip_index, shuffle_indexes, scaling_bluring, scaling_brightness, scaling_binarization, rotation, rotation_not_90, thetha,
rotation, rotation_not_90, thetha, scaling_flip, task, augmentation=False, patches=patches,dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds,dir_rgb_foregrounds=dir_rgb_foregrounds,list_all_possible_foreground_rgbs=list_all_possible_foreground_rgbs ) scaling_flip, task,
]
common_kwargs = dict(patches=
patches,
dir_img_bin=
dir_img_bin,
number_of_backgrounds_per_image=
number_of_backgrounds_per_image,
list_all_possible_background_images=
list_all_possible_background_images,
dir_rgb_backgrounds=
dir_rgb_backgrounds,
dir_rgb_foregrounds=
dir_rgb_foregrounds,
list_all_possible_foreground_rgbs=
list_all_possible_foreground_rgbs,
)
provide_patches(imgs_list, segs_list,
dir_img, dir_seg,
dir_flow_train_imgs,
dir_flow_train_labels,
*common_args,
augmentation=augmentation,
**common_kwargs)
provide_patches(imgs_list_test, segs_list_test,
dir_img_val, dir_seg_val,
dir_flow_eval_imgs,
dir_flow_eval_labels,
*common_args,
augmentation=False,
**common_kwargs)
if weighted_loss: if weighted_loss:
weights = np.zeros(n_classes) weights = np.zeros(n_classes)
if data_is_provided: if data_is_provided:
for obj in os.listdir(dir_flow_train_labels): dirs = dir_flow_train_labels
try:
label_obj = cv2.imread(dir_flow_train_labels + '/' + obj)
label_obj_one_hot = get_one_hot(label_obj, label_obj.shape[0], label_obj.shape[1], n_classes)
weights += (label_obj_one_hot.sum(axis=0)).sum(axis=0)
except:
pass
else: else:
dirs = dir_seg
for obj in os.listdir(dir_seg): for obj in os.listdir(dirs):
try: label_file = os.path.join(dirs, + obj)
label_obj = cv2.imread(dir_seg + '/' + obj) try:
label_obj_one_hot = get_one_hot(label_obj, label_obj.shape[0], label_obj.shape[1], n_classes) label_obj = cv2.imread(label_file)
weights += (label_obj_one_hot.sum(axis=0)).sum(axis=0) label_obj_one_hot = get_one_hot(label_obj, label_obj.shape[0], label_obj.shape[1], n_classes)
except: weights += (label_obj_one_hot.sum(axis=0)).sum(axis=0)
pass except Exception as e:
print("error reading data file '%s': %s" % (label_file, e), file=sys.stderr)
weights = 1.00 / weights weights = 1.00 / weights
weights = weights / float(np.sum(weights)) weights = weights / float(np.sum(weights))
weights = weights / float(np.min(weights)) weights = weights / float(np.min(weights))
weights = weights / float(np.sum(weights)) weights = weights / float(np.sum(weights))
if continue_training: if continue_training:
if backbone_type=='nontransformer': if backbone_type == 'nontransformer':
if is_loss_soft_dice and (task == "segmentation" or task == "binarization"): if is_loss_soft_dice and task in ["segmentation", "binarization"]:
model = load_model(dir_of_start_model, compile=True, custom_objects={'soft_dice_loss': soft_dice_loss}) model = load_model(dir_of_start_model, compile=True,
if weighted_loss and (task == "segmentation" or task == "binarization"): custom_objects={'soft_dice_loss': soft_dice_loss})
model = load_model(dir_of_start_model, compile=True, custom_objects={'loss': weighted_categorical_crossentropy(weights)}) elif weighted_loss and task in ["segmentation", "binarization"]:
if not is_loss_soft_dice and not weighted_loss: model = load_model(dir_of_start_model, compile=True,
custom_objects={'loss': weighted_categorical_crossentropy(weights)})
else:
model = load_model(dir_of_start_model , compile=True) model = load_model(dir_of_start_model , compile=True)
elif backbone_type=='transformer':
if is_loss_soft_dice and (task == "segmentation" or task == "binarization"): elif backbone_type == 'transformer':
model = load_model(dir_of_start_model, compile=True, custom_objects={"PatchEncoder": PatchEncoder, "Patches": Patches,'soft_dice_loss': soft_dice_loss}) if is_loss_soft_dice and task in ["segmentation", "binarization"]:
if weighted_loss and (task == "segmentation" or task == "binarization"): model = load_model(dir_of_start_model, compile=True,
model = load_model(dir_of_start_model, compile=True, custom_objects={'loss': weighted_categorical_crossentropy(weights)}) custom_objects={"PatchEncoder": PatchEncoder,
if not is_loss_soft_dice and not weighted_loss: "Patches": Patches,
model = load_model(dir_of_start_model , compile=True,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) 'soft_dice_loss': soft_dice_loss})
elif weighted_loss and task in ["segmentation", "binarization"]:
model = load_model(dir_of_start_model, compile=True,
custom_objects={'loss': weighted_categorical_crossentropy(weights)})
else:
model = load_model(dir_of_start_model, compile=True,
custom_objects = {"PatchEncoder": PatchEncoder,
"Patches": Patches})
else: else:
index_start = 0 index_start = 0
if backbone_type=='nontransformer': if backbone_type == 'nontransformer':
model = resnet50_unet(n_classes, input_height, input_width, task, weight_decay, pretraining) model = resnet50_unet(n_classes,
elif backbone_type=='transformer': input_height,
input_width,
task,
weight_decay,
pretraining)
elif backbone_type == 'transformer':
num_patches_x = transformer_num_patches_xy[0] num_patches_x = transformer_num_patches_xy[0]
num_patches_y = transformer_num_patches_xy[1] num_patches_y = transformer_num_patches_xy[1]
num_patches = num_patches_x * num_patches_y num_patches = num_patches_x * num_patches_y
if transformer_cnn_first: if transformer_cnn_first:
if input_height != (num_patches_y * transformer_patchsize_y * 32): model_builder = vit_resnet50_unet
print("Error: transformer_patchsize_y or transformer_num_patches_xy height value error . input_height should be equal to ( transformer_num_patches_xy height value * transformer_patchsize_y * 32)") multiple_of_32 = True
sys.exit(1)
if input_width != (num_patches_x * transformer_patchsize_x * 32):
print("Error: transformer_patchsize_x or transformer_num_patches_xy width value error . input_width should be equal to ( transformer_num_patches_xy width value * transformer_patchsize_x * 32)")
sys.exit(1)
if (transformer_projection_dim % (transformer_patchsize_y * transformer_patchsize_x)) != 0:
print("Error: transformer_projection_dim error. The remainder when parameter transformer_projection_dim is divided by (transformer_patchsize_y*transformer_patchsize_x) should be zero")
sys.exit(1)
model = vit_resnet50_unet(n_classes, transformer_patchsize_x, transformer_patchsize_y, num_patches, transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_projection_dim, input_height, input_width, task, weight_decay, pretraining)
else: else:
if input_height != (num_patches_y * transformer_patchsize_y): model_builder = vit_resnet50_unet_transformer_before_cnn
print("Error: transformer_patchsize_y or transformer_num_patches_xy height value error . input_height should be equal to ( transformer_num_patches_xy height value * transformer_patchsize_y)") multiple_of_32 = False
sys.exit(1)
if input_width != (num_patches_x * transformer_patchsize_x): assert input_height == num_patches_y * transformer_patchsize_y * (32 if multiple_of_32 else 1), \
print("Error: transformer_patchsize_x or transformer_num_patches_xy width value error . input_width should be equal to ( transformer_num_patches_xy width value * transformer_patchsize_x)") "transformer_patchsize_y or transformer_num_patches_xy height value error: " \
sys.exit(1) "input_height should be equal to " \
if (transformer_projection_dim % (transformer_patchsize_y * transformer_patchsize_x)) != 0: "(transformer_num_patches_xy height value * transformer_patchsize_y%s)" % \
print("Error: transformer_projection_dim error. The remainder when parameter transformer_projection_dim is divided by (transformer_patchsize_y*transformer_patchsize_x) should be zero") " * 32" if multiple_of_32 else ""
sys.exit(1) assert input_width == num_patches_x * transformer_patchsize_x * (32 if multiple_of_32 else 1), \
model = vit_resnet50_unet_transformer_before_cnn(n_classes, transformer_patchsize_x, transformer_patchsize_y, num_patches, transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_projection_dim, input_height, input_width, task, weight_decay, pretraining) "transformer_patchsize_x or transformer_num_patches_xy width value error: " \
"input_width should be equal to " \
"(transformer_num_patches_xy width value * transformer_patchsize_x%s)" % \
" * 32" if multiple_of_32 else ""
assert 0 == transformer_projection_dim % (transformer_patchsize_y * transformer_patchsize_x), \
"transformer_projection_dim error: " \
"The remainder when parameter transformer_projection_dim is divided by " \
"(transformer_patchsize_y*transformer_patchsize_x) should be zero"
model = model_builder(
n_classes,
transformer_patchsize_x,
transformer_patchsize_y,
num_patches,
transformer_mlp_head_units,
transformer_layers,
transformer_num_heads,
transformer_projection_dim,
input_height,
input_width,
task,
weight_decay,
pretraining)
#if you want to see the model structure just uncomment model summary. #if you want to see the model structure just uncomment model summary.
model.summary() model.summary()
if task == "segmentation" or task == "binarization": if task in ["segmentation", "binarization"]:
if not is_loss_soft_dice and not weighted_loss:
model.compile(loss='categorical_crossentropy',
optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy'])
if is_loss_soft_dice: if is_loss_soft_dice:
model.compile(loss=soft_dice_loss, loss = soft_dice_loss
optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy']) elif weighted_loss:
if weighted_loss: loss = weighted_categorical_crossentropy(weights)
model.compile(loss=weighted_categorical_crossentropy(weights), else:
optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy']) loss = 'categorical_crossentropy'
elif task == "enhancement": else: # task == "enhancement"
model.compile(loss='mean_squared_error', loss = 'mean_squared_error'
optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy']) model.compile(loss=loss,
optimizer=Adam(learning_rate=learning_rate),
metrics=['accuracy'])
# generating train and evaluation data # generating train and evaluation data
train_gen = data_gen(dir_flow_train_imgs, dir_flow_train_labels, batch_size=n_batch, gen_kwargs = dict(batch_size=n_batch,
input_height=input_height, input_width=input_width, n_classes=n_classes, task=task) input_height=input_height,
val_gen = data_gen(dir_flow_eval_imgs, dir_flow_eval_labels, batch_size=n_batch, input_width=input_width,
input_height=input_height, input_width=input_width, n_classes=n_classes, task=task) n_classes=n_classes,
task=task)
train_gen = data_gen(dir_flow_train_imgs, dir_flow_train_labels, **gen_kwargs)
val_gen = data_gen(dir_flow_eval_imgs, dir_flow_eval_labels, **gen_kwargs)
##img_validation_patches = os.listdir(dir_flow_eval_imgs) ##img_validation_patches = os.listdir(dir_flow_eval_imgs)
##score_best=[] ##score_best=[]
##score_best.append(0) ##score_best.append(0)
callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
if save_interval: if save_interval:
save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) callbacks.append(SaveWeightsAfterSteps(save_interval, dir_output, _config))
for i in tqdm(range(index_start, n_epochs + index_start)): for i in tqdm(range(index_start, n_epochs + index_start)):
if save_interval: model.fit(
model.fit( train_gen,
train_gen, steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs)) / n_batch) - 1,
steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs)) / n_batch) - 1, validation_data=val_gen,
validation_data=val_gen, validation_steps=1,
validation_steps=1, epochs=1,
epochs=1, callbacks=[save_weights_callback]) callbacks=callbacks)
else:
model.fit( dir_model = os.path.join(dir_output, 'model_' + str(i))
train_gen, model.save(dir_model)
steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs)) / n_batch) - 1, with open(os.path.join(dir_model, "config.json"), "w") as fp:
validation_data=val_gen,
validation_steps=1,
epochs=1)
model.save(os.path.join(dir_output,'model_'+str(i)))
with open(os.path.join(os.path.join(dir_output,'model_'+str(i)),"config.json"), "w") as fp:
json.dump(_config, fp) # encode dict into JSON json.dump(_config, fp) # encode dict into JSON
#os.system('rm -rf '+dir_train_flowing) #os.system('rm -rf '+dir_train_flowing)
#os.system('rm -rf '+dir_eval_flowing) #os.system('rm -rf '+dir_eval_flowing)
#model.save(dir_output+'/'+'model'+'.h5') #model.save(dir_output+'/'+'model'+'.h5')
elif task=='classification': elif task=='classification':
configuration() configuration()
model = resnet50_classifier(n_classes, input_height, input_width, weight_decay, pretraining) model = resnet50_classifier(n_classes,
input_height,
input_width,
weight_decay,
pretraining)
opt_adam = Adam(learning_rate=0.001)
model.compile(loss='categorical_crossentropy', model.compile(loss='categorical_crossentropy',
optimizer = opt_adam,metrics=['accuracy']) optimizer=Adam(learning_rate=0.001), # rs: why not learning_rate?
metrics=['accuracy'])
list_classes = list(classification_classes_name.values()) list_classes = list(classification_classes_name.values())
testX, testY = generate_data_from_folder_evaluation(dir_eval, input_height, input_width, n_classes, list_classes) trainXY = generate_data_from_folder_training(
dir_train, n_batch, input_height, input_width, n_classes, list_classes)
y_tot=np.zeros((testX.shape[0],n_classes)) testX, testY = generate_data_from_folder_evaluation(
dir_eval, input_height, input_width, n_classes, list_classes)
y_tot = np.zeros((testX.shape[0], n_classes))
score_best= [0] score_best= [0]
num_rows = return_number_of_total_training_data(dir_train) num_rows = return_number_of_total_training_data(dir_train)
weights=[] weights=[]
callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
for i in range(n_epochs): for i in range(n_epochs):
history = model.fit( generate_data_from_folder_training(dir_train, n_batch , input_height, input_width, n_classes, list_classes), steps_per_epoch=num_rows / n_batch, verbose=1)#,class_weight=weights) history = model.fit(trainXY,
steps_per_epoch=num_rows / n_batch,
#class_weight=weights)
verbose=1,
callbacks=callbacks)
y_pr_class = [] y_pr_class = []
for jj in range(testY.shape[0]): for jj in range(testY.shape[0]):
y_pr=model.predict(testX[jj,:,:,:].reshape(1,input_height,input_width,3), verbose=0) y_pr=model.predict(testX[jj,:,:,:].reshape(1,input_height,input_width,3), verbose=0)
@ -433,7 +485,8 @@ def run(_config, n_classes, n_epochs, input_height,
elif task=='reading_order': elif task=='reading_order':
configuration() configuration()
model = machine_based_reading_order_model(n_classes,input_height,input_width,weight_decay,pretraining) model = machine_based_reading_order_model(
n_classes, input_height, input_width, weight_decay, pretraining)
dir_flow_train_imgs = os.path.join(dir_train, 'images') dir_flow_train_imgs = os.path.join(dir_train, 'images')
dir_flow_train_labels = os.path.join(dir_train, 'labels') dir_flow_train_labels = os.path.join(dir_train, 'labels')
@ -447,20 +500,26 @@ def run(_config, n_classes, n_epochs, input_height,
#f1score_tot = [0] #f1score_tot = [0]
indexer_start = 0 indexer_start = 0
# opt = SGD(learning_rate=0.01, momentum=0.9)
opt_adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(loss="binary_crossentropy", model.compile(loss="binary_crossentropy",
optimizer = opt_adam,metrics=['accuracy']) #optimizer=SGD(learning_rate=0.01, momentum=0.9),
optimizer=Adam(learning_rate=0.0001), # rs: why not learning_rate?
metrics=['accuracy'])
callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
if save_interval: if save_interval:
save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) callbacks.append(SaveWeightsAfterSteps(save_interval, dir_output, _config))
trainXY = generate_arrays_from_folder_reading_order(
dir_flow_train_labels, dir_flow_train_imgs,
n_batch, input_height, input_width, n_classes,
thetha, augmentation)
for i in range(n_epochs): for i in range(n_epochs):
if save_interval: history = model.fit(trainXY,
history = model.fit(generate_arrays_from_folder_reading_order(dir_flow_train_labels, dir_flow_train_imgs, n_batch, input_height, input_width, n_classes, thetha, augmentation), steps_per_epoch=num_rows / n_batch, verbose=1, callbacks=[save_weights_callback]) steps_per_epoch=num_rows / n_batch,
else: verbose=1,
history = model.fit(generate_arrays_from_folder_reading_order(dir_flow_train_labels, dir_flow_train_imgs, n_batch, input_height, input_width, n_classes, thetha, augmentation), steps_per_epoch=num_rows / n_batch, verbose=1) callbacks=callbacks)
model.save( os.path.join(dir_output,'model_'+str(i+indexer_start) )) model.save(os.path.join(dir_output, 'model_'+str(i+indexer_start) ))
with open(os.path.join(os.path.join(dir_output,'model_'+str(i)),"config.json"), "w") as fp: with open(os.path.join(os.path.join(dir_output,'model_'+str(i)),"config.json"), "w") as fp:
json.dump(_config, fp) # encode dict into JSON json.dump(_config, fp) # encode dict into JSON

View file

@ -12,7 +12,6 @@ from shapely import set_precision
from shapely.ops import unary_union, nearest_points from shapely.ops import unary_union, nearest_points
from .rotate import rotate_image, rotation_image_new from .rotate import rotate_image, rotation_image_new
from . import ensure_array
def contours_in_same_horizon(cy_main_hor): def contours_in_same_horizon(cy_main_hor):
""" """
@ -249,12 +248,14 @@ def return_contours_of_image(image):
return contours, hierarchy return contours, hierarchy
def dilate_textline_contours(all_found_textline_polygons): def dilate_textline_contours(all_found_textline_polygons):
from . import ensure_array
return [ensure_array( return [ensure_array(
[polygon2contour(contour2polygon(contour, dilate=6)) [polygon2contour(contour2polygon(contour, dilate=6))
for contour in region]) for contour in region])
for region in all_found_textline_polygons] for region in all_found_textline_polygons]
def dilate_textregion_contours(all_found_textregion_polygons): def dilate_textregion_contours(all_found_textregion_polygons):
from . import ensure_array
return ensure_array( return ensure_array(
[polygon2contour(contour2polygon(contour, dilate=6)) [polygon2contour(contour2polygon(contour, dilate=6))
for contour in all_found_textregion_polygons]) for contour in all_found_textregion_polygons])