mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-02-20 16:32:03 +01:00
drop TF1 vernacular, relax TF/Keras and Torch requirements…
- do not restrict TF version, but depend on tf-keras and set `TF_USE_LEGACY_KERAS=1` to avoid Keras 3 behaviour
- relax Numpy version requirement up to v2
- relax Torch version requirement
- drop TF1 session management code
- drop TF1 config in favour of TF2 config code for memory growth
- training.*: also simplify and limit line length
- training.train: always train with TensorBoard callback
This commit is contained in:
parent
e2754da4f5
commit
3c3effcfda
8 changed files with 289 additions and 294 deletions
|
|
@ -1,2 +1,2 @@
|
|||
torch <= 2.0.1
|
||||
torch
|
||||
transformers <= 4.30.2
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
# ocrd includes opencv, numpy, shapely, click
|
||||
ocrd >= 3.3.0
|
||||
numpy <1.24.0
|
||||
numpy < 2.0
|
||||
scikit-learn >= 0.23.2
|
||||
tensorflow < 2.13
|
||||
tensorflow
|
||||
tf-keras # avoid keras 3 (also needs TF_USE_LEGACY_KERAS=1)
|
||||
numba <= 0.58.1
|
||||
scikit-image
|
||||
biopython
|
||||
|
|
|
|||
|
|
@ -56,14 +56,12 @@ except ImportError:
|
|||
TrOCRProcessor = VisionEncoderDecoderModel = None
|
||||
|
||||
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
|
||||
os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
|
||||
tf_disable_interactive_logs()
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.keras import backend as K
|
||||
from tensorflow.keras.models import load_model
|
||||
tf.get_logger().setLevel("ERROR")
|
||||
warnings.filterwarnings("ignore")
|
||||
# use tf1 compatibility for keras backend
|
||||
from tensorflow.compat.v1.keras.backend import set_session
|
||||
from tensorflow.keras import layers
|
||||
from tensorflow.keras.layers import StringLookup
|
||||
|
||||
|
|
@ -277,14 +275,6 @@ class Eynollah:
|
|||
|
||||
t_start = time.time()
|
||||
|
||||
# #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
|
||||
# #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
|
||||
# #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
|
||||
# config = tf.compat.v1.ConfigProto()
|
||||
# config.gpu_options.allow_growth = True
|
||||
# #session = tf.InteractiveSession()
|
||||
# session = tf.compat.v1.Session(config=config)
|
||||
# set_session(session)
|
||||
try:
|
||||
for device in tf.config.list_physical_devices('GPU'):
|
||||
tf.config.experimental.set_memory_growth(device, True)
|
||||
|
|
|
|||
|
|
@ -2,19 +2,19 @@
|
|||
Tool to load model and binarize a given image.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from glob import glob
|
||||
import os
|
||||
import logging
|
||||
from PIL import Image
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import cv2
|
||||
from ocrd_utils import tf_disable_interactive_logs
|
||||
|
||||
os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
|
||||
tf_disable_interactive_logs()
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import load_model
|
||||
from tensorflow.python.keras import backend as tensorflow_backend
|
||||
|
||||
from .utils import is_image_filename
|
||||
|
||||
|
|
@ -27,26 +27,17 @@ class SbbBinarizer:
|
|||
self.model_dir = model_dir
|
||||
self.logger = logger if logger else logging.getLogger('SbbBinarizer')
|
||||
|
||||
self.start_new_session()
|
||||
|
||||
self.model_files = glob(self.model_dir+"/*/", recursive = True)
|
||||
try:
|
||||
for device in tf.config.list_physical_devices('GPU'):
|
||||
tf.config.experimental.set_memory_growth(device, True)
|
||||
except:
|
||||
self.logger.warning("no GPU device available")
|
||||
|
||||
self.model_files = glob(self.model_dir + "/*/", recursive=True)
|
||||
self.models = []
|
||||
for model_file in self.model_files:
|
||||
self.models.append(self.load_model(model_file))
|
||||
|
||||
def start_new_session(self):
|
||||
config = tf.compat.v1.ConfigProto()
|
||||
config.gpu_options.allow_growth = True
|
||||
|
||||
self.session = tf.compat.v1.Session(config=config) # tf.InteractiveSession()
|
||||
tensorflow_backend.set_session(self.session)
|
||||
|
||||
def end_session(self):
|
||||
tensorflow_backend.clear_session()
|
||||
self.session.close()
|
||||
del self.session
|
||||
|
||||
def load_model(self, model_name):
|
||||
model = load_model(os.path.join(self.model_dir, model_name), compile=False)
|
||||
model_height = model.layers[len(model.layers)-1].output_shape[1]
|
||||
|
|
@ -55,7 +46,6 @@ class SbbBinarizer:
|
|||
return model, model_height, model_width, n_classes
|
||||
|
||||
def predict(self, model_in, img, use_patches, n_batch_inference=5):
|
||||
tensorflow_backend.set_session(self.session)
|
||||
model, model_height, model_width, n_classes = model_in
|
||||
|
||||
img_org_h = img.shape[0]
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import sys
|
||||
import click
|
||||
import tensorflow as tf
|
||||
|
||||
|
|
@ -5,8 +6,11 @@ from .models import resnet50_unet
|
|||
|
||||
|
||||
def configuration():
|
||||
gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
|
||||
session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
|
||||
try:
|
||||
for device in tf.config.list_physical_devices('GPU'):
|
||||
tf.config.experimental.set_memory_growth(device, True)
|
||||
except:
|
||||
print("no GPU device available", file=sys.stderr)
|
||||
|
||||
@click.command()
|
||||
def build_model_load_pretrained_weights_and_save():
|
||||
|
|
|
|||
|
|
@ -1,16 +1,19 @@
|
|||
"""
|
||||
Tool to load model and predict for given image.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import warnings
|
||||
import json
|
||||
|
||||
import click
|
||||
import numpy as np
|
||||
import cv2
|
||||
from tensorflow.keras.models import load_model
|
||||
|
||||
os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras import backend as K
|
||||
from tensorflow.keras.layers import *
|
||||
import click
|
||||
from tensorflow.python.keras import backend as tensorflow_backend
|
||||
from tensorflow.keras.models import load_model
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from .gt_gen_utils import (
|
||||
|
|
@ -24,17 +27,29 @@ from .models import (
|
|||
PatchEncoder,
|
||||
Patches
|
||||
)
|
||||
from .metrics import (
|
||||
soft_dice_loss,
|
||||
weighted_categorical_crossentropy,
|
||||
)
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
|
||||
__doc__=\
|
||||
"""
|
||||
Tool to load model and predict for given image.
|
||||
"""
|
||||
class SBBPredict:
|
||||
def __init__(self,
|
||||
image,
|
||||
dir_in,
|
||||
model,
|
||||
task,
|
||||
config_params_model,
|
||||
patches,
|
||||
save,
|
||||
save_layout,
|
||||
ground_truth,
|
||||
xml_file,
|
||||
out,
|
||||
min_area):
|
||||
|
||||
class sbb_predict:
|
||||
def __init__(self,image, dir_in, model, task, config_params_model, patches, save, save_layout, ground_truth, xml_file, out, min_area):
|
||||
self.image=image
|
||||
self.dir_in=dir_in
|
||||
self.patches=patches
|
||||
|
|
@ -52,8 +67,9 @@ class sbb_predict:
|
|||
self.min_area = 0
|
||||
|
||||
def resize_image(self,img_in,input_height,input_width):
|
||||
return cv2.resize( img_in, ( input_width,input_height) ,interpolation=cv2.INTER_NEAREST)
|
||||
|
||||
return cv2.resize(img_in, (input_width,
|
||||
input_height),
|
||||
interpolation=cv2.INTER_NEAREST)
|
||||
|
||||
def color_images(self,seg):
|
||||
ann_u=range(self.n_classes)
|
||||
|
|
@ -69,68 +85,6 @@ class sbb_predict:
|
|||
seg_img[:,:,2][seg==c]=c
|
||||
return seg_img
|
||||
|
||||
def otsu_copy_binary(self,img):
|
||||
img_r=np.zeros((img.shape[0],img.shape[1],3))
|
||||
img1=img[:,:,0]
|
||||
|
||||
#print(img.min())
|
||||
#print(img[:,:,0].min())
|
||||
#blur = cv2.GaussianBlur(img,(5,5))
|
||||
#ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
|
||||
retval1, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
|
||||
|
||||
|
||||
|
||||
img_r[:,:,0]=threshold1
|
||||
img_r[:,:,1]=threshold1
|
||||
img_r[:,:,2]=threshold1
|
||||
#img_r=img_r/float(np.max(img_r))*255
|
||||
return img_r
|
||||
|
||||
def otsu_copy(self,img):
|
||||
img_r=np.zeros((img.shape[0],img.shape[1],3))
|
||||
#img1=img[:,:,0]
|
||||
|
||||
#print(img.min())
|
||||
#print(img[:,:,0].min())
|
||||
#blur = cv2.GaussianBlur(img,(5,5))
|
||||
#ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
|
||||
_, threshold1 = cv2.threshold(img[:,:,0], 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
|
||||
_, threshold2 = cv2.threshold(img[:,:,1], 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
|
||||
_, threshold3 = cv2.threshold(img[:,:,2], 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
|
||||
|
||||
|
||||
|
||||
img_r[:,:,0]=threshold1
|
||||
img_r[:,:,1]=threshold2
|
||||
img_r[:,:,2]=threshold3
|
||||
###img_r=img_r/float(np.max(img_r))*255
|
||||
return img_r
|
||||
|
||||
def soft_dice_loss(self,y_true, y_pred, epsilon=1e-6):
|
||||
|
||||
axes = tuple(range(1, len(y_pred.shape)-1))
|
||||
|
||||
numerator = 2. * K.sum(y_pred * y_true, axes)
|
||||
|
||||
denominator = K.sum(K.square(y_pred) + K.square(y_true), axes)
|
||||
return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch
|
||||
|
||||
def weighted_categorical_crossentropy(self,weights=None):
|
||||
|
||||
def loss(y_true, y_pred):
|
||||
labels_floats = tf.cast(y_true, tf.float32)
|
||||
per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats,logits=y_pred)
|
||||
|
||||
if weights is not None:
|
||||
weight_mask = tf.maximum(tf.reduce_max(tf.constant(
|
||||
np.array(weights, dtype=np.float32)[None, None, None])
|
||||
* labels_floats, axis=-1), 1.0)
|
||||
per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None]
|
||||
return tf.reduce_mean(per_pixel_loss)
|
||||
return self.loss
|
||||
|
||||
|
||||
def IoU(self,Yi,y_predi):
|
||||
## mean Intersection over Union
|
||||
## Mean IoU = TP/(FN + TP + FP)
|
||||
|
|
@ -157,30 +111,28 @@ class sbb_predict:
|
|||
return mIoU
|
||||
|
||||
def start_new_session_and_model(self):
|
||||
try:
|
||||
for device in tf.config.list_physical_devices('GPU'):
|
||||
tf.config.experimental.set_memory_growth(device, True)
|
||||
except:
|
||||
print("no GPU device available", file=sys.stderr)
|
||||
|
||||
config = tf.compat.v1.ConfigProto()
|
||||
config.gpu_options.allow_growth = True
|
||||
|
||||
session = tf.compat.v1.Session(config=config) # tf.InteractiveSession()
|
||||
tensorflow_backend.set_session(session)
|
||||
#tensorflow.keras.layers.custom_layer = PatchEncoder
|
||||
#tensorflow.keras.layers.custom_layer = Patches
|
||||
self.model = load_model(self.model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
|
||||
#config = tf.ConfigProto()
|
||||
#config.gpu_options.allow_growth=True
|
||||
|
||||
#self.session = tf.InteractiveSession()
|
||||
#keras.losses.custom_loss = self.weighted_categorical_crossentropy
|
||||
#self.model = load_model(self.model_dir , compile=False)
|
||||
|
||||
self.model = load_model(self.model_dir, compile=False,
|
||||
custom_objects={"PatchEncoder": PatchEncoder,
|
||||
"Patches": Patches})
|
||||
#keras.losses.custom_loss = weighted_categorical_crossentropy
|
||||
#self.model = load_model(self.model_dir, compile=False)
|
||||
|
||||
##if self.weights_dir!=None:
|
||||
##self.model.load_weights(self.weights_dir)
|
||||
|
||||
if self.task != 'classification' and self.task != 'reading_order':
|
||||
self.img_height=self.model.layers[len(self.model.layers)-1].output_shape[1]
|
||||
self.img_width=self.model.layers[len(self.model.layers)-1].output_shape[2]
|
||||
self.n_classes=self.model.layers[len(self.model.layers)-1].output_shape[3]
|
||||
last = self.model.layers[-1]
|
||||
self.img_height = last.output_shape[1]
|
||||
self.img_width = last.output_shape[2]
|
||||
self.n_classes = last.output_shape[3]
|
||||
|
||||
def visualize_model_output(self, prediction, img, task):
|
||||
if task == "binarization":
|
||||
|
|
@ -208,22 +160,17 @@ class sbb_predict:
|
|||
'15' : [255, 0, 255]}
|
||||
|
||||
layout_only = np.zeros(prediction.shape)
|
||||
|
||||
for unq_class in unique_classes:
|
||||
where = prediction[:,:,0]==unq_class
|
||||
rgb_class_unique = rgb_colors[str(int(unq_class))]
|
||||
layout_only[:,:,0][prediction[:,:,0]==unq_class] = rgb_class_unique[0]
|
||||
layout_only[:,:,1][prediction[:,:,0]==unq_class] = rgb_class_unique[1]
|
||||
layout_only[:,:,2][prediction[:,:,0]==unq_class] = rgb_class_unique[2]
|
||||
|
||||
|
||||
layout_only[:,:,0][where] = rgb_class_unique[0]
|
||||
layout_only[:,:,1][where] = rgb_class_unique[1]
|
||||
layout_only[:,:,2][where] = rgb_class_unique[2]
|
||||
layout_only = layout_only.astype(np.int32)
|
||||
|
||||
img = self.resize_image(img, layout_only.shape[0], layout_only.shape[1])
|
||||
|
||||
layout_only = layout_only.astype(np.int32)
|
||||
img = img.astype(np.int32)
|
||||
|
||||
|
||||
|
||||
added_image = cv2.addWeighted(img,0.5,layout_only,0.1,0)
|
||||
|
||||
return added_image, layout_only
|
||||
|
|
@ -231,10 +178,10 @@ class sbb_predict:
|
|||
def predict(self, image_dir):
|
||||
if self.task == 'classification':
|
||||
classes_names = self.config_params_model['classification_classes_name']
|
||||
img_1ch = img=cv2.imread(image_dir, 0)
|
||||
|
||||
img_1ch = img_1ch / 255.0
|
||||
img_1ch = cv2.resize(img_1ch, (self.config_params_model['input_height'], self.config_params_model['input_width']), interpolation=cv2.INTER_NEAREST)
|
||||
img_1ch = cv2.imread(image_dir, 0) / 255.0
|
||||
img_1ch = cv2.resize(img_1ch, (self.config_params_model['input_height'],
|
||||
self.config_params_model['input_width']),
|
||||
interpolation=cv2.INTER_NEAREST)
|
||||
img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
|
||||
img_in[0, :, :, 0] = img_1ch[:, :]
|
||||
img_in[0, :, :, 1] = img_1ch[:, :]
|
||||
|
|
@ -244,23 +191,27 @@ class sbb_predict:
|
|||
index_class = np.argmax(label_p_pred[0])
|
||||
|
||||
print("Predicted Class: {}".format(classes_names[str(int(index_class))]))
|
||||
|
||||
elif self.task == 'reading_order':
|
||||
img_height = self.config_params_model['input_height']
|
||||
img_width = self.config_params_model['input_width']
|
||||
|
||||
tree_xml, root_xml, bb_coord_printspace, file_name, id_paragraph, id_header, co_text_paragraph, co_text_header, tot_region_ref, x_len, y_len, index_tot_regions, img_poly = read_xml(self.xml_file)
|
||||
_, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(co_text_header)
|
||||
tree_xml, root_xml, bb_coord_printspace, file_name, \
|
||||
id_paragraph, id_header, \
|
||||
co_text_paragraph, co_text_header, \
|
||||
tot_region_ref, x_len, y_len, index_tot_regions, \
|
||||
img_poly = read_xml(self.xml_file)
|
||||
_, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = \
|
||||
find_new_features_of_contours(co_text_header)
|
||||
|
||||
img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8')
|
||||
|
||||
|
||||
for j in range(len(cy_main)):
|
||||
img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1
|
||||
img_header_and_sep[int(y_max_main[j]): int(y_max_main[j]) + 12,
|
||||
int(x_min_main[j]): int(x_max_main[j])] = 1
|
||||
|
||||
co_text_all = co_text_paragraph + co_text_header
|
||||
id_all_text = id_paragraph + id_header
|
||||
|
||||
|
||||
##texts_corr_order_index = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ]
|
||||
##texts_corr_order_index_int = [int(x) for x in texts_corr_order_index]
|
||||
texts_corr_order_index_int = list(np.array(range(len(co_text_all))))
|
||||
|
|
@ -271,7 +222,8 @@ class sbb_predict:
|
|||
#print(np.shape(co_text_all[0]), len( np.shape(co_text_all[0]) ),'co_text_all')
|
||||
#co_text_all = filter_contours_area_of_image_tables(img_poly, co_text_all, _, max_area, min_area)
|
||||
#print(co_text_all,'co_text_all')
|
||||
co_text_all, texts_corr_order_index_int, _ = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, self.min_area)
|
||||
co_text_all, texts_corr_order_index_int, _ = filter_contours_area_of_image(
|
||||
img_poly, co_text_all, texts_corr_order_index_int, max_area, self.min_area)
|
||||
|
||||
#print(texts_corr_order_index_int)
|
||||
|
||||
|
|
@ -664,17 +616,15 @@ class sbb_predict:
|
|||
help="min area size of regions considered for reading order detection. The default value is zero and means that all text regions are considered for reading order.",
|
||||
)
|
||||
def main(image, dir_in, model, patches, save, save_layout, ground_truth, xml_file, out, min_area):
|
||||
assert image or dir_in, "Either a single image -i or a dir_in -di is required"
|
||||
assert image or dir_in, "Either a single image -i or a dir_in -di input is required"
|
||||
with open(os.path.join(model,'config.json')) as f:
|
||||
config_params_model = json.load(f)
|
||||
task = config_params_model['task']
|
||||
if task != 'classification' and task != 'reading_order':
|
||||
if image and not save:
|
||||
print("Error: You used one of segmentation or binarization task with image input but not set -s, you need a filename to save visualized output with -s")
|
||||
sys.exit(1)
|
||||
if dir_in and not out:
|
||||
print("Error: You used one of segmentation or binarization task with dir_in but not set -out")
|
||||
sys.exit(1)
|
||||
x=sbb_predict(image, dir_in, model, task, config_params_model, patches, save, save_layout, ground_truth, xml_file, out, min_area)
|
||||
assert not image or save, "For segmentation or binarization, an input single image -i also requires an output filename -s"
|
||||
assert not dir_in or out, "For segmentation or binarization, an input directory -di also requires an output directory -o"
|
||||
x = SBBPredict(image, dir_in, model, task, config_params_model,
|
||||
patches, save, save_layout, ground_truth, xml_file, out,
|
||||
min_area)
|
||||
x.run()
|
||||
|
||||
|
|
|
|||
|
|
@ -28,14 +28,14 @@ from eynollah.training.utils import (
|
|||
)
|
||||
|
||||
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
||||
os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
|
||||
import tensorflow as tf
|
||||
from tensorflow.compat.v1.keras.backend import set_session
|
||||
from tensorflow.keras.optimizers import SGD, Adam
|
||||
from sacred import Experiment
|
||||
from tensorflow.keras.models import load_model
|
||||
from tensorflow.keras.callbacks import Callback, TensorBoard
|
||||
from sacred import Experiment
|
||||
from tqdm import tqdm
|
||||
from sklearn.metrics import f1_score
|
||||
from tensorflow.keras.callbacks import Callback
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
|
@ -63,10 +63,11 @@ class SaveWeightsAfterSteps(Callback):
|
|||
|
||||
|
||||
def configuration():
|
||||
config = tf.compat.v1.ConfigProto()
|
||||
config.gpu_options.allow_growth = True
|
||||
session = tf.compat.v1.Session(config=config)
|
||||
set_session(session)
|
||||
try:
|
||||
for device in tf.config.list_physical_devices('GPU'):
|
||||
tf.config.experimental.set_memory_growth(device, True)
|
||||
except:
|
||||
print("no GPU device available", file=sys.stderr)
|
||||
|
||||
|
||||
def get_dirs_or_files(input_data):
|
||||
|
|
@ -171,12 +172,11 @@ def run(_config, n_classes, n_epochs, input_height,
|
|||
else:
|
||||
list_all_possible_foreground_rgbs = None
|
||||
|
||||
if task == "segmentation" or task == "enhancement" or task == "binarization":
|
||||
if task in ["segmentation", "enhancement", "binarization"]:
|
||||
if data_is_provided:
|
||||
dir_train_flowing = os.path.join(dir_output, 'train')
|
||||
dir_eval_flowing = os.path.join(dir_output, 'eval')
|
||||
|
||||
|
||||
dir_flow_train_imgs = os.path.join(dir_train_flowing, 'images')
|
||||
dir_flow_train_labels = os.path.join(dir_train_flowing, 'labels')
|
||||
|
||||
|
|
@ -227,176 +227,228 @@ def run(_config, n_classes, n_epochs, input_height,
|
|||
segs_list_test=np.array(os.listdir(dir_seg_val))
|
||||
|
||||
# writing patches into a sub-folder in order to be flowed from directory.
|
||||
provide_patches(imgs_list, segs_list, dir_img, dir_seg, dir_flow_train_imgs,
|
||||
dir_flow_train_labels, input_height, input_width, blur_k,
|
||||
blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background,adding_rgb_foreground, add_red_textlines, channels_shuffling,
|
||||
scaling, shifting, degrading, brightening, scales, degrade_scales, brightness,
|
||||
flip_index,shuffle_indexes, scaling_bluring, scaling_brightness, scaling_binarization,
|
||||
rotation, rotation_not_90, thetha, scaling_flip, task, augmentation=augmentation,
|
||||
patches=patches, dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds, dir_rgb_foregrounds=dir_rgb_foregrounds,list_all_possible_foreground_rgbs=list_all_possible_foreground_rgbs)
|
||||
|
||||
provide_patches(imgs_list_test, segs_list_test, dir_img_val, dir_seg_val,
|
||||
dir_flow_eval_imgs, dir_flow_eval_labels, input_height, input_width,
|
||||
blur_k, blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, channels_shuffling,
|
||||
scaling, shifting, degrading, brightening, scales, degrade_scales, brightness,
|
||||
flip_index, shuffle_indexes, scaling_bluring, scaling_brightness, scaling_binarization,
|
||||
rotation, rotation_not_90, thetha, scaling_flip, task, augmentation=False, patches=patches,dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds,dir_rgb_foregrounds=dir_rgb_foregrounds,list_all_possible_foreground_rgbs=list_all_possible_foreground_rgbs )
|
||||
common_args = [input_height, input_width,
|
||||
blur_k, blur_aug,
|
||||
padding_white, padding_black,
|
||||
flip_aug, binarization,
|
||||
adding_rgb_background,
|
||||
adding_rgb_foreground,
|
||||
add_red_textlines,
|
||||
channels_shuffling,
|
||||
scaling, shifting, degrading, brightening,
|
||||
scales, degrade_scales, brightness,
|
||||
flip_index, shuffle_indexes,
|
||||
scaling_bluring, scaling_brightness, scaling_binarization,
|
||||
rotation, rotation_not_90, thetha,
|
||||
scaling_flip, task,
|
||||
]
|
||||
common_kwargs = dict(patches=
|
||||
patches,
|
||||
dir_img_bin=
|
||||
dir_img_bin,
|
||||
number_of_backgrounds_per_image=
|
||||
number_of_backgrounds_per_image,
|
||||
list_all_possible_background_images=
|
||||
list_all_possible_background_images,
|
||||
dir_rgb_backgrounds=
|
||||
dir_rgb_backgrounds,
|
||||
dir_rgb_foregrounds=
|
||||
dir_rgb_foregrounds,
|
||||
list_all_possible_foreground_rgbs=
|
||||
list_all_possible_foreground_rgbs,
|
||||
)
|
||||
provide_patches(imgs_list, segs_list,
|
||||
dir_img, dir_seg,
|
||||
dir_flow_train_imgs,
|
||||
dir_flow_train_labels,
|
||||
*common_args,
|
||||
augmentation=augmentation,
|
||||
**common_kwargs)
|
||||
provide_patches(imgs_list_test, segs_list_test,
|
||||
dir_img_val, dir_seg_val,
|
||||
dir_flow_eval_imgs,
|
||||
dir_flow_eval_labels,
|
||||
*common_args,
|
||||
augmentation=False,
|
||||
**common_kwargs)
|
||||
|
||||
if weighted_loss:
|
||||
weights = np.zeros(n_classes)
|
||||
if data_is_provided:
|
||||
for obj in os.listdir(dir_flow_train_labels):
|
||||
try:
|
||||
label_obj = cv2.imread(dir_flow_train_labels + '/' + obj)
|
||||
label_obj_one_hot = get_one_hot(label_obj, label_obj.shape[0], label_obj.shape[1], n_classes)
|
||||
weights += (label_obj_one_hot.sum(axis=0)).sum(axis=0)
|
||||
except:
|
||||
pass
|
||||
dirs = dir_flow_train_labels
|
||||
else:
|
||||
|
||||
for obj in os.listdir(dir_seg):
|
||||
try:
|
||||
label_obj = cv2.imread(dir_seg + '/' + obj)
|
||||
label_obj_one_hot = get_one_hot(label_obj, label_obj.shape[0], label_obj.shape[1], n_classes)
|
||||
weights += (label_obj_one_hot.sum(axis=0)).sum(axis=0)
|
||||
except:
|
||||
pass
|
||||
dirs = dir_seg
|
||||
for obj in os.listdir(dirs):
|
||||
label_file = os.path.join(dirs, + obj)
|
||||
try:
|
||||
label_obj = cv2.imread(label_file)
|
||||
label_obj_one_hot = get_one_hot(label_obj, label_obj.shape[0], label_obj.shape[1], n_classes)
|
||||
weights += (label_obj_one_hot.sum(axis=0)).sum(axis=0)
|
||||
except Exception as e:
|
||||
print("error reading data file '%s': %s" % (label_file, e), file=sys.stderr)
|
||||
|
||||
weights = 1.00 / weights
|
||||
|
||||
weights = weights / float(np.sum(weights))
|
||||
weights = weights / float(np.min(weights))
|
||||
weights = weights / float(np.sum(weights))
|
||||
|
||||
if continue_training:
|
||||
if backbone_type=='nontransformer':
|
||||
if is_loss_soft_dice and (task == "segmentation" or task == "binarization"):
|
||||
model = load_model(dir_of_start_model, compile=True, custom_objects={'soft_dice_loss': soft_dice_loss})
|
||||
if weighted_loss and (task == "segmentation" or task == "binarization"):
|
||||
model = load_model(dir_of_start_model, compile=True, custom_objects={'loss': weighted_categorical_crossentropy(weights)})
|
||||
if not is_loss_soft_dice and not weighted_loss:
|
||||
if backbone_type == 'nontransformer':
|
||||
if is_loss_soft_dice and task in ["segmentation", "binarization"]:
|
||||
model = load_model(dir_of_start_model, compile=True,
|
||||
custom_objects={'soft_dice_loss': soft_dice_loss})
|
||||
elif weighted_loss and task in ["segmentation", "binarization"]:
|
||||
model = load_model(dir_of_start_model, compile=True,
|
||||
custom_objects={'loss': weighted_categorical_crossentropy(weights)})
|
||||
else:
|
||||
model = load_model(dir_of_start_model , compile=True)
|
||||
elif backbone_type=='transformer':
|
||||
if is_loss_soft_dice and (task == "segmentation" or task == "binarization"):
|
||||
model = load_model(dir_of_start_model, compile=True, custom_objects={"PatchEncoder": PatchEncoder, "Patches": Patches,'soft_dice_loss': soft_dice_loss})
|
||||
if weighted_loss and (task == "segmentation" or task == "binarization"):
|
||||
model = load_model(dir_of_start_model, compile=True, custom_objects={'loss': weighted_categorical_crossentropy(weights)})
|
||||
if not is_loss_soft_dice and not weighted_loss:
|
||||
model = load_model(dir_of_start_model , compile=True,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
|
||||
|
||||
elif backbone_type == 'transformer':
|
||||
if is_loss_soft_dice and task in ["segmentation", "binarization"]:
|
||||
model = load_model(dir_of_start_model, compile=True,
|
||||
custom_objects={"PatchEncoder": PatchEncoder,
|
||||
"Patches": Patches,
|
||||
'soft_dice_loss': soft_dice_loss})
|
||||
elif weighted_loss and task in ["segmentation", "binarization"]:
|
||||
model = load_model(dir_of_start_model, compile=True,
|
||||
custom_objects={'loss': weighted_categorical_crossentropy(weights)})
|
||||
else:
|
||||
model = load_model(dir_of_start_model, compile=True,
|
||||
custom_objects = {"PatchEncoder": PatchEncoder,
|
||||
"Patches": Patches})
|
||||
else:
|
||||
index_start = 0
|
||||
if backbone_type=='nontransformer':
|
||||
model = resnet50_unet(n_classes, input_height, input_width, task, weight_decay, pretraining)
|
||||
elif backbone_type=='transformer':
|
||||
if backbone_type == 'nontransformer':
|
||||
model = resnet50_unet(n_classes,
|
||||
input_height,
|
||||
input_width,
|
||||
task,
|
||||
weight_decay,
|
||||
pretraining)
|
||||
elif backbone_type == 'transformer':
|
||||
num_patches_x = transformer_num_patches_xy[0]
|
||||
num_patches_y = transformer_num_patches_xy[1]
|
||||
num_patches = num_patches_x * num_patches_y
|
||||
|
||||
if transformer_cnn_first:
|
||||
if input_height != (num_patches_y * transformer_patchsize_y * 32):
|
||||
print("Error: transformer_patchsize_y or transformer_num_patches_xy height value error . input_height should be equal to ( transformer_num_patches_xy height value * transformer_patchsize_y * 32)")
|
||||
sys.exit(1)
|
||||
if input_width != (num_patches_x * transformer_patchsize_x * 32):
|
||||
print("Error: transformer_patchsize_x or transformer_num_patches_xy width value error . input_width should be equal to ( transformer_num_patches_xy width value * transformer_patchsize_x * 32)")
|
||||
sys.exit(1)
|
||||
if (transformer_projection_dim % (transformer_patchsize_y * transformer_patchsize_x)) != 0:
|
||||
print("Error: transformer_projection_dim error. The remainder when parameter transformer_projection_dim is divided by (transformer_patchsize_y*transformer_patchsize_x) should be zero")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
model = vit_resnet50_unet(n_classes, transformer_patchsize_x, transformer_patchsize_y, num_patches, transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_projection_dim, input_height, input_width, task, weight_decay, pretraining)
|
||||
model_builder = vit_resnet50_unet
|
||||
multiple_of_32 = True
|
||||
else:
|
||||
if input_height != (num_patches_y * transformer_patchsize_y):
|
||||
print("Error: transformer_patchsize_y or transformer_num_patches_xy height value error . input_height should be equal to ( transformer_num_patches_xy height value * transformer_patchsize_y)")
|
||||
sys.exit(1)
|
||||
if input_width != (num_patches_x * transformer_patchsize_x):
|
||||
print("Error: transformer_patchsize_x or transformer_num_patches_xy width value error . input_width should be equal to ( transformer_num_patches_xy width value * transformer_patchsize_x)")
|
||||
sys.exit(1)
|
||||
if (transformer_projection_dim % (transformer_patchsize_y * transformer_patchsize_x)) != 0:
|
||||
print("Error: transformer_projection_dim error. The remainder when parameter transformer_projection_dim is divided by (transformer_patchsize_y*transformer_patchsize_x) should be zero")
|
||||
sys.exit(1)
|
||||
model = vit_resnet50_unet_transformer_before_cnn(n_classes, transformer_patchsize_x, transformer_patchsize_y, num_patches, transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_projection_dim, input_height, input_width, task, weight_decay, pretraining)
|
||||
model_builder = vit_resnet50_unet_transformer_before_cnn
|
||||
multiple_of_32 = False
|
||||
|
||||
assert input_height == num_patches_y * transformer_patchsize_y * (32 if multiple_of_32 else 1), \
|
||||
"transformer_patchsize_y or transformer_num_patches_xy height value error: " \
|
||||
"input_height should be equal to " \
|
||||
"(transformer_num_patches_xy height value * transformer_patchsize_y%s)" % \
|
||||
" * 32" if multiple_of_32 else ""
|
||||
assert input_width == num_patches_x * transformer_patchsize_x * (32 if multiple_of_32 else 1), \
|
||||
"transformer_patchsize_x or transformer_num_patches_xy width value error: " \
|
||||
"input_width should be equal to " \
|
||||
"(transformer_num_patches_xy width value * transformer_patchsize_x%s)" % \
|
||||
" * 32" if multiple_of_32 else ""
|
||||
assert 0 == transformer_projection_dim % (transformer_patchsize_y * transformer_patchsize_x), \
|
||||
"transformer_projection_dim error: " \
|
||||
"The remainder when parameter transformer_projection_dim is divided by " \
|
||||
"(transformer_patchsize_y*transformer_patchsize_x) should be zero"
|
||||
|
||||
model = model_builder(
|
||||
n_classes,
|
||||
transformer_patchsize_x,
|
||||
transformer_patchsize_y,
|
||||
num_patches,
|
||||
transformer_mlp_head_units,
|
||||
transformer_layers,
|
||||
transformer_num_heads,
|
||||
transformer_projection_dim,
|
||||
input_height,
|
||||
input_width,
|
||||
task,
|
||||
weight_decay,
|
||||
pretraining)
|
||||
|
||||
#if you want to see the model structure just uncomment model summary.
|
||||
model.summary()
|
||||
|
||||
|
||||
if task == "segmentation" or task == "binarization":
|
||||
if not is_loss_soft_dice and not weighted_loss:
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy'])
|
||||
if task in ["segmentation", "binarization"]:
|
||||
if is_loss_soft_dice:
|
||||
model.compile(loss=soft_dice_loss,
|
||||
optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy'])
|
||||
if weighted_loss:
|
||||
model.compile(loss=weighted_categorical_crossentropy(weights),
|
||||
optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy'])
|
||||
elif task == "enhancement":
|
||||
model.compile(loss='mean_squared_error',
|
||||
optimizer=Adam(learning_rate=learning_rate), metrics=['accuracy'])
|
||||
|
||||
loss = soft_dice_loss
|
||||
elif weighted_loss:
|
||||
loss = weighted_categorical_crossentropy(weights)
|
||||
else:
|
||||
loss = 'categorical_crossentropy'
|
||||
else: # task == "enhancement"
|
||||
loss = 'mean_squared_error'
|
||||
model.compile(loss=loss,
|
||||
optimizer=Adam(learning_rate=learning_rate),
|
||||
metrics=['accuracy'])
|
||||
|
||||
# generating train and evaluation data
|
||||
train_gen = data_gen(dir_flow_train_imgs, dir_flow_train_labels, batch_size=n_batch,
|
||||
input_height=input_height, input_width=input_width, n_classes=n_classes, task=task)
|
||||
val_gen = data_gen(dir_flow_eval_imgs, dir_flow_eval_labels, batch_size=n_batch,
|
||||
input_height=input_height, input_width=input_width, n_classes=n_classes, task=task)
|
||||
gen_kwargs = dict(batch_size=n_batch,
|
||||
input_height=input_height,
|
||||
input_width=input_width,
|
||||
n_classes=n_classes,
|
||||
task=task)
|
||||
train_gen = data_gen(dir_flow_train_imgs, dir_flow_train_labels, **gen_kwargs)
|
||||
val_gen = data_gen(dir_flow_eval_imgs, dir_flow_eval_labels, **gen_kwargs)
|
||||
|
||||
##img_validation_patches = os.listdir(dir_flow_eval_imgs)
|
||||
##score_best=[]
|
||||
##score_best.append(0)
|
||||
|
||||
callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
|
||||
if save_interval:
|
||||
save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config)
|
||||
|
||||
callbacks.append(SaveWeightsAfterSteps(save_interval, dir_output, _config))
|
||||
|
||||
for i in tqdm(range(index_start, n_epochs + index_start)):
|
||||
if save_interval:
|
||||
model.fit(
|
||||
train_gen,
|
||||
steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs)) / n_batch) - 1,
|
||||
validation_data=val_gen,
|
||||
validation_steps=1,
|
||||
epochs=1, callbacks=[save_weights_callback])
|
||||
else:
|
||||
model.fit(
|
||||
train_gen,
|
||||
steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs)) / n_batch) - 1,
|
||||
validation_data=val_gen,
|
||||
validation_steps=1,
|
||||
epochs=1)
|
||||
model.fit(
|
||||
train_gen,
|
||||
steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs)) / n_batch) - 1,
|
||||
validation_data=val_gen,
|
||||
validation_steps=1,
|
||||
epochs=1,
|
||||
callbacks=callbacks)
|
||||
|
||||
model.save(os.path.join(dir_output,'model_'+str(i)))
|
||||
|
||||
with open(os.path.join(os.path.join(dir_output,'model_'+str(i)),"config.json"), "w") as fp:
|
||||
dir_model = os.path.join(dir_output, 'model_' + str(i))
|
||||
model.save(dir_model)
|
||||
with open(os.path.join(dir_model, "config.json"), "w") as fp:
|
||||
json.dump(_config, fp) # encode dict into JSON
|
||||
|
||||
#os.system('rm -rf '+dir_train_flowing)
|
||||
#os.system('rm -rf '+dir_eval_flowing)
|
||||
|
||||
#model.save(dir_output+'/'+'model'+'.h5')
|
||||
|
||||
elif task=='classification':
|
||||
configuration()
|
||||
model = resnet50_classifier(n_classes, input_height, input_width, weight_decay, pretraining)
|
||||
model = resnet50_classifier(n_classes,
|
||||
input_height,
|
||||
input_width,
|
||||
weight_decay,
|
||||
pretraining)
|
||||
|
||||
opt_adam = Adam(learning_rate=0.001)
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer = opt_adam,metrics=['accuracy'])
|
||||
|
||||
optimizer=Adam(learning_rate=0.001), # rs: why not learning_rate?
|
||||
metrics=['accuracy'])
|
||||
|
||||
list_classes = list(classification_classes_name.values())
|
||||
testX, testY = generate_data_from_folder_evaluation(dir_eval, input_height, input_width, n_classes, list_classes)
|
||||
|
||||
y_tot=np.zeros((testX.shape[0],n_classes))
|
||||
trainXY = generate_data_from_folder_training(
|
||||
dir_train, n_batch, input_height, input_width, n_classes, list_classes)
|
||||
testX, testY = generate_data_from_folder_evaluation(
|
||||
dir_eval, input_height, input_width, n_classes, list_classes)
|
||||
|
||||
y_tot = np.zeros((testX.shape[0], n_classes))
|
||||
score_best= [0]
|
||||
|
||||
num_rows = return_number_of_total_training_data(dir_train)
|
||||
weights=[]
|
||||
callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
|
||||
|
||||
for i in range(n_epochs):
|
||||
history = model.fit( generate_data_from_folder_training(dir_train, n_batch , input_height, input_width, n_classes, list_classes), steps_per_epoch=num_rows / n_batch, verbose=1)#,class_weight=weights)
|
||||
|
||||
history = model.fit(trainXY,
|
||||
steps_per_epoch=num_rows / n_batch,
|
||||
#class_weight=weights)
|
||||
verbose=1,
|
||||
callbacks=callbacks)
|
||||
y_pr_class = []
|
||||
for jj in range(testY.shape[0]):
|
||||
y_pr=model.predict(testX[jj,:,:,:].reshape(1,input_height,input_width,3), verbose=0)
|
||||
|
|
@ -433,7 +485,8 @@ def run(_config, n_classes, n_epochs, input_height,
|
|||
|
||||
elif task=='reading_order':
|
||||
configuration()
|
||||
model = machine_based_reading_order_model(n_classes,input_height,input_width,weight_decay,pretraining)
|
||||
model = machine_based_reading_order_model(
|
||||
n_classes, input_height, input_width, weight_decay, pretraining)
|
||||
|
||||
dir_flow_train_imgs = os.path.join(dir_train, 'images')
|
||||
dir_flow_train_labels = os.path.join(dir_train, 'labels')
|
||||
|
|
@ -447,20 +500,26 @@ def run(_config, n_classes, n_epochs, input_height,
|
|||
|
||||
#f1score_tot = [0]
|
||||
indexer_start = 0
|
||||
# opt = SGD(learning_rate=0.01, momentum=0.9)
|
||||
opt_adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
|
||||
model.compile(loss="binary_crossentropy",
|
||||
optimizer = opt_adam,metrics=['accuracy'])
|
||||
#optimizer=SGD(learning_rate=0.01, momentum=0.9),
|
||||
optimizer=Adam(learning_rate=0.0001), # rs: why not learning_rate?
|
||||
metrics=['accuracy'])
|
||||
|
||||
callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
|
||||
if save_interval:
|
||||
save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config)
|
||||
callbacks.append(SaveWeightsAfterSteps(save_interval, dir_output, _config))
|
||||
|
||||
trainXY = generate_arrays_from_folder_reading_order(
|
||||
dir_flow_train_labels, dir_flow_train_imgs,
|
||||
n_batch, input_height, input_width, n_classes,
|
||||
thetha, augmentation)
|
||||
|
||||
for i in range(n_epochs):
|
||||
if save_interval:
|
||||
history = model.fit(generate_arrays_from_folder_reading_order(dir_flow_train_labels, dir_flow_train_imgs, n_batch, input_height, input_width, n_classes, thetha, augmentation), steps_per_epoch=num_rows / n_batch, verbose=1, callbacks=[save_weights_callback])
|
||||
else:
|
||||
history = model.fit(generate_arrays_from_folder_reading_order(dir_flow_train_labels, dir_flow_train_imgs, n_batch, input_height, input_width, n_classes, thetha, augmentation), steps_per_epoch=num_rows / n_batch, verbose=1)
|
||||
model.save( os.path.join(dir_output,'model_'+str(i+indexer_start) ))
|
||||
history = model.fit(trainXY,
|
||||
steps_per_epoch=num_rows / n_batch,
|
||||
verbose=1,
|
||||
callbacks=callbacks)
|
||||
model.save(os.path.join(dir_output, 'model_'+str(i+indexer_start) ))
|
||||
|
||||
with open(os.path.join(os.path.join(dir_output,'model_'+str(i)),"config.json"), "w") as fp:
|
||||
json.dump(_config, fp) # encode dict into JSON
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ from shapely import set_precision
|
|||
from shapely.ops import unary_union, nearest_points
|
||||
|
||||
from .rotate import rotate_image, rotation_image_new
|
||||
from . import ensure_array
|
||||
|
||||
def contours_in_same_horizon(cy_main_hor):
|
||||
"""
|
||||
|
|
@ -249,12 +248,14 @@ def return_contours_of_image(image):
|
|||
return contours, hierarchy
|
||||
|
||||
def dilate_textline_contours(all_found_textline_polygons):
    """
    Dilate the textline contours of every region.

    The argument is iterated as a sequence of regions, each region being
    an iterable of contours. Every contour is converted with
    ``contour2polygon(contour, dilate=6)`` and converted back with
    ``polygon2contour``; the resulting contours of one region are then
    passed together through ``ensure_array``.

    Returns a list holding one ``ensure_array`` result per region,
    in the same order as the input.
    """
    # function-scope import, kept as in the original
    # (presumably to avoid an import cycle with the package -- TODO confirm)
    from . import ensure_array

    dilated_regions = []
    for textlines in all_found_textline_polygons:
        dilated_textlines = []
        for textline in textlines:
            polygon = contour2polygon(textline, dilate=6)
            dilated_textlines.append(polygon2contour(polygon))
        dilated_regions.append(ensure_array(dilated_textlines))
    return dilated_regions
|
||||
|
||||
def dilate_textregion_contours(all_found_textregion_polygons):
    """
    Dilate every text region contour.

    Each element of the argument is converted with
    ``contour2polygon(contour, dilate=6)`` and converted back with
    ``polygon2contour``. The list of resulting contours is passed through
    ``ensure_array``, whose result is returned.
    """
    # function-scope import, kept as in the original
    # (presumably to avoid an import cycle with the package -- TODO confirm)
    from . import ensure_array

    def _dilated(contour):
        # round trip contour -> dilated polygon -> contour
        return polygon2contour(contour2polygon(contour, dilate=6))

    return ensure_array(list(map(_dilated, all_found_textregion_polygons)))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue