training: use proper Keras callbacks and top-level loop

Robert Sachunsky 2026-01-22 11:25:00 +01:00
parent 3c3effcfda
commit 87d7ffbdd8
5 changed files with 84 additions and 100 deletions

@@ -1,17 +1,9 @@
 import sys
 import click
-import tensorflow as tf
 from .models import resnet50_unet

-def configuration():
-    try:
-        for device in tf.config.list_physical_devices('GPU'):
-            tf.config.experimental.set_memory_growth(device, True)
-    except:
-        print("no GPU device available", file=sys.stderr)
-
 @click.command()
 def build_model_load_pretrained_weights_and_save():
     n_classes = 2

@@ -21,8 +13,6 @@ def build_model_load_pretrained_weights_and_save():
     pretraining = False
     dir_of_weights = 'model_bin_sbb_ens.h5'

-    # configuration()
-
     model = resnet50_unet(n_classes, input_height, input_width, weight_decay, pretraining)
     model.load_weights(dir_of_weights)
    model.save('./name_in_another_python_version.h5')

@@ -653,6 +653,7 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
                 num_col = int(text_comments.split('num_col')[1])
                 comment_is_sub_element = True
     if not comment_is_sub_element:
+        # FIXME: look in /Page/@custom as well
         num_col = None
     if num_col:
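
As a sketch of what the FIXME above could mean: PAGE-XML allows free-form key-value annotations in the custom attribute of the Page element, so a fallback lookup might parse that string as well. Everything below (the helper name, the assumption that num_col is stored as a key:value pair in custom) is illustrative, not part of this repo:

    from lxml import etree

    def parse_custom_attribute(custom):
        # split a PAGE-XML style custom string such as "foo {num_col:2;}"
        # into a flat dict (hypothetical helper, deliberately simplistic)
        result = {}
        for item in custom.replace('{', ';').replace('}', ';').split(';'):
            if ':' in item:
                key, value = item.split(':', 1)
                result[key.strip()] = value.strip()
        return result

    page = etree.parse('page.xml').find('.//{*}Page')
    custom = page.get('custom') or ''
    num_col = None
    if 'num_col' in custom:
        num_col = int(parse_custom_attribute(custom)['num_col'])
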

@@ -1,3 +1,6 @@
+import os
+os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
+
 import tensorflow as tf
 from tensorflow import keras
 from tensorflow.keras.models import *
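
A note on the two lines added above: the environment variable only takes effect if it is set before tensorflow is imported for the first time anywhere in the process, and on TF >= 2.16 it additionally requires the tf-keras package to be installed. A minimal sketch of the required ordering:

    import os
    # must run before the first tensorflow import in the process;
    # on TF >= 2.16 this also needs the tf-keras package installed
    os.environ['TF_USE_LEGACY_KERAS'] = '1'

    import tensorflow as tf
    # tf.keras now resolves to legacy Keras 2 rather than Keras 3
    assert tf.keras.__version__.startswith('2.')
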

@@ -32,7 +32,7 @@ os.environ['TF_USE_LEGACY_KERAS'] = '1' # avoid Keras 3 after TF 2.15
 import tensorflow as tf
 from tensorflow.keras.optimizers import SGD, Adam
 from tensorflow.keras.models import load_model
-from tensorflow.keras.callbacks import Callback, TensorBoard
+from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
 from sacred import Experiment
 from tqdm import tqdm
 from sklearn.metrics import f1_score
@ -40,26 +40,28 @@ from sklearn.metrics import f1_score
import numpy as np import numpy as np
import cv2 import cv2
class SaveWeightsAfterSteps(Callback): class SaveWeightsAfterSteps(ModelCheckpoint):
def __init__(self, save_interval, save_path, _config): def __init__(self, save_interval, save_path, _config, **kwargs):
super(SaveWeightsAfterSteps, self).__init__() if save_interval:
self.save_interval = save_interval # batches
self.save_path = save_path super().__init__(
self.step_count = 0 os.path.join(save_path, "model_step_{batch:04d}"),
save_freq=save_interval,
verbose=1,
**kwargs)
else:
super().__init__(
os.path.join(save_path, "model_{epoch:02d}"),
save_freq="epoch",
verbose=1,
**kwargs)
self._config = _config self._config = _config
def on_train_batch_end(self, batch, logs=None): # overwrite tf-keras (Keras 2) implementation to get our _config JSON in
self.step_count += 1 def _save_handler(self, filepath):
super()._save_handler(filepath)
if self.step_count % self.save_interval ==0: with open(os.path.join(filepath, "config.json"), "w") as fp:
save_file = f"{self.save_path}/model_step_{self.step_count}" json.dump(self._config, fp) # encode dict into JSON
#os.system('mkdir '+save_file)
self.model.save(save_file)
with open(os.path.join(os.path.join(self.save_path, f"model_step_{self.step_count}"),"config.json"), "w") as fp:
json.dump(self._config, fp) # encode dict into JSON
print(f"saved model as steps {self.step_count} to {save_file}")
def configuration(): def configuration():
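
For illustration, the subclass above now behaves like a plain ModelCheckpoint with a preconfigured path pattern; usage could look as follows (output directory, interval, and the model/generator names are made up, and extra keyword arguments pass straight through to ModelCheckpoint):

    # save every 500 batches into model_step_0500, model_step_1000, ...
    per_step = SaveWeightsAfterSteps(500, 'output', _config)

    # save_interval=0 switches to once per epoch: model_01, model_02, ...
    per_epoch = SaveWeightsAfterSteps(0, 'output', _config)

    # pass-through kwargs allow e.g. keeping only the best epoch
    best = SaveWeightsAfterSteps(0, 'output', _config,
                                 monitor='val_loss',
                                 save_best_only=True, mode='min')

    model.fit(train_gen, epochs=10, callbacks=[per_step, per_epoch])
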
@@ -396,23 +398,19 @@ def run(_config, n_classes, n_epochs, input_height,
         ##score_best=[]
         ##score_best.append(0)

-        callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
+        callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False),
+                     SaveWeightsAfterSteps(0, dir_output, _config)]
         if save_interval:
             callbacks.append(SaveWeightsAfterSteps(save_interval, dir_output, _config))

-        for i in tqdm(range(index_start, n_epochs + index_start)):
-            model.fit(
-                train_gen,
-                steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs)) / n_batch) - 1,
-                validation_data=val_gen,
-                validation_steps=1,
-                epochs=1,
-                callbacks=callbacks)
-            dir_model = os.path.join(dir_output, 'model_' + str(i))
-            model.save(dir_model)
-            with open(os.path.join(dir_model, "config.json"), "w") as fp:
-                json.dump(_config, fp) # encode dict into JSON
+        model.fit(
+            train_gen,
+            steps_per_epoch=len(os.listdir(dir_flow_train_imgs)) // n_batch - 1,
+            validation_data=val_gen,
+            #validation_steps=1, # rs: only one batch??
+            validation_steps=len(os.listdir(dir_flow_eval_imgs)) // n_batch - 1,
+            epochs=n_epochs,
+            callbacks=callbacks)

         #os.system('rm -rf '+dir_train_flowing)
         #os.system('rm -rf '+dir_eval_flowing)
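
Two things about the rewritten fit call above: moving the loop into a single model.fit(..., epochs=n_epochs) lets Keras keep epoch counters, TensorBoard steps, and checkpoint numbering consistent across the whole run, and in tf-keras an integer save_freq counts batches while the string 'epoch' means once per epoch. A small sketch of that distinction (paths are illustrative):

    from tensorflow.keras.callbacks import ModelCheckpoint

    # integer save_freq: fires every 100 *batches*, across epoch boundaries
    every_100_batches = ModelCheckpoint('ckpt_step_{batch:04d}', save_freq=100)

    # string 'epoch': fires once at the end of each epoch
    every_epoch = ModelCheckpoint('ckpt_{epoch:02d}', save_freq='epoch')
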
@@ -434,54 +432,49 @@ def run(_config, n_classes, n_epochs, input_height,
         list_classes = list(classification_classes_name.values())
         trainXY = generate_data_from_folder_training(
             dir_train, n_batch, input_height, input_width, n_classes, list_classes)
-        testX, testY = generate_data_from_folder_evaluation(
+        testXY = generate_data_from_folder_evaluation(
             dir_eval, input_height, input_width, n_classes, list_classes)
-        y_tot = np.zeros((testX.shape[0], n_classes))
-        score_best= [0]

         num_rows = return_number_of_total_training_data(dir_train)
-        weights=[]

-        callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
+        callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False),
+                     SaveWeightsAfterSteps(0, dir_output, _config,
+                                           monitor='val_f1',
+                                           save_best_only=True, mode='max')]

-        for i in range(n_epochs):
-            history = model.fit(trainXY,
-                                steps_per_epoch=num_rows / n_batch,
-                                #class_weight=weights)
-                                verbose=1,
-                                callbacks=callbacks)
-            y_pr_class = []
-            for jj in range(testY.shape[0]):
-                y_pr=model.predict(testX[jj,:,:,:].reshape(1,input_height,input_width,3), verbose=0)
-                y_pr_ind= np.argmax(y_pr,axis=1)
-                y_pr_class.append(y_pr_ind)
-            y_pr_class = np.array(y_pr_class)
-            f1score=f1_score(np.argmax(testY,axis=1), y_pr_class, average='macro')
-            print(i,f1score)
-            if f1score>score_best[0]:
-                score_best[0]=f1score
-                model.save(os.path.join(dir_output,'model_best'))
-            if f1score > f1_threshold_classification:
-                weights.append(model.get_weights() )
+        history = model.fit(trainXY,
+                            steps_per_epoch=num_rows // n_batch,
+                            #class_weight=weights)
+                            validation_data=testXY,
+                            verbose=1,
+                            epochs=n_epochs,
+                            # FIXME: Model.fit has no `metrics` kwarg; F1Score must be
+                            # registered via model.compile(metrics=...) for 'val_f1'
+                            # to show up in history and callbacks
+                            #metrics=[F1Score(average='macro', name='f1')],
+                            callbacks=callbacks)

-        if len(weights) >= 1:
-            new_weights=list()
-            for weights_list_tuple in zip(*weights):
-                new_weights.append( [np.array(weights_).mean(axis=0) for weights_ in zip(*weights_list_tuple)] )
+        usable_checkpoints = np.flatnonzero(
+            np.array(history.history['val_f1']) > f1_threshold_classification)
+        if len(usable_checkpoints) >= 1:
+            print("averaging over usable checkpoints", usable_checkpoints)
+            all_weights = []
+            for epoch in usable_checkpoints:
+                # ModelCheckpoint numbers saved epochs from 1, history from 0
+                cp_path = os.path.join(dir_output, 'model_{epoch:02d}'.format(epoch=epoch + 1))
+                assert os.path.isdir(cp_path)
+                model = load_model(cp_path, compile=False)
+                all_weights.append(model.get_weights())
+            new_weights = []
+            for layer_weights in zip(*all_weights):
+                layer_weights = np.array([np.array(weights).mean(axis=0)
+                                          for weights in zip(*layer_weights)])
+                new_weights.append(layer_weights)

-            new_weights = [np.array(x) for x in new_weights]
-            model_weight_averaged=tf.keras.models.clone_model(model)
-            model_weight_averaged.set_weights(new_weights)
+            #model = tf.keras.models.clone_model(model)
+            model.set_weights(new_weights)

-            model_weight_averaged.save(os.path.join(dir_output,'model_ens_avg'))
-            with open(os.path.join( os.path.join(dir_output,'model_ens_avg'), "config.json"), "w") as fp:
-                json.dump(_config, fp) # encode dict into JSON
+            cp_path = os.path.join(dir_output, 'model_ens_avg')
+            model.save(cp_path)
+            with open(os.path.join(cp_path, "config.json"), "w") as fp:
+                json.dump(_config, fp) # encode dict into JSON
+            print("ensemble model saved under", cp_path)

-        with open(os.path.join( os.path.join(dir_output,'model_best'), "config.json"), "w") as fp:
-            json.dump(_config, fp) # encode dict into JSON

     elif task=='reading_order':
         configuration()
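
Stripped of the bookkeeping, the averaging block above implements checkpoint weight averaging: reload every checkpoint whose validation F1 cleared the threshold and average the weight tensors layer by layer. A compact sketch of the same technique (paths are illustrative; all checkpoints must share one architecture):

    import numpy as np
    from tensorflow.keras.models import load_model

    def average_checkpoints(checkpoint_paths):
        # element-wise mean of every weight tensor across all checkpoints
        all_weights = [load_model(path, compile=False).get_weights()
                       for path in checkpoint_paths]
        return [np.mean(tensors, axis=0) for tensors in zip(*all_weights)]

    # usage: push the averaged weights back into a compatible model
    # model.set_weights(average_checkpoints(['out/model_01', 'out/model_03']))
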
@@ -505,7 +498,8 @@ def run(_config, n_classes, n_epochs, input_height,
                       optimizer=Adam(learning_rate=0.0001), # rs: why not learning_rate?
                       metrics=['accuracy'])

-        callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False)]
+        callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False),
+                     SaveWeightsAfterSteps(0, dir_output, _config)]
         if save_interval:
             callbacks.append(SaveWeightsAfterSteps(save_interval, dir_output, _config))
@@ -514,20 +508,16 @@ def run(_config, n_classes, n_epochs, input_height,
             n_batch, input_height, input_width, n_classes,
             thetha, augmentation)

-        for i in range(n_epochs):
-            history = model.fit(trainXY,
-                                steps_per_epoch=num_rows / n_batch,
-                                verbose=1,
-                                callbacks=callbacks)
-            model.save(os.path.join(dir_output, 'model_'+str(i+indexer_start) ))
-
-            with open(os.path.join(os.path.join(dir_output,'model_'+str(i)),"config.json"), "w") as fp:
-                json.dump(_config, fp) # encode dict into JSON
-            '''
-            if f1score>f1score_tot[0]:
-                f1score_tot[0] = f1score
-                model_dir = os.path.join(dir_out,'model_best')
-                model.save(model_dir)
-            '''
+        history = model.fit(trainXY,
+                            steps_per_epoch=num_rows // n_batch,
+                            verbose=1,
+                            epochs=n_epochs,
+                            callbacks=callbacks)
+        '''
+        if f1score>f1score_tot[0]:
+            f1score_tot[0] = f1score
+            model_dir = os.path.join(dir_out,'model_best')
+            model.save(model_dir)
+        '''

@@ -1,6 +1,6 @@
 sacred
 seaborn
-numpy <1.24.0
+numpy
 tqdm
 imutils
 scipy