training.train: simplify+fix classification data loaders…

- unify `generate_data_from_folder_training` with `..._evaluation`
- instead of recreating the batch arrays after every batch, just zero them out in place (see the sketch below)
- cast image results to uint8 instead of uint16
- cast categorical results to float instead of int
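
The buffer-reuse point is the usual NumPy pattern of allocating the batch arrays once and resetting them in place rather than calling `np.zeros` again per batch. A minimal sketch of that pattern (shapes and the loop are illustrative, not taken from the code):

```python
import numpy as np

# Allocate the batch buffers once (shapes are placeholders).
batch_x = np.zeros((8, 224, 224, 3), dtype=np.uint8)
batch_y = np.zeros((8, 4), dtype=float)

for _ in range(3):       # stands in for the endless per-batch loop
    # ... fill batch_x / batch_y and hand them to the consumer ...
    batch_x[:] = 0       # reset in place instead of reallocating
    batch_y[:] = 0
```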
Author: Robert Sachunsky
Date:   2026-02-05 11:58:50 +01:00
Parent: 82d649061a
Commit: f03124f747

2 changed files with 25 additions and 68 deletions

@@ -430,13 +430,13 @@ def run(_config,
                       metrics=['accuracy', F1Score(average='macro', name='f1')])
         list_classes = list(classification_classes_name.values())
-        trainXY = generate_data_from_folder_training(
-            dir_train, n_batch, input_height, input_width, n_classes, list_classes)
-        testXY = generate_data_from_folder_evaluation(
-            dir_eval, input_height, input_width, n_classes, list_classes)
-        y_tot = np.zeros((testX.shape[0], n_classes))
-        num_rows = return_number_of_total_training_data(dir_train)
+        trainXY = generate_data_from_folder(
+            dir_train, n_batch, input_height, input_width, n_classes, list_classes, shuffle=True)
+        testXY = generate_data_from_folder(
+            dir_eval, n_batch, input_height, input_width, n_classes, list_classes)
+        epoch_size_train = return_number_of_total_training_data(dir_train)
+        epoch_size_eval = return_number_of_total_training_data(dir_eval)
         callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False),
                      SaveWeightsAfterSteps(0, dir_output, _config,
                                            monitor='val_f1',
@@ -444,9 +444,10 @@ def run(_config,
                                            mode='max')]
         history = model.fit(trainXY,
-                            steps_per_epoch=num_rows / n_batch,
+                            steps_per_epoch=epoch_size_train // n_batch,
                             #class_weight=weights)
                             validation_data=testXY,
+                            validation_steps=epoch_size_eval // n_batch,
                             verbose=1,
                             epochs=n_epochs,
                             callbacks=callbacks,
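
On the `model.fit` side, the epoch sizes are now divided with `//` rather than `/`, which keeps `steps_per_epoch` and `validation_steps` integral and simply drops any final partial batch. A quick check of the arithmetic (numbers are made up):

```python
epoch_size_train = 1000   # hypothetical number of training images
n_batch = 32

print(epoch_size_train / n_batch)    # 31.25 -> float, not a usable step count
print(epoch_size_train // n_batch)   # 31    -> full batches per epoch
```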

@@ -166,50 +166,7 @@ def return_number_of_total_training_data(path_classes):
-def generate_data_from_folder_evaluation(path_classes, height, width, n_classes, list_classes):
-    #sub_classes = os.listdir(path_classes)
-    #n_classes = len(sub_classes)
-    all_imgs = []
-    labels = []
-    #dicts =dict()
-    #indexer= 0
-    for indexer, sub_c in enumerate(list_classes):
-        sub_files = os.listdir(os.path.join(path_classes,sub_c ))
-        sub_files = [os.path.join(path_classes,sub_c )+'/' + x for x in sub_files]
-        #print( os.listdir(os.path.join(path_classes,sub_c )) )
-        all_imgs = all_imgs + sub_files
-        sub_labels = list( np.zeros( len(sub_files) ) +indexer )
-        #print( len(sub_labels) )
-        labels = labels + sub_labels
-        #dicts[sub_c] = indexer
-        #indexer +=1
-    categories = to_categorical(range(n_classes)).astype(np.int16)#[ [1 , 0, 0 , 0 , 0 , 0] , [0 , 1, 0 , 0 , 0 , 0] , [0 , 0, 1 , 0 , 0 , 0] , [0 , 0, 0 , 1 , 0 , 0] , [0 , 0, 0 , 0 , 1 , 0] , [0 , 0, 0 , 0 , 0 , 1] ]
-    ret_x= np.zeros((len(labels), height,width, 3)).astype(np.int16)
-    ret_y= np.zeros((len(labels), n_classes)).astype(np.int16)
-    #print(all_imgs)
-    for i in range(len(all_imgs)):
-        row = all_imgs[i]
-        #####img = cv2.imread(row, 0)
-        #####img= resize_image (img, height, width)
-        #####img = img.astype(np.uint16)
-        #####ret_x[i, :,:,0] = img[:,:]
-        #####ret_x[i, :,:,1] = img[:,:]
-        #####ret_x[i, :,:,2] = img[:,:]
-        img = cv2.imread(row)
-        img= resize_image (img, height, width)
-        img = img.astype(np.uint16)
-        ret_x[i, :,:] = img[:,:,:]
-        ret_y[i, :] = categories[ int( labels[i] ) ][:]
-    return ret_x/255., ret_y
-def generate_data_from_folder_training(path_classes, batchsize, height, width, n_classes, list_classes):
+def generate_data_from_folder(path_classes, batchsize, height, width, n_classes, list_classes, shuffle=False):
     #sub_classes = os.listdir(path_classes)
     #n_classes = len(sub_classes)
@@ -228,43 +185,42 @@ def generate_data_from_folder_training(path_classes, batchsize, height, width, n
         labels = labels + sub_labels
         #dicts[sub_c] = indexer
         #indexer +=1
-    ids = np.array(range(len(labels)))
-    random.shuffle(ids)
-    shuffled_labels = np.array(labels)[ids]
-    shuffled_files = np.array(all_imgs)[ids]
+    if shuffle:
+        ids = np.array(range(len(labels)))
+        random.shuffle(ids)
+        labels = np.array(labels)[ids]
+        all_imgs = np.array(all_imgs)[ids]
     categories = to_categorical(range(n_classes)).astype(np.int16)#[ [1 , 0, 0 , 0 , 0 , 0] , [0 , 1, 0 , 0 , 0 , 0] , [0 , 0, 1 , 0 , 0 , 0] , [0 , 0, 0 , 1 , 0 , 0] , [0 , 0, 0 , 0 , 1 , 0] , [0 , 0, 0 , 0 , 0 , 1] ]
-    ret_x= np.zeros((batchsize, height,width, 3)).astype(np.int16)
-    ret_y= np.zeros((batchsize, n_classes)).astype(np.int16)
+    ret_x= np.zeros((batchsize, height,width, 3)).astype(np.uint8)
+    ret_y= np.zeros((batchsize, n_classes)).astype(float)
     batchcount = 0
     while True:
-        for i in range(len(shuffled_files)):
-            row = shuffled_files[i]
-            #print(row)
-            ###img = cv2.imread(row, 0)
+        for lab, img in zip(labels, all_imgs):
+            ###img = cv2.imread(img, 0)
             ###img= resize_image (img, height, width)
             ###img = img.astype(np.uint16)
             ###ret_x[batchcount, :,:,0] = img[:,:]
             ###ret_x[batchcount, :,:,1] = img[:,:]
             ###ret_x[batchcount, :,:,2] = img[:,:]
-            img = cv2.imread(row)
+            img = cv2.imread(img)
             img= resize_image (img, height, width)
             img = img.astype(np.uint16)
             ret_x[batchcount, :,:,:] = img[:,:,:]
             #print(int(shuffled_labels[i]) )
             #print( categories[int(shuffled_labels[i])] )
-            ret_y[batchcount, :] = categories[ int( shuffled_labels[i] ) ][:]
+            ret_y[batchcount, :] = categories[int(lab)][:]
             batchcount+=1
             if batchcount>=batchsize:
-                ret_x = ret_x/255.
+                ret_x = ret_x//255
                 yield ret_x, ret_y
-                ret_x= np.zeros((batchsize, height,width, 3)).astype(np.int16)
-                ret_y= np.zeros((batchsize, n_classes)).astype(np.int16)
+                ret_x[:] = 0
+                ret_y[:] = 0
                 batchcount = 0

 def do_brightening(img, factor):
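
For orientation, a minimal smoke test of the unified generator as it stands after this commit (path, class names, and sizes are placeholders; the data root is assumed to contain one subfolder per entry of `list_classes`, as the directory loop above implies):

```python
# Hypothetical usage; 'dir_train', the class names, and the sizes are made up for illustration.
list_classes = ['class_a', 'class_b']
gen = generate_data_from_folder('dir_train', batchsize=4,
                                height=224, width=224,
                                n_classes=len(list_classes),
                                list_classes=list_classes,
                                shuffle=True)
x, y = next(gen)
print(x.shape)   # (4, 224, 224, 3) image batch
print(y.shape)   # (4, 2) one-hot label batch
```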