From f03124f747db7edef03d968e1b10db0e7638850d Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 5 Feb 2026 11:58:50 +0100 Subject: [PATCH] =?UTF-8?q?training.train:=20simplify+fix=20classification?= =?UTF-8?q?=20data=20loaders=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - unify `generate_data_from_folder_training` w/ `..._evaluation` - instead of recreating array after every batch, just zero out - cast image results to uint8 instead of uint16 - cast categorical results to float instead of int --- src/eynollah/training/train.py | 15 ++++--- src/eynollah/training/utils.py | 78 ++++++++-------------------------- 2 files changed, 25 insertions(+), 68 deletions(-) diff --git a/src/eynollah/training/train.py b/src/eynollah/training/train.py index effc920..0f8d0e9 100644 --- a/src/eynollah/training/train.py +++ b/src/eynollah/training/train.py @@ -430,13 +430,13 @@ def run(_config, metrics=['accuracy', F1Score(average='macro', name='f1')]) list_classes = list(classification_classes_name.values()) - trainXY = generate_data_from_folder_training( - dir_train, n_batch, input_height, input_width, n_classes, list_classes) - testXY = generate_data_from_folder_evaluation( - dir_eval, input_height, input_width, n_classes, list_classes) + trainXY = generate_data_from_folder( + dir_train, n_batch, input_height, input_width, n_classes, list_classes, shuffle=True) + testXY = generate_data_from_folder( + dir_eval, n_batch, input_height, input_width, n_classes, list_classes) + epoch_size_train = return_number_of_total_training_data(dir_train) + epoch_size_eval = return_number_of_total_training_data(dir_eval) - y_tot = np.zeros((testX.shape[0], n_classes)) - num_rows = return_number_of_total_training_data(dir_train) callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False), SaveWeightsAfterSteps(0, dir_output, _config, monitor='val_f1', @@ -444,9 +444,10 @@ def run(_config, mode='max')] history = model.fit(trainXY, - steps_per_epoch=num_rows / n_batch, + steps_per_epoch=epoch_size_train // n_batch, #class_weight=weights) validation_data=testXY, + validation_steps=epoch_size_eval // n_batch, verbose=1, epochs=n_epochs, callbacks=callbacks, diff --git a/src/eynollah/training/utils.py b/src/eynollah/training/utils.py index 61b2536..5b25a4f 100644 --- a/src/eynollah/training/utils.py +++ b/src/eynollah/training/utils.py @@ -166,50 +166,7 @@ def return_number_of_total_training_data(path_classes): -def generate_data_from_folder_evaluation(path_classes, height, width, n_classes, list_classes): - #sub_classes = os.listdir(path_classes) - #n_classes = len(sub_classes) - all_imgs = [] - labels = [] - #dicts =dict() - #indexer= 0 - for indexer, sub_c in enumerate(list_classes): - sub_files = os.listdir(os.path.join(path_classes,sub_c )) - sub_files = [os.path.join(path_classes,sub_c )+'/' + x for x in sub_files] - #print( os.listdir(os.path.join(path_classes,sub_c )) ) - all_imgs = all_imgs + sub_files - sub_labels = list( np.zeros( len(sub_files) ) +indexer ) - - #print( len(sub_labels) ) - labels = labels + sub_labels - #dicts[sub_c] = indexer - #indexer +=1 - - - categories = to_categorical(range(n_classes)).astype(np.int16)#[ [1 , 0, 0 , 0 , 0 , 0] , [0 , 1, 0 , 0 , 0 , 0] , [0 , 0, 1 , 0 , 0 , 0] , [0 , 0, 0 , 1 , 0 , 0] , [0 , 0, 0 , 0 , 1 , 0] , [0 , 0, 0 , 0 , 0 , 1] ] - ret_x= np.zeros((len(labels), height,width, 3)).astype(np.int16) - ret_y= np.zeros((len(labels), n_classes)).astype(np.int16) - - #print(all_imgs) - for i in range(len(all_imgs)): - row = all_imgs[i] - #####img = cv2.imread(row, 0) - #####img= resize_image (img, height, width) - #####img = img.astype(np.uint16) - #####ret_x[i, :,:,0] = img[:,:] - #####ret_x[i, :,:,1] = img[:,:] - #####ret_x[i, :,:,2] = img[:,:] - - img = cv2.imread(row) - img= resize_image (img, height, width) - img = img.astype(np.uint16) - ret_x[i, :,:] = img[:,:,:] - - ret_y[i, :] = categories[ int( labels[i] ) ][:] - - return ret_x/255., ret_y - -def generate_data_from_folder_training(path_classes, batchsize, height, width, n_classes, list_classes): +def generate_data_from_folder(path_classes, batchsize, height, width, n_classes, list_classes, shuffle=False): #sub_classes = os.listdir(path_classes) #n_classes = len(sub_classes) @@ -228,43 +185,42 @@ def generate_data_from_folder_training(path_classes, batchsize, height, width, n labels = labels + sub_labels #dicts[sub_c] = indexer #indexer +=1 - - ids = np.array(range(len(labels))) - random.shuffle(ids) - - shuffled_labels = np.array(labels)[ids] - shuffled_files = np.array(all_imgs)[ids] + + if shuffle: + ids = np.array(range(len(labels))) + random.shuffle(ids) + labels = np.array(labels)[ids] + all_imgs = np.array(all_imgs)[ids] + categories = to_categorical(range(n_classes)).astype(np.int16)#[ [1 , 0, 0 , 0 , 0 , 0] , [0 , 1, 0 , 0 , 0 , 0] , [0 , 0, 1 , 0 , 0 , 0] , [0 , 0, 0 , 1 , 0 , 0] , [0 , 0, 0 , 0 , 1 , 0] , [0 , 0, 0 , 0 , 0 , 1] ] - ret_x= np.zeros((batchsize, height,width, 3)).astype(np.int16) - ret_y= np.zeros((batchsize, n_classes)).astype(np.int16) + ret_x= np.zeros((batchsize, height,width, 3)).astype(np.uint8) + ret_y= np.zeros((batchsize, n_classes)).astype(float) batchcount = 0 while True: - for i in range(len(shuffled_files)): - row = shuffled_files[i] - #print(row) - ###img = cv2.imread(row, 0) + for lab, img in zip(labels, all_imgs): + ###img = cv2.imread(img, 0) ###img= resize_image (img, height, width) ###img = img.astype(np.uint16) ###ret_x[batchcount, :,:,0] = img[:,:] ###ret_x[batchcount, :,:,1] = img[:,:] ###ret_x[batchcount, :,:,2] = img[:,:] - img = cv2.imread(row) + img = cv2.imread(img) img= resize_image (img, height, width) img = img.astype(np.uint16) ret_x[batchcount, :,:,:] = img[:,:,:] #print(int(shuffled_labels[i]) ) #print( categories[int(shuffled_labels[i])] ) - ret_y[batchcount, :] = categories[ int( shuffled_labels[i] ) ][:] + ret_y[batchcount, :] = categories[int(lab)][:] batchcount+=1 if batchcount>=batchsize: - ret_x = ret_x/255. + ret_x = ret_x//255 yield ret_x, ret_y - ret_x= np.zeros((batchsize, height,width, 3)).astype(np.int16) - ret_y= np.zeros((batchsize, n_classes)).astype(np.int16) + ret_x[:] = 0 + ret_y[:] = 0 batchcount = 0 def do_brightening(img, factor):