training.train: simplify+fix classification data loaders…

- unify `generate_data_from_folder_training` with `..._evaluation`
- instead of recreating the batch arrays after every batch, just zero them out in place (see the sketch below)
- cast image results to uint8 instead of uint16
- cast categorical results to float instead of int
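
The buffer-reuse point is the usual NumPy pattern of allocating the batch arrays once and resetting them in place rather than calling `np.zeros` again per batch. A minimal sketch of that pattern (shapes and the loop are illustrative, not taken from the code):

```python
import numpy as np

# Allocate the batch buffers once (shapes are placeholders).
batch_x = np.zeros((8, 224, 224, 3), dtype=np.uint8)
batch_y = np.zeros((8, 4), dtype=float)

for _ in range(3):       # stands in for the endless per-batch loop
    # ... fill batch_x / batch_y and hand them to the consumer ...
    batch_x[:] = 0       # reset in place instead of reallocating
    batch_y[:] = 0
```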
Author: Robert Sachunsky
Date:   2026-02-05 11:58:50 +01:00
Parent: 82d649061a
Commit: f03124f747

2 changed files with 25 additions and 68 deletions

@@ -430,13 +430,13 @@ def run(_config,
                       metrics=['accuracy', F1Score(average='macro', name='f1')])
         list_classes = list(classification_classes_name.values())
-        trainXY = generate_data_from_folder_training(
-            dir_train, n_batch, input_height, input_width, n_classes, list_classes)
-        testXY = generate_data_from_folder_evaluation(
-            dir_eval, input_height, input_width, n_classes, list_classes)
-        y_tot = np.zeros((testX.shape[0], n_classes))
-        num_rows = return_number_of_total_training_data(dir_train)
+        trainXY = generate_data_from_folder(
+            dir_train, n_batch, input_height, input_width, n_classes, list_classes, shuffle=True)
+        testXY = generate_data_from_folder(
+            dir_eval, n_batch, input_height, input_width, n_classes, list_classes)
+        epoch_size_train = return_number_of_total_training_data(dir_train)
+        epoch_size_eval = return_number_of_total_training_data(dir_eval)
         callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False),
                      SaveWeightsAfterSteps(0, dir_output, _config,
                                            monitor='val_f1',
@@ -444,9 +444,10 @@ def run(_config,
                                            mode='max')]
         history = model.fit(trainXY,
-                            steps_per_epoch=num_rows / n_batch,
+                            steps_per_epoch=epoch_size_train // n_batch,
                             #class_weight=weights)
                             validation_data=testXY,
+                            validation_steps=epoch_size_eval // n_batch,
                             verbose=1,
                             epochs=n_epochs,
                             callbacks=callbacks,
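
On the `model.fit` side, the epoch sizes are now divided with `//` rather than `/`, which keeps `steps_per_epoch` and `validation_steps` integral and simply drops any final partial batch. A quick check of the arithmetic (numbers are made up):

```python
epoch_size_train = 1000   # hypothetical number of training images
n_batch = 32

print(epoch_size_train / n_batch)    # 31.25 -> float, not a usable step count
print(epoch_size_train // n_batch)   # 31    -> full batches per epoch
```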

@@ -166,50 +166,7 @@ def return_number_of_total_training_data(path_classes):
-def generate_data_from_folder_evaluation(path_classes, height, width, n_classes, list_classes):
-    #sub_classes = os.listdir(path_classes)
-    #n_classes = len(sub_classes)
-    all_imgs = []
-    labels = []
-    #dicts =dict()
-    #indexer= 0
-    for indexer, sub_c in enumerate(list_classes):
-        sub_files = os.listdir(os.path.join(path_classes,sub_c ))
-        sub_files = [os.path.join(path_classes,sub_c )+'/' + x for x in sub_files]
-        #print( os.listdir(os.path.join(path_classes,sub_c )) )
-        all_imgs = all_imgs + sub_files
-        sub_labels = list( np.zeros( len(sub_files) ) +indexer )
-        #print( len(sub_labels) )
-        labels = labels + sub_labels
-        #dicts[sub_c] = indexer
-        #indexer +=1
-    categories = to_categorical(range(n_classes)).astype(np.int16)#[ [1 , 0, 0 , 0 , 0 , 0] , [0 , 1, 0 , 0 , 0 , 0] , [0 , 0, 1 , 0 , 0 , 0] , [0 , 0, 0 , 1 , 0 , 0] , [0 , 0, 0 , 0 , 1 , 0] , [0 , 0, 0 , 0 , 0 , 1] ]
-    ret_x= np.zeros((len(labels), height,width, 3)).astype(np.int16)
-    ret_y= np.zeros((len(labels), n_classes)).astype(np.int16)
-    #print(all_imgs)
-    for i in range(len(all_imgs)):
-        row = all_imgs[i]
-        #####img = cv2.imread(row, 0)
-        #####img= resize_image (img, height, width)
-        #####img = img.astype(np.uint16)
-        #####ret_x[i, :,:,0] = img[:,:]
-        #####ret_x[i, :,:,1] = img[:,:]
-        #####ret_x[i, :,:,2] = img[:,:]
-        img = cv2.imread(row)
-        img= resize_image (img, height, width)
-        img = img.astype(np.uint16)
-        ret_x[i, :,:] = img[:,:,:]
-        ret_y[i, :] = categories[ int( labels[i] ) ][:]
-    return ret_x/255., ret_y
-def generate_data_from_folder_training(path_classes, batchsize, height, width, n_classes, list_classes):
+def generate_data_from_folder(path_classes, batchsize, height, width, n_classes, list_classes, shuffle=False):
     #sub_classes = os.listdir(path_classes)
     #n_classes = len(sub_classes)
@@ -228,43 +185,42 @@ def generate_data_from_folder_training(path_classes, batchsize, height, width, n
         labels = labels + sub_labels
         #dicts[sub_c] = indexer
         #indexer +=1
-    ids = np.array(range(len(labels)))
-    random.shuffle(ids)
-    shuffled_labels = np.array(labels)[ids]
-    shuffled_files = np.array(all_imgs)[ids]
+    if shuffle:
+        ids = np.array(range(len(labels)))
+        random.shuffle(ids)
+        labels = np.array(labels)[ids]
+        all_imgs = np.array(all_imgs)[ids]
     categories = to_categorical(range(n_classes)).astype(np.int16)#[ [1 , 0, 0 , 0 , 0 , 0] , [0 , 1, 0 , 0 , 0 , 0] , [0 , 0, 1 , 0 , 0 , 0] , [0 , 0, 0 , 1 , 0 , 0] , [0 , 0, 0 , 0 , 1 , 0] , [0 , 0, 0 , 0 , 0 , 1] ]
-    ret_x= np.zeros((batchsize, height,width, 3)).astype(np.int16)
-    ret_y= np.zeros((batchsize, n_classes)).astype(np.int16)
+    ret_x= np.zeros((batchsize, height,width, 3)).astype(np.uint8)
+    ret_y= np.zeros((batchsize, n_classes)).astype(float)
     batchcount = 0
     while True:
-        for i in range(len(shuffled_files)):
-            row = shuffled_files[i]
-            #print(row)
-            ###img = cv2.imread(row, 0)
+        for lab, img in zip(labels, all_imgs):
+            ###img = cv2.imread(img, 0)
             ###img= resize_image (img, height, width)
             ###img = img.astype(np.uint16)
             ###ret_x[batchcount, :,:,0] = img[:,:]
             ###ret_x[batchcount, :,:,1] = img[:,:]
             ###ret_x[batchcount, :,:,2] = img[:,:]
-            img = cv2.imread(row)
+            img = cv2.imread(img)
             img= resize_image (img, height, width)
             img = img.astype(np.uint16)
             ret_x[batchcount, :,:,:] = img[:,:,:]
             #print(int(shuffled_labels[i]) )
             #print( categories[int(shuffled_labels[i])] )
-            ret_y[batchcount, :] = categories[ int( shuffled_labels[i] ) ][:]
+            ret_y[batchcount, :] = categories[int(lab)][:]
             batchcount+=1
             if batchcount>=batchsize:
-                ret_x = ret_x/255.
+                ret_x = ret_x//255
                 yield ret_x, ret_y
-                ret_x= np.zeros((batchsize, height,width, 3)).astype(np.int16)
-                ret_y= np.zeros((batchsize, n_classes)).astype(np.int16)
+                ret_x[:] = 0
+                ret_y[:] = 0
                 batchcount = 0

 def do_brightening(img, factor):
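
For orientation, a minimal smoke test of the unified generator as it stands after this commit (path, class names, and sizes are placeholders; the data root is assumed to contain one subfolder per entry of `list_classes`, as the directory loop above implies):

```python
# Hypothetical usage; 'dir_train', the class names, and the sizes are made up for illustration.
list_classes = ['class_a', 'class_b']
gen = generate_data_from_folder('dir_train', batchsize=4,
                                height=224, width=224,
                                n_classes=len(list_classes),
                                list_classes=list_classes,
                                shuffle=True)
x, y = next(gen)
print(x.shape)   # (4, 224, 224, 3) image batch
print(y.shape)   # (4, 2) one-hot label batch
```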