training.train: simplify+fix classification data loaders…

- unify `generate_data_from_folder_training` with `generate_data_from_folder_evaluation`
- instead of recreating the arrays after every batch, just zero them out
- cast image results to uint8 instead of uint16
- cast categorical results to float instead of int
2026-07-13 23:29:15 +02:00 · 2026-02-05 11:58:50 +01:00 · 2026-02-05 11:58:50 +01:00 · f03124f747
commit f03124f747
parent 82d649061a
2 changed files with 25 additions and 68 deletions
--- a/src/eynollah/training/train.py
+++ b/src/eynollah/training/train.py
@ -430,13 +430,13 @@ def run(_config,
                      metrics=['accuracy', F1Score(average='macro', name='f1')])

        list_classes = list(classification_classes_name.values())
-        trainXY = generate_data_from_folder_training(
-            dir_train, n_batch, input_height, input_width, n_classes, list_classes)
-        testXY = generate_data_from_folder_evaluation(
-            dir_eval, input_height, input_width, n_classes, list_classes)
+        trainXY = generate_data_from_folder(
+            dir_train, n_batch, input_height, input_width, n_classes, list_classes, shuffle=True)
+        testXY = generate_data_from_folder(
+            dir_eval, n_batch, input_height, input_width, n_classes, list_classes)
+        epoch_size_train = return_number_of_total_training_data(dir_train)
+        epoch_size_eval = return_number_of_total_training_data(dir_eval)

-        y_tot = np.zeros((testX.shape[0], n_classes))
-        num_rows = return_number_of_total_training_data(dir_train)
        callbacks = [TensorBoard(os.path.join(dir_output, 'logs'), write_graph=False),
                     SaveWeightsAfterSteps(0, dir_output, _config,
                                           monitor='val_f1',
@ -444,9 +444,10 @@ def run(_config,
                                           mode='max')]
        
        history = model.fit(trainXY,
-                            steps_per_epoch=num_rows / n_batch,
+                            steps_per_epoch=epoch_size_train // n_batch,
                            #class_weight=weights)
                            validation_data=testXY,
+                            validation_steps=epoch_size_eval // n_batch,
                            verbose=1,
                            epochs=n_epochs,
                            callbacks=callbacks,
--- a/src/eynollah/training/utils.py
+++ b/src/eynollah/training/utils.py
@ -166,50 +166,7 @@ def return_number_of_total_training_data(path_classes):
        
    
    
-def generate_data_from_folder_evaluation(path_classes, height, width, n_classes, list_classes):
-    #sub_classes = os.listdir(path_classes)
-    #n_classes = len(sub_classes)
-    all_imgs = []
-    labels = []
-    #dicts =dict()
-    #indexer= 0
-    for indexer, sub_c in enumerate(list_classes):
-        sub_files =  os.listdir(os.path.join(path_classes,sub_c  )) 
-        sub_files = [os.path.join(path_classes,sub_c  )+'/' + x for x in sub_files]
-        #print(     os.listdir(os.path.join(path_classes,sub_c  ))     )
-        all_imgs = all_imgs + sub_files
-        sub_labels = list( np.zeros( len(sub_files) ) +indexer )
-
-        #print( len(sub_labels) )
-        labels = labels + sub_labels
-        #dicts[sub_c] = indexer
-        #indexer +=1 
-        
-
-    categories =  to_categorical(range(n_classes)).astype(np.int16)#[  [1 , 0, 0 , 0 , 0 , 0]  , [0 , 1, 0 , 0 , 0 , 0]  , [0 , 0, 1 , 0 , 0 , 0] , [0 , 0, 0 , 1 , 0 , 0] , [0 , 0, 0 , 0 , 1 , 0]  , [0 , 0, 0 , 0 , 0 , 1] ]
-    ret_x= np.zeros((len(labels), height,width, 3)).astype(np.int16)
-    ret_y= np.zeros((len(labels), n_classes)).astype(np.int16)
-    
-    #print(all_imgs)
-    for i in range(len(all_imgs)):
-        row = all_imgs[i]
-        #####img = cv2.imread(row, 0)
-        #####img= resize_image (img, height, width)
-        #####img = img.astype(np.uint16)
-        #####ret_x[i, :,:,0] = img[:,:]
-        #####ret_x[i, :,:,1] = img[:,:]
-        #####ret_x[i, :,:,2] = img[:,:]
-        
-        img = cv2.imread(row)
-        img= resize_image (img, height, width)
-        img = img.astype(np.uint16)
-        ret_x[i, :,:] = img[:,:,:]
-        
-        ret_y[i, :] =  categories[ int( labels[i] ) ][:]
-    
-    return ret_x/255., ret_y
-
-def generate_data_from_folder_training(path_classes, batchsize, height, width, n_classes, list_classes):
+def generate_data_from_folder(path_classes, batchsize, height, width, n_classes, list_classes, shuffle=False):
    #sub_classes = os.listdir(path_classes)
    #n_classes = len(sub_classes)

@ -228,43 +185,42 @@ def generate_data_from_folder_training(path_classes, batchsize, height, width, n
        labels = labels + sub_labels
        #dicts[sub_c] = indexer
        #indexer +=1 
-        
-    ids = np.array(range(len(labels)))
-    random.shuffle(ids)
-    
-    shuffled_labels = np.array(labels)[ids]
-    shuffled_files = np.array(all_imgs)[ids]
+
+    if shuffle:
+        ids = np.array(range(len(labels)))
+        random.shuffle(ids)
+        labels = np.array(labels)[ids]
+        all_imgs = np.array(all_imgs)[ids]
+
    categories = to_categorical(range(n_classes)).astype(np.int16)#[  [1 , 0, 0 , 0 , 0 , 0]  , [0 , 1, 0 , 0 , 0 , 0]  , [0 , 0, 1 , 0 , 0 , 0] , [0 , 0, 0 , 1 , 0 , 0] , [0 , 0, 0 , 0 , 1 , 0]  , [0 , 0, 0 , 0 , 0 , 1] ]
-    ret_x= np.zeros((batchsize, height,width, 3)).astype(np.int16)
-    ret_y= np.zeros((batchsize, n_classes)).astype(np.int16)
+    ret_x= np.zeros((batchsize, height,width, 3)).astype(np.uint8)
+    ret_y= np.zeros((batchsize, n_classes)).astype(float)
    batchcount = 0
    while True:
-        for i in range(len(shuffled_files)):
-            row = shuffled_files[i]
-            #print(row)
-            ###img = cv2.imread(row, 0)
+        for lab, img in zip(labels, all_imgs):
+            ###img = cv2.imread(img, 0)
            ###img= resize_image (img, height, width)
            ###img = img.astype(np.uint16)
            ###ret_x[batchcount, :,:,0] = img[:,:]
            ###ret_x[batchcount, :,:,1] = img[:,:]
            ###ret_x[batchcount, :,:,2] = img[:,:]
            
-            img = cv2.imread(row)
+            img = cv2.imread(img)
            img= resize_image (img, height, width)
            img = img.astype(np.uint16)
            ret_x[batchcount, :,:,:] = img[:,:,:]
            
            #print(int(shuffled_labels[i]) )
            #print( categories[int(shuffled_labels[i])] )
-            ret_y[batchcount, :] =  categories[ int( shuffled_labels[i] ) ][:]
+            ret_y[batchcount, :] =  categories[int(lab)][:]
            
            batchcount+=1
            
            if batchcount>=batchsize:
-                ret_x = ret_x/255.
+                ret_x = ret_x//255
                yield ret_x, ret_y
-                ret_x= np.zeros((batchsize, height,width, 3)).astype(np.int16)
-                ret_y= np.zeros((batchsize, n_classes)).astype(np.int16)
+                ret_x[:] = 0
+                ret_y[:] = 0
                batchcount = 0

 def do_brightening(img, factor):