Transformer+CNN structure is added to vision transformer type

2025-06-09 20:00:05 +02:00 · 2024-06-12 17:39:57 +02:00 · 2024-06-12 17:39:57 +02:00 · 743f2e97d6
commit 743f2e97d6
parent f1fd74c7eb
3 changed files with 176 additions and 39 deletions
--- a/config_params.json
+++ b/config_params.json
@@ -2,9 +2,9 @@
    "backbone_type" : "transformer",
    "task": "binarization",
    "n_classes" : 2,
-    "n_epochs" : 1,
+    "n_epochs" : 2,
    "input_height" : 224,
-    "input_width" : 672,
+    "input_width" : 224,
    "weight_decay" : 1e-6,
    "n_batch" : 1,
    "learning_rate": 1e-4,
@@ -22,10 +22,14 @@
    "scaling_flip" : false,
    "rotation": false,
    "rotation_not_90": false,
-    "transformer_num_patches_xy": [7, 7],
-    "transformer_patchsize_x": 3,
-    "transformer_patchsize_y": 1,
-    "transformer_projection_dim": 192,
+    "transformer_num_patches_xy": [56, 56],
+    "transformer_patchsize_x": 4,
+    "transformer_patchsize_y": 4,
+    "transformer_projection_dim": 64,
+    "transformer_mlp_head_units": [128, 64],
+    "transformer_layers": 1,
+    "transformer_num_heads": 1,
+    "transformer_cnn_first": false,
    "blur_k" : ["blur","guass","median"],
    "scales" : [0.6, 0.7, 0.8, 0.9, 1.1, 1.2, 1.4],
    "brightness" : [1.3, 1.5, 1.7, 2],