Introduce model_zoo to Eynollah_ocr

kba 2025-10-20 21:14:52 +02:00
parent d609a532bf
commit 062f317d2e
4 changed files with 149 additions and 138 deletions
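Note: the diff below replaces every direct self.models[...] dict lookup in Eynollah with the EynollahModelZoo accessor API: model categories are registered once via load_models(...) and fetched at call sites via get(category). A minimal runnable sketch of that access pattern (ModelZooSketch is a hypothetical stand-in, not the real EynollahModelZoo):

from typing import Any, Dict, Tuple, Union

class ModelZooSketch:
    """Hypothetical stand-in illustrating the EynollahModelZoo call pattern."""

    def __init__(self) -> None:
        self._loaded: Dict[str, Any] = {}

    def load_models(self, *specs: Union[str, Tuple[str, str]]) -> None:
        # The real zoo resolves each category (optionally a (category, variant)
        # tuple, as in loadable.append(('ocr', 'tr'))) to a model file under
        # its basedir; this sketch stores a placeholder instead.
        for spec in specs:
            category = spec[0] if isinstance(spec, tuple) else spec
            self._loaded[category] = f"<model for {category!r}>"

    def get(self, category: str) -> Any:
        # Mirrors the commit's behavior: get() raises if the category was
        # never loaded, instead of silently returning None.
        if category not in self._loaded:
            raise ValueError(
                f'Model "{category}" not previously loaded with load_models(..)')
        return self._loaded[category]

zoo = ModelZooSketch()
zoo.load_models("enhancement", "col_classifier", ("ocr", "tr"))
print(zoo.get("col_classifier"))  # access by category, no raw dict indexing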


@@ -271,12 +271,12 @@ class Eynollah:
if self.ocr:
if self.tr:
loadable.append(('ocr', 'tr'))
loadable.append(('ocr_tr_processor', 'tr'))
loadable.append(('trocr_processor', 'tr'))
else:
loadable.append('ocr')
loadable.append('num_to_char')
self.models = self.model_zoo.load_models(*loadable)
self.model_zoo.load_models(*loadable)
def __del__(self):
if hasattr(self, 'executor') and getattr(self, 'executor'):
@@ -338,8 +338,8 @@ class Eynollah:
def predict_enhancement(self, img):
self.logger.debug("enter predict_enhancement")
img_height_model = self.models["enhancement"].layers[-1].output_shape[1]
img_width_model = self.models["enhancement"].layers[-1].output_shape[2]
img_height_model = self.model_zoo.get("enhancement").layers[-1].output_shape[1]
img_width_model = self.model_zoo.get("enhancement").layers[-1].output_shape[2]
if img.shape[0] < img_height_model:
img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST)
if img.shape[1] < img_width_model:
@@ -380,7 +380,7 @@ class Eynollah:
index_y_d = img_h - img_height_model
img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :]
label_p_pred = self.models["enhancement"].predict(img_patch, verbose=0)
label_p_pred = self.model_zoo.get("enhancement").predict(img_patch, verbose=0)
seg = label_p_pred[0, :, :, :] * 255
if i == 0 and j == 0:
@@ -555,7 +555,7 @@ class Eynollah:
img_in[0, :, :, 1] = img_1ch[:, :]
img_in[0, :, :, 2] = img_1ch[:, :]
label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0)
label_p_pred = self.model_zoo.get("col_classifier").predict(img_in, verbose=0)
num_col = np.argmax(label_p_pred[0]) + 1
self.logger.info("Found %s columns (%s)", num_col, label_p_pred)
@@ -573,7 +573,7 @@ class Eynollah:
self.logger.info("Detected %s DPI", dpi)
if self.input_binary:
img = self.imread()
prediction_bin = self.do_prediction(True, img, self.models["binarization"], n_batch_inference=5)
prediction_bin = self.do_prediction(True, img, self.model_zoo.get("binarization"), n_batch_inference=5)
prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
img= np.copy(prediction_bin)
@@ -613,7 +613,7 @@ class Eynollah:
img_in[0, :, :, 1] = img_1ch[:, :]
img_in[0, :, :, 2] = img_1ch[:, :]
label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0)
label_p_pred = self.model_zoo.get("col_classifier").predict(img_in, verbose=0)
num_col = np.argmax(label_p_pred[0]) + 1
elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower):
@@ -634,7 +634,7 @@ class Eynollah:
img_in[0, :, :, 1] = img_1ch[:, :]
img_in[0, :, :, 2] = img_1ch[:, :]
label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0)
label_p_pred = self.model_zoo.get("col_classifier").predict(img_in, verbose=0)
num_col = np.argmax(label_p_pred[0]) + 1
if num_col > self.num_col_upper:
@@ -1486,7 +1486,7 @@ class Eynollah:
cont_page = []
if not self.ignore_page_extraction:
img = np.copy(self.image)#cv2.GaussianBlur(self.image, (5, 5), 0)
img_page_prediction = self.do_prediction(False, img, self.models["page"])
img_page_prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(imgray, 0, 255, 0)
##thresh = cv2.dilate(thresh, KERNEL, iterations=3)
@@ -1534,7 +1534,7 @@ class Eynollah:
else:
img = self.imread()
img = cv2.GaussianBlur(img, (5, 5), 0)
img_page_prediction = self.do_prediction(False, img, self.models["page"])
img_page_prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(imgray, 0, 255, 0)
@@ -1560,7 +1560,7 @@ class Eynollah:
self.logger.debug("enter extract_text_regions")
img_height_h = img.shape[0]
img_width_h = img.shape[1]
model_region = self.models["region_fl"] if patches else self.models["region_fl_np"]
model_region = self.model_zoo.get("region_fl") if patches else self.model_zoo.get("region_fl_np")
if self.light_version:
thresholding_for_fl_light_version = True
@@ -1595,7 +1595,7 @@ class Eynollah:
self.logger.debug("enter extract_text_regions")
img_height_h = img.shape[0]
img_width_h = img.shape[1]
model_region = self.models["region_fl"] if patches else self.models["region_fl_np"]
model_region = self.model_zoo.get("region_fl") if patches else self.model_zoo.get("region_fl_np")
if not patches:
img = otsu_copy_binary(img)
@@ -1816,14 +1816,14 @@ class Eynollah:
img_w = img_org.shape[1]
img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w))
prediction_textline = self.do_prediction(use_patches, img, self.models["textline"],
prediction_textline = self.do_prediction(use_patches, img, self.model_zoo.get("textline"),
marginal_of_patch_percent=0.15,
n_batch_inference=3,
thresholding_for_artificial_class_in_light_version=self.textline_light,
threshold_art_class_textline=self.threshold_art_class_textline)
#if not self.textline_light:
#if num_col_classifier==1:
#prediction_textline_nopatch = self.do_prediction(False, img, self.models["textline"])
#prediction_textline_nopatch = self.do_prediction(False, img, self.model_zoo.get_model("textline"))
#prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0
prediction_textline = resize_image(prediction_textline, img_h, img_w)
@@ -1894,7 +1894,7 @@ class Eynollah:
#cv2.imwrite('prediction_textline2.png', prediction_textline[:,:,0])
prediction_textline_longshot = self.do_prediction(False, img, self.models["textline"])
prediction_textline_longshot = self.do_prediction(False, img, self.model_zoo.get("textline"))
prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w)
@@ -1927,7 +1927,7 @@ class Eynollah:
img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
img_resized = resize_image(img,img_h_new, img_w_new )
prediction_regions_org, _ = self.do_prediction_new_concept(True, img_resized, self.models["region"])
prediction_regions_org, _ = self.do_prediction_new_concept(True, img_resized, self.model_zoo.get("region"))
prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h )
image_page, page_coord, cont_page = self.extract_page()
@@ -2043,7 +2043,7 @@ class Eynollah:
#if self.input_binary:
#img_bin = np.copy(img_resized)
###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30):
###prediction_bin = self.do_prediction(True, img_resized, self.models["binarization"], n_batch_inference=5)
###prediction_bin = self.do_prediction(True, img_resized, self.model_zoo.get_model("binarization"), n_batch_inference=5)
####print("inside bin ", time.time()-t_bin)
###prediction_bin=prediction_bin[:,:,0]
@@ -2058,7 +2058,7 @@ class Eynollah:
###else:
###img_bin = np.copy(img_resized)
if (self.ocr and self.tr) and not self.input_binary:
prediction_bin = self.do_prediction(True, img_resized, self.models["binarization"], n_batch_inference=5)
prediction_bin = self.do_prediction(True, img_resized, self.model_zoo.get("binarization"), n_batch_inference=5)
prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
prediction_bin = prediction_bin.astype(np.uint16)
@@ -2090,14 +2090,14 @@ class Eynollah:
self.logger.debug("resized to %dx%d for %d cols",
img_resized.shape[1], img_resized.shape[0], num_col_classifier)
prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
True, img_resized, self.models["region_1_2"], n_batch_inference=1,
True, img_resized, self.model_zoo.get("region_1_2"), n_batch_inference=1,
thresholding_for_some_classes_in_light_version=True,
threshold_art_class_layout=self.threshold_art_class_layout)
else:
prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3))
confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1]))
prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept(
False, self.image_page_org_size, self.models["region_1_2"], n_batch_inference=1,
False, self.image_page_org_size, self.model_zoo.get("region_1_2"), n_batch_inference=1,
thresholding_for_artificial_class_in_light_version=True,
threshold_art_class_layout=self.threshold_art_class_layout)
ys = slice(*self.page_coord[0:2])
@@ -2111,10 +2111,10 @@ class Eynollah:
self.logger.debug("resized to %dx%d (new_h=%d) for %d cols",
img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier)
prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
True, img_resized, self.models["region_1_2"], n_batch_inference=2,
True, img_resized, self.model_zoo.get("region_1_2"), n_batch_inference=2,
thresholding_for_some_classes_in_light_version=True,
threshold_art_class_layout=self.threshold_art_class_layout)
###prediction_regions_org = self.do_prediction(True, img_bin, self.models["region"],
###prediction_regions_org = self.do_prediction(True, img_bin, self.model_zoo.get_model("region"),
###n_batch_inference=3,
###thresholding_for_some_classes_in_light_version=True)
#print("inside 3 ", time.time()-t_in)
@@ -2194,7 +2194,7 @@ class Eynollah:
ratio_x=1
img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
prediction_regions_org_y = self.do_prediction(True, img, self.models["region"])
prediction_regions_org_y = self.do_prediction(True, img, self.model_zoo.get("region"))
prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h )
#plt.imshow(prediction_regions_org_y[:,:,0])
@@ -2209,7 +2209,7 @@ class Eynollah:
_, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1)))
prediction_regions_org = self.do_prediction(True, img, self.models["region"])
prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
prediction_regions_org=prediction_regions_org[:,:,0]
@@ -2217,7 +2217,7 @@ class Eynollah:
img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]))
prediction_regions_org2 = self.do_prediction(True, img, self.models["region_p2"], marginal_of_patch_percent=0.2)
prediction_regions_org2 = self.do_prediction(True, img, self.model_zoo.get("region_p2"), marginal_of_patch_percent=0.2)
prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h )
mask_zeros2 = (prediction_regions_org2[:,:,0] == 0)
@@ -2241,7 +2241,7 @@ class Eynollah:
if self.input_binary:
prediction_bin = np.copy(img_org)
else:
prediction_bin = self.do_prediction(True, img_org, self.models["binarization"], n_batch_inference=5)
prediction_bin = self.do_prediction(True, img_org, self.model_zoo.get("binarization"), n_batch_inference=5)
prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
prediction_bin = 255 * (prediction_bin[:,:,0]==0)
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
@@ -2251,7 +2251,7 @@ class Eynollah:
img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
prediction_regions_org = self.do_prediction(True, img, self.models["region"])
prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
prediction_regions_org=prediction_regions_org[:,:,0]
@@ -2278,7 +2278,7 @@ class Eynollah:
except:
if self.input_binary:
prediction_bin = np.copy(img_org)
prediction_bin = self.do_prediction(True, img_org, self.models["binarization"], n_batch_inference=5)
prediction_bin = self.do_prediction(True, img_org, self.model_zoo.get("binarization"), n_batch_inference=5)
prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
prediction_bin = 255 * (prediction_bin[:,:,0]==0)
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
@@ -2289,14 +2289,14 @@ class Eynollah:
img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
prediction_regions_org = self.do_prediction(True, img, self.models["region"])
prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
prediction_regions_org=prediction_regions_org[:,:,0]
#mask_lines_only=(prediction_regions_org[:,:]==3)*1
#img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
#prediction_regions_org = self.do_prediction(True, img, self.models["region"])
#prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get_model("region"))
#prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
#prediction_regions_org = prediction_regions_org[:,:,0]
#prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0
@@ -2667,13 +2667,13 @@ class Eynollah:
img_width_h = img_org.shape[1]
patches = False
if self.light_version:
prediction_table, _ = self.do_prediction_new_concept(patches, img, self.models["table"])
prediction_table, _ = self.do_prediction_new_concept(patches, img, self.model_zoo.get("table"))
prediction_table = prediction_table.astype(np.int16)
return prediction_table[:,:,0]
else:
if num_col_classifier < 4 and num_col_classifier > 2:
prediction_table = self.do_prediction(patches, img, self.models["table"])
pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.models["table"])
prediction_table = self.do_prediction(patches, img, self.model_zoo.get("table"))
pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_zoo.get("table"))
pre_updown = cv2.flip(pre_updown, -1)
prediction_table[:,:,0][pre_updown[:,:,0]==1]=1
@@ -2692,8 +2692,8 @@ class Eynollah:
xs = slice(w_start, w_start + img.shape[1])
img_new[ys, xs] = img
prediction_ext = self.do_prediction(patches, img_new, self.models["table"])
pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.models["table"])
prediction_ext = self.do_prediction(patches, img_new, self.model_zoo.get("table"))
pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_zoo.get("table"))
pre_updown = cv2.flip(pre_updown, -1)
prediction_table = prediction_ext[ys, xs]
@@ -2714,8 +2714,8 @@ class Eynollah:
xs = slice(w_start, w_start + img.shape[1])
img_new[ys, xs] = img
prediction_ext = self.do_prediction(patches, img_new, self.models["table"])
pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.models["table"])
prediction_ext = self.do_prediction(patches, img_new, self.model_zoo.get("table"))
pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_zoo.get("table"))
pre_updown = cv2.flip(pre_updown, -1)
prediction_table = prediction_ext[ys, xs]
@@ -2727,10 +2727,10 @@ class Eynollah:
prediction_table = np.zeros(img.shape)
img_w_half = img.shape[1] // 2
pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.models["table"])
pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.models["table"])
pre_full = self.do_prediction(patches, img[:,:,:], self.models["table"])
pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.models["table"])
pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.model_zoo.get("table"))
pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.model_zoo.get("table"))
pre_full = self.do_prediction(patches, img[:,:,:], self.model_zoo.get("table"))
pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_zoo.get("table"))
pre_updown = cv2.flip(pre_updown, -1)
prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4)
@@ -3522,7 +3522,7 @@ class Eynollah:
tot_counter += 1
batch.append(j)
if tot_counter % inference_bs == 0 or tot_counter == len(ij_list):
y_pr = self.models["reading_order"].predict(input_1 , verbose=0)
y_pr = self.model_zoo.get("reading_order").predict(input_1 , verbose=0)
for jb, j in enumerate(batch):
if y_pr[jb][0]>=0.5:
post_list.append(j)
@@ -4105,7 +4105,7 @@ class Eynollah:
gc.collect()
ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons, np.zeros((len(all_found_textline_polygons), 4)),
self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], textline_light=True)
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), textline_light=True)
else:
ocr_all_textlines = None
@@ -4614,27 +4614,27 @@ class Eynollah:
if len(all_found_textline_polygons):
ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons, all_box_coord,
self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)
if len(all_found_textline_polygons_marginals_left):
ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons_marginals_left, all_box_coord_marginals_left,
self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)
if len(all_found_textline_polygons_marginals_right):
ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons_marginals_right, all_box_coord_marginals_right,
self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)
if self.full_layout and len(all_found_textline_polygons):
ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons_h, all_box_coord_h,
self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)
if self.full_layout and len(polygons_of_drop_capitals):
ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(
image_page, polygons_of_drop_capitals, np.zeros((len(polygons_of_drop_capitals), 4)),
self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)
else:
if self.light_version:
@@ -4646,7 +4646,7 @@ class Eynollah:
gc.collect()
torch.cuda.empty_cache()
self.models["ocr"].to(self.device)
self.model_zoo.get("ocr").to(self.device)
ind_tot = 0
#cv2.imwrite('./img_out.png', image_page)
@@ -4683,7 +4683,7 @@ class Eynollah:
img_croped = img_poly_on_img[y:y+h, x:x+w, :]
#cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped)
text_ocr = self.return_ocr_of_textline_without_common_section(
img_croped, self.models["ocr"], self.models['ocr_tr_processor'], self.device, w, h2w_ratio, ind_tot)
img_croped, self.model_zoo.get("ocr"), self.model_zoo.get("trocr_processor"), self.device, w, h2w_ratio, ind_tot)
ocr_textline_in_textregion.append(text_ocr)
ind_tot = ind_tot +1
ocr_all_textlines.append(ocr_textline_in_textregion)


@@ -1,6 +1,6 @@
# pyright: reportPossiblyUnboundVariable=false
from logging import getLogger
from logging import Logger, getLogger
from typing import Optional
from pathlib import Path
import os
@@ -8,23 +8,31 @@ import json
import gc
import sys
import math
import cv2
import time
from keras.layers import StringLookup
from eynollah.utils.resize import resize_image
from eynollah.utils.utils_ocr import break_curved_line_into_small_pieces_and_then_merge, decode_batch_predictions, fit_text_single_line, get_contours_and_bounding_boxes, get_orientation_moments, preprocess_and_resize_image_for_ocrcnn_model, return_textlines_split_if_needed, rotate_image_with_padding
from .utils import is_image_filename
import cv2
import xml.etree.ElementTree as ET
import tensorflow as tf
from keras.models import load_model
from PIL import Image, ImageDraw, ImageFont
import numpy as np
from eynollah.model_zoo import EynollahModelZoo
import torch
from .utils import is_image_filename
from .utils.resize import resize_image
from .utils.utils_ocr import (
break_curved_line_into_small_pieces_and_then_merge,
decode_batch_predictions,
fit_text_single_line,
get_contours_and_bounding_boxes,
get_orientation_moments,
preprocess_and_resize_image_for_ocrcnn_model,
return_textlines_split_if_needed,
rotate_image_with_padding,
)
# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
if sys.version_info < (3, 10):
import importlib_resources
@@ -43,68 +51,51 @@ class Eynollah_ocr:
model_name=None,
dir_xmls=None,
tr_ocr=False,
batch_size=None,
export_textline_images_and_text=False,
do_not_mask_with_textline_contour=False,
batch_size: Optional[int]=None,
export_textline_images_and_text: bool=False,
do_not_mask_with_textline_contour: bool=False,
pref_of_dataset=None,
min_conf_value_of_textline_text : Optional[float]=None,
logger=None,
min_conf_value_of_textline_text : float=0.3,
logger: Optional[Logger]=None,
):
self.model_name = model_name
self.tr_ocr = tr_ocr
self.export_textline_images_and_text = export_textline_images_and_text
self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
self.pref_of_dataset = pref_of_dataset
self.logger = logger if logger else getLogger('eynollah')
self.model_zoo = EynollahModelZoo(basedir=dir_models)
if not export_textline_images_and_text:
if min_conf_value_of_textline_text:
self.min_conf_value_of_textline_text = float(min_conf_value_of_textline_text)
# TODO: Properly document what 'export_textline_images_and_text' is about
if export_textline_images_and_text:
self.logger.info("export_textline_images_and_text was set, so no actual models are loaded")
return
self.min_conf_value_of_textline_text = min_conf_value_of_textline_text
self.b_s = 2 if batch_size is None and tr_ocr else 8 if batch_size is None else batch_size
if tr_ocr:
self.model_zoo.load_model('trocr_processor', '')
if model_name:
self.model_zoo.load_model('ocr', 'tr', model_name)
else:
self.min_conf_value_of_textline_text = 0.3
if tr_ocr:
assert TrOCRProcessor
self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if self.model_name:
self.model_ocr_dir = self.model_name
else:
self.model_ocr_dir = dir_models + "/model_eynollah_ocr_trocr_20250919"
self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
self.model_ocr.to(self.device)
if not batch_size:
self.b_s = 2
else:
self.b_s = int(batch_size)
self.model_zoo.load_model('ocr', 'tr')
self.model_zoo.get('ocr').to(self.device)
else:
if model_name:
self.model_zoo.load_model('ocr', '', model_name)
else:
if self.model_name:
self.model_ocr_dir = self.model_name
else:
self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250930"
model_ocr = load_model(self.model_ocr_dir , compile=False)
self.prediction_model = tf.keras.models.Model(
model_ocr.get_layer(name = "image").input,
model_ocr.get_layer(name = "dense2").output)
if not batch_size:
self.b_s = 8
else:
self.b_s = int(batch_size)
with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
characters = json.load(config_file)
AUTOTUNE = tf.data.AUTOTUNE
self.model_zoo.load_model('ocr', '')
self.model_zoo.load_model('num_to_char')
self.end_character = len(self.model_zoo.load_model('characters')) + 2
# Mapping characters to integers.
char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)
# Mapping integers back to original characters.
self.num_to_char = StringLookup(
vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)
self.end_character = len(characters) + 2
@property
def device(self):
if torch.cuda.is_available():
self.logger.info("Using GPU acceleration")
return torch.device("cuda:0")
else:
self.logger.info("Using CPU processing")
return torch.device("cpu")
def run(self, overwrite: bool = False,
dir_in: Optional[str] = None,
@@ -119,13 +110,16 @@ class Eynollah_ocr:
for image_filename in filter(is_image_filename,
os.listdir(dir_in))]
else:
assert image_filename
ls_imgs = [image_filename]
if self.tr_ocr:
tr_ocr_input_height_and_width = 384
for dir_img in ls_imgs:
file_name = Path(dir_img).stem
assert dir_xmls # FIXME: check the logic
dir_xml = os.path.join(dir_xmls, file_name+'.xml')
assert dir_out # FIXME: check the logic
out_file_ocr = os.path.join(dir_out, file_name+'.xml')
if os.path.exists(out_file_ocr):
@@ -204,10 +198,10 @@
cropped_lines = []
indexer_b_s = 0
pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_ocr.generate(
pixel_values_merged.to(self.device))
generated_text_merged = self.processor.batch_decode(
generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)
extracted_texts = extracted_texts + generated_text_merged
@@ -227,10 +221,10 @@
cropped_lines = []
indexer_b_s = 0
pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_ocr.generate(
pixel_values_merged.to(self.device))
generated_text_merged = self.processor.batch_decode(
generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)
extracted_texts = extracted_texts + generated_text_merged
@@ -247,10 +241,10 @@
cropped_lines = []
indexer_b_s = 0
pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_ocr.generate(
pixel_values_merged.to(self.device))
generated_text_merged = self.processor.batch_decode(
generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)
extracted_texts = extracted_texts + generated_text_merged
@@ -265,10 +259,10 @@
cropped_lines = []
indexer_b_s = 0
pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_ocr.generate(
pixel_values_merged.to(self.device))
generated_text_merged = self.processor.batch_decode(
generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)
extracted_texts = extracted_texts + generated_text_merged
@@ -282,9 +276,9 @@
cropped_lines = []
indexer_b_s = 0
pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
generated_text_merged = self.model_zoo.get('processor').batch_decode(generated_ids_merged, skip_special_tokens=True)
extracted_texts = extracted_texts + generated_text_merged
@@ -299,10 +293,10 @@
####n_start = i*self.b_s
####n_end = (i+1)*self.b_s
####imgs = cropped_lines[n_start:n_end]
####pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
####pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
####generated_ids_merged = self.model_ocr.generate(
#### pixel_values_merged.to(self.device))
####generated_text_merged = self.processor.batch_decode(
####generated_text_merged = self.model_zoo.get('processor').batch_decode(
#### generated_ids_merged, skip_special_tokens=True)
####extracted_texts = extracted_texts + generated_text_merged
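
The tr_ocr code path above repeats one batched TrOCR decode pattern, now routed through the model zoo's processor entry: the processor turns cropped line images into pixel tensors, generate() produces token ids, and batch_decode() maps the ids back to strings. A runnable sketch of that pattern, assuming the default microsoft/trocr-base-printed weights and blank placeholder images instead of real textline crops:

import torch
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# Placeholder batch; the real code crops textlines from the page image
# and feeds them at the 384x384 TrOCR input size.
imgs = [Image.new("RGB", (384, 384), "white") for _ in range(2)]

pixel_values = processor(imgs, return_tensors="pt").pixel_values
generated_ids = model.generate(pixel_values.to(device))
texts = processor.batch_decode(generated_ids, skip_special_tokens=True)
print(texts)  # one decoded string per cropped line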


@@ -11,7 +11,7 @@ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from eynollah.patch_encoder import PatchEncoder, Patches
SomeEynollahModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, Model]
SomeEynollahModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, Model, List]
# Dict mapping model_category to dict mapping variant (default is '') to Path
@@ -114,14 +114,19 @@ DEFAULT_MODEL_VERSIONS: Dict[str, Dict[str, str]] = {
'': "model_eynollah_ocr_cnnrnn_20250930",
},
'ocr_tr_processor': {
'trocr_processor': {
'': 'microsoft/trocr-base-printed',
'htr': "microsoft/trocr-base-handwritten",
},
'num_to_char': {
'': 'model_eynollah_ocr_cnnrnn_20250930/characters_org.txt'
'': 'characters_org.txt'
},
'characters': {
'': 'characters_org.txt'
},
}
@@ -142,7 +147,7 @@ class EynollahModelZoo():
self.model_versions = deepcopy(DEFAULT_MODEL_VERSIONS)
if model_overrides:
self.override_models(*model_overrides)
self._loaded: Dict[Tuple[str, str], SomeEynollahModel] = {}
self._loaded: Dict[str, SomeEynollahModel] = {}
def override_models(self, *model_overrides: Tuple[str, str, str]):
"""
@@ -216,7 +221,9 @@ class EynollahModelZoo():
model = self._load_ocr_model(variant=model_variant)
elif model_category == 'num_to_char':
model = self._load_num_to_char()
elif model_category == 'tr_processor':
elif model_category == 'characters':
model = self._load_characters()
elif model_category == 'trocr_processor':
return TrOCRProcessor.from_pretrained(self.model_path(...))
else:
try:
@@ -225,14 +232,13 @@ class EynollahModelZoo():
self.logger.exception(e)
model = load_model(model_path, compile=False, custom_objects={
"PatchEncoder": PatchEncoder, "Patches": Patches})
self._loaded[(model_category, model_variant)] = model
self._loaded[model_category] = model
return model # type: ignore
def get_model(self, model_categeory, model_variant) -> SomeEynollahModel:
needle = (model_categeory, model_variant)
if needle not in self._loaded:
raise ValueError('Model/variant "{needle} not previously loaded with "load_model(..)"')
return self._loaded[needle]
def get(self, model_category) -> SomeEynollahModel:
if model_category not in self._loaded:
raise ValueError(f'Model "{model_category} not previously loaded with "load_model(..)"')
return self._loaded[model_category]
def _load_ocr_model(self, variant: str) -> SomeEynollahModel:
"""
@@ -247,15 +253,21 @@ class EynollahModelZoo():
return Model(
ocr_model.get_layer(name = "image").input, # type: ignore
ocr_model.get_layer(name = "dense2").output) # type: ignore
def _load_characters(self) -> List[str]:
"""
Load encoding for OCR
"""
with open(self.model_path('ocr') / self.model_path('num_to_char', absolute=False), "r") as config_file:
return json.load(config_file)
def _load_num_to_char(self):
def _load_num_to_char(self) -> StringLookup:
"""
Load decoder for OCR
"""
with open(self.model_path('ocr') / self.model_path('ocr', 'num_to_char', absolute=False), "r") as config_file:
characters = json.load(config_file)
characters = self._load_characters()
# Mapping characters to integers.
char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)
char_to_num = StringLookup(vocabulary=characters, mask_token=None)
# Mapping integers back to original characters.
return StringLookup(
vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
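
The new _load_characters/_load_num_to_char pair splits vocabulary loading from decoder construction: characters_org.txt supplies the character list, a forward StringLookup assigns integer ids, and an inverted StringLookup over the same vocabulary maps predicted ids back to characters. A small runnable sketch of that round trip, using a toy vocabulary in place of characters_org.txt:

from keras.layers import StringLookup

characters = ["a", "b", "c"]  # toy stand-in for the json-loaded characters_org.txt

# Mapping characters to integers.
char_to_num = StringLookup(vocabulary=characters, mask_token=None)
# Mapping integers back to original characters.
num_to_char = StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True)

ids = char_to_num(["a", "c"])
print(num_to_char(ids))  # prints byte strings [b'a' b'c'], round-tripping the input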


@@ -393,7 +393,12 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
z = gaussian_filter1d(regions_without_separators_0, sigma_)
return np.std(z)
def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
def find_num_col(
regions_without_separators,
num_col_classifier,
tables,
multiplier=3.8,
):
if not regions_without_separators.any():
return 0, []
#plt.imshow(regions_without_separators)