Introduce model_zoo to Eynollah_ocr

kba 2025-10-20 21:14:52 +02:00
parent d609a532bf
commit 062f317d2e
4 changed files with 149 additions and 138 deletions
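For orientation only (not part of the commit), the access pattern introduced below replaces direct `self.models[...]` dict lookups with a shared `EynollahModelZoo`. This is a minimal hypothetical sketch assembled from the calls visible in the diffs; the basedir value and the surrounding code are assumptions, while `EynollahModelZoo`, `load_model`/`load_models`, and `get` are taken from the diff itself.

# Hypothetical sketch of the model-zoo access pattern this commit introduces.
# The basedir path and the chosen categories are illustrative assumptions.
from eynollah.model_zoo import EynollahModelZoo

model_zoo = EynollahModelZoo(basedir="/path/to/models")   # assumed path
model_zoo.load_models('enhancement', 'binarization')      # load by category (strings or (category, variant) tuples)
model_zoo.load_model('ocr', 'tr')                         # single model, category plus variant
enhancement = model_zoo.get('enhancement')                # retrieve a previously loaded model
# get() raises ValueError if the category was never loaded (see the model_zoo.py diff below)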

File 1 of 4

@@ -271,12 +271,12 @@ class Eynollah:
 if self.ocr:
 if self.tr:
 loadable.append(('ocr', 'tr'))
-loadable.append(('ocr_tr_processor', 'tr'))
+loadable.append(('trocr_processor', 'tr'))
 else:
 loadable.append('ocr')
 loadable.append('num_to_char')
-self.models = self.model_zoo.load_models(*loadable)
+self.model_zoo.load_models(*loadable)
 def __del__(self):
 if hasattr(self, 'executor') and getattr(self, 'executor'):
@@ -338,8 +338,8 @@ class Eynollah:
 def predict_enhancement(self, img):
 self.logger.debug("enter predict_enhancement")
-img_height_model = self.models["enhancement"].layers[-1].output_shape[1]
-img_width_model = self.models["enhancement"].layers[-1].output_shape[2]
+img_height_model = self.model_zoo.get("enhancement").layers[-1].output_shape[1]
+img_width_model = self.model_zoo.get("enhancement").layers[-1].output_shape[2]
 if img.shape[0] < img_height_model:
 img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST)
 if img.shape[1] < img_width_model:
@@ -380,7 +380,7 @@ class Eynollah:
 index_y_d = img_h - img_height_model
 img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :]
-label_p_pred = self.models["enhancement"].predict(img_patch, verbose=0)
+label_p_pred = self.model_zoo.get("enhancement").predict(img_patch, verbose=0)
 seg = label_p_pred[0, :, :, :] * 255
 if i == 0 and j == 0:
@@ -555,7 +555,7 @@ class Eynollah:
 img_in[0, :, :, 1] = img_1ch[:, :]
 img_in[0, :, :, 2] = img_1ch[:, :]
-label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0)
+label_p_pred = self.model_zoo.get("col_classifier").predict(img_in, verbose=0)
 num_col = np.argmax(label_p_pred[0]) + 1
 self.logger.info("Found %s columns (%s)", num_col, label_p_pred)
@@ -573,7 +573,7 @@ class Eynollah:
 self.logger.info("Detected %s DPI", dpi)
 if self.input_binary:
 img = self.imread()
-prediction_bin = self.do_prediction(True, img, self.models["binarization"], n_batch_inference=5)
+prediction_bin = self.do_prediction(True, img, self.model_zoo.get("binarization"), n_batch_inference=5)
 prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
 prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
 img= np.copy(prediction_bin)
@@ -613,7 +613,7 @@ class Eynollah:
 img_in[0, :, :, 1] = img_1ch[:, :]
 img_in[0, :, :, 2] = img_1ch[:, :]
-label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0)
+label_p_pred = self.model_zoo.get("col_classifier").predict(img_in, verbose=0)
 num_col = np.argmax(label_p_pred[0]) + 1
 elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower):
@@ -634,7 +634,7 @@ class Eynollah:
 img_in[0, :, :, 1] = img_1ch[:, :]
 img_in[0, :, :, 2] = img_1ch[:, :]
-label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0)
+label_p_pred = self.model_zoo.get("col_classifier").predict(img_in, verbose=0)
 num_col = np.argmax(label_p_pred[0]) + 1
 if num_col > self.num_col_upper:
@@ -1486,7 +1486,7 @@ class Eynollah:
 cont_page = []
 if not self.ignore_page_extraction:
 img = np.copy(self.image)#cv2.GaussianBlur(self.image, (5, 5), 0)
-img_page_prediction = self.do_prediction(False, img, self.models["page"])
+img_page_prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
 imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
 _, thresh = cv2.threshold(imgray, 0, 255, 0)
 ##thresh = cv2.dilate(thresh, KERNEL, iterations=3)
@@ -1534,7 +1534,7 @@ class Eynollah:
 else:
 img = self.imread()
 img = cv2.GaussianBlur(img, (5, 5), 0)
-img_page_prediction = self.do_prediction(False, img, self.models["page"])
+img_page_prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
 imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
 _, thresh = cv2.threshold(imgray, 0, 255, 0)
@@ -1560,7 +1560,7 @@ class Eynollah:
 self.logger.debug("enter extract_text_regions")
 img_height_h = img.shape[0]
 img_width_h = img.shape[1]
-model_region = self.models["region_fl"] if patches else self.models["region_fl_np"]
+model_region = self.model_zoo.get("region_fl") if patches else self.model_zoo.get("region_fl_np")
 if self.light_version:
 thresholding_for_fl_light_version = True
@@ -1595,7 +1595,7 @@ class Eynollah:
 self.logger.debug("enter extract_text_regions")
 img_height_h = img.shape[0]
 img_width_h = img.shape[1]
-model_region = self.models["region_fl"] if patches else self.models["region_fl_np"]
+model_region = self.model_zoo.get("region_fl") if patches else self.model_zoo.get("region_fl_np")
 if not patches:
 img = otsu_copy_binary(img)
@@ -1816,14 +1816,14 @@ class Eynollah:
 img_w = img_org.shape[1]
 img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w))
-prediction_textline = self.do_prediction(use_patches, img, self.models["textline"],
+prediction_textline = self.do_prediction(use_patches, img, self.model_zoo.get("textline"),
 marginal_of_patch_percent=0.15,
 n_batch_inference=3,
 thresholding_for_artificial_class_in_light_version=self.textline_light,
 threshold_art_class_textline=self.threshold_art_class_textline)
 #if not self.textline_light:
 #if num_col_classifier==1:
-#prediction_textline_nopatch = self.do_prediction(False, img, self.models["textline"])
+#prediction_textline_nopatch = self.do_prediction(False, img, self.model_zoo.get_model("textline"))
 #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0
 prediction_textline = resize_image(prediction_textline, img_h, img_w)
@@ -1894,7 +1894,7 @@ class Eynollah:
 #cv2.imwrite('prediction_textline2.png', prediction_textline[:,:,0])
-prediction_textline_longshot = self.do_prediction(False, img, self.models["textline"])
+prediction_textline_longshot = self.do_prediction(False, img, self.model_zoo.get("textline"))
 prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w)
@@ -1927,7 +1927,7 @@ class Eynollah:
 img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
 img_resized = resize_image(img,img_h_new, img_w_new )
-prediction_regions_org, _ = self.do_prediction_new_concept(True, img_resized, self.models["region"])
+prediction_regions_org, _ = self.do_prediction_new_concept(True, img_resized, self.model_zoo.get("region"))
 prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h )
 image_page, page_coord, cont_page = self.extract_page()
@@ -2043,7 +2043,7 @@ class Eynollah:
 #if self.input_binary:
 #img_bin = np.copy(img_resized)
 ###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30):
-###prediction_bin = self.do_prediction(True, img_resized, self.models["binarization"], n_batch_inference=5)
+###prediction_bin = self.do_prediction(True, img_resized, self.model_zoo.get_model("binarization"), n_batch_inference=5)
 ####print("inside bin ", time.time()-t_bin)
 ###prediction_bin=prediction_bin[:,:,0]
@@ -2058,7 +2058,7 @@ class Eynollah:
 ###else:
 ###img_bin = np.copy(img_resized)
 if (self.ocr and self.tr) and not self.input_binary:
-prediction_bin = self.do_prediction(True, img_resized, self.models["binarization"], n_batch_inference=5)
+prediction_bin = self.do_prediction(True, img_resized, self.model_zoo.get("binarization"), n_batch_inference=5)
 prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
 prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
 prediction_bin = prediction_bin.astype(np.uint16)
@@ -2090,14 +2090,14 @@ class Eynollah:
 self.logger.debug("resized to %dx%d for %d cols",
 img_resized.shape[1], img_resized.shape[0], num_col_classifier)
 prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
-True, img_resized, self.models["region_1_2"], n_batch_inference=1,
+True, img_resized, self.model_zoo.get("region_1_2"), n_batch_inference=1,
 thresholding_for_some_classes_in_light_version=True,
 threshold_art_class_layout=self.threshold_art_class_layout)
 else:
 prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3))
 confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1]))
 prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept(
-False, self.image_page_org_size, self.models["region_1_2"], n_batch_inference=1,
+False, self.image_page_org_size, self.model_zoo.get("region_1_2"), n_batch_inference=1,
 thresholding_for_artificial_class_in_light_version=True,
 threshold_art_class_layout=self.threshold_art_class_layout)
 ys = slice(*self.page_coord[0:2])
@@ -2111,10 +2111,10 @@ class Eynollah:
 self.logger.debug("resized to %dx%d (new_h=%d) for %d cols",
 img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier)
 prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
-True, img_resized, self.models["region_1_2"], n_batch_inference=2,
+True, img_resized, self.model_zoo.get("region_1_2"), n_batch_inference=2,
 thresholding_for_some_classes_in_light_version=True,
 threshold_art_class_layout=self.threshold_art_class_layout)
-###prediction_regions_org = self.do_prediction(True, img_bin, self.models["region"],
+###prediction_regions_org = self.do_prediction(True, img_bin, self.model_zoo.get_model("region"),
 ###n_batch_inference=3,
 ###thresholding_for_some_classes_in_light_version=True)
 #print("inside 3 ", time.time()-t_in)
@@ -2194,7 +2194,7 @@ class Eynollah:
 ratio_x=1
 img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
-prediction_regions_org_y = self.do_prediction(True, img, self.models["region"])
+prediction_regions_org_y = self.do_prediction(True, img, self.model_zoo.get("region"))
 prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h )
 #plt.imshow(prediction_regions_org_y[:,:,0])
@@ -2209,7 +2209,7 @@ class Eynollah:
 _, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
 img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1)))
-prediction_regions_org = self.do_prediction(True, img, self.models["region"])
+prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
 prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
 prediction_regions_org=prediction_regions_org[:,:,0]
@@ -2217,7 +2217,7 @@ class Eynollah:
 img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]))
-prediction_regions_org2 = self.do_prediction(True, img, self.models["region_p2"], marginal_of_patch_percent=0.2)
+prediction_regions_org2 = self.do_prediction(True, img, self.model_zoo.get("region_p2"), marginal_of_patch_percent=0.2)
 prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h )
 mask_zeros2 = (prediction_regions_org2[:,:,0] == 0)
@@ -2241,7 +2241,7 @@ class Eynollah:
 if self.input_binary:
 prediction_bin = np.copy(img_org)
 else:
-prediction_bin = self.do_prediction(True, img_org, self.models["binarization"], n_batch_inference=5)
+prediction_bin = self.do_prediction(True, img_org, self.model_zoo.get("binarization"), n_batch_inference=5)
 prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
 prediction_bin = 255 * (prediction_bin[:,:,0]==0)
 prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
@@ -2251,7 +2251,7 @@ class Eynollah:
 img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
-prediction_regions_org = self.do_prediction(True, img, self.models["region"])
+prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
 prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
 prediction_regions_org=prediction_regions_org[:,:,0]
@@ -2278,7 +2278,7 @@ class Eynollah:
 except:
 if self.input_binary:
 prediction_bin = np.copy(img_org)
-prediction_bin = self.do_prediction(True, img_org, self.models["binarization"], n_batch_inference=5)
+prediction_bin = self.do_prediction(True, img_org, self.model_zoo.get("binarization"), n_batch_inference=5)
 prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
 prediction_bin = 255 * (prediction_bin[:,:,0]==0)
 prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
@@ -2289,14 +2289,14 @@ class Eynollah:
 img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
-prediction_regions_org = self.do_prediction(True, img, self.models["region"])
+prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
 prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
 prediction_regions_org=prediction_regions_org[:,:,0]
 #mask_lines_only=(prediction_regions_org[:,:]==3)*1
 #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
-#prediction_regions_org = self.do_prediction(True, img, self.models["region"])
+#prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get_model("region"))
 #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
 #prediction_regions_org = prediction_regions_org[:,:,0]
 #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0
@@ -2667,13 +2667,13 @@ class Eynollah:
 img_width_h = img_org.shape[1]
 patches = False
 if self.light_version:
-prediction_table, _ = self.do_prediction_new_concept(patches, img, self.models["table"])
+prediction_table, _ = self.do_prediction_new_concept(patches, img, self.model_zoo.get("table"))
 prediction_table = prediction_table.astype(np.int16)
 return prediction_table[:,:,0]
 else:
 if num_col_classifier < 4 and num_col_classifier > 2:
-prediction_table = self.do_prediction(patches, img, self.models["table"])
-pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.models["table"])
+prediction_table = self.do_prediction(patches, img, self.model_zoo.get("table"))
+pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_zoo.get("table"))
 pre_updown = cv2.flip(pre_updown, -1)
 prediction_table[:,:,0][pre_updown[:,:,0]==1]=1
@@ -2692,8 +2692,8 @@ class Eynollah:
 xs = slice(w_start, w_start + img.shape[1])
 img_new[ys, xs] = img
-prediction_ext = self.do_prediction(patches, img_new, self.models["table"])
-pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.models["table"])
+prediction_ext = self.do_prediction(patches, img_new, self.model_zoo.get("table"))
+pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_zoo.get("table"))
 pre_updown = cv2.flip(pre_updown, -1)
 prediction_table = prediction_ext[ys, xs]
@@ -2714,8 +2714,8 @@ class Eynollah:
 xs = slice(w_start, w_start + img.shape[1])
 img_new[ys, xs] = img
-prediction_ext = self.do_prediction(patches, img_new, self.models["table"])
-pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.models["table"])
+prediction_ext = self.do_prediction(patches, img_new, self.model_zoo.get("table"))
+pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_zoo.get("table"))
 pre_updown = cv2.flip(pre_updown, -1)
 prediction_table = prediction_ext[ys, xs]
@@ -2727,10 +2727,10 @@ class Eynollah:
 prediction_table = np.zeros(img.shape)
 img_w_half = img.shape[1] // 2
-pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.models["table"])
-pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.models["table"])
-pre_full = self.do_prediction(patches, img[:,:,:], self.models["table"])
-pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.models["table"])
+pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.model_zoo.get("table"))
+pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.model_zoo.get("table"))
+pre_full = self.do_prediction(patches, img[:,:,:], self.model_zoo.get("table"))
+pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_zoo.get("table"))
 pre_updown = cv2.flip(pre_updown, -1)
 prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4)
@@ -3522,7 +3522,7 @@ class Eynollah:
 tot_counter += 1
 batch.append(j)
 if tot_counter % inference_bs == 0 or tot_counter == len(ij_list):
-y_pr = self.models["reading_order"].predict(input_1 , verbose=0)
+y_pr = self.model_zoo.get("reading_order").predict(input_1 , verbose=0)
 for jb, j in enumerate(batch):
 if y_pr[jb][0]>=0.5:
 post_list.append(j)
@@ -4105,7 +4105,7 @@ class Eynollah:
 gc.collect()
 ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
 image_page, all_found_textline_polygons, np.zeros((len(all_found_textline_polygons), 4)),
-self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], textline_light=True)
+self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), textline_light=True)
 else:
 ocr_all_textlines = None
@@ -4614,27 +4614,27 @@ class Eynollah:
 if len(all_found_textline_polygons):
 ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
 image_page, all_found_textline_polygons, all_box_coord,
-self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
+self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)
 if len(all_found_textline_polygons_marginals_left):
 ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(
 image_page, all_found_textline_polygons_marginals_left, all_box_coord_marginals_left,
-self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
+self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)
 if len(all_found_textline_polygons_marginals_right):
 ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(
 image_page, all_found_textline_polygons_marginals_right, all_box_coord_marginals_right,
-self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
+self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)
 if self.full_layout and len(all_found_textline_polygons):
 ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(
 image_page, all_found_textline_polygons_h, all_box_coord_h,
-self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
+self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)
 if self.full_layout and len(polygons_of_drop_capitals):
 ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(
 image_page, polygons_of_drop_capitals, np.zeros((len(polygons_of_drop_capitals), 4)),
-self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
+self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)
 else:
 if self.light_version:
@@ -4646,7 +4646,7 @@ class Eynollah:
 gc.collect()
 torch.cuda.empty_cache()
-self.models["ocr"].to(self.device)
+self.model_zoo.get("ocr").to(self.device)
 ind_tot = 0
 #cv2.imwrite('./img_out.png', image_page)
@@ -4683,7 +4683,7 @@ class Eynollah:
 img_croped = img_poly_on_img[y:y+h, x:x+w, :]
 #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped)
 text_ocr = self.return_ocr_of_textline_without_common_section(
-img_croped, self.models["ocr"], self.models['ocr_tr_processor'], self.device, w, h2w_ratio, ind_tot)
+img_croped, self.model_zoo.get("ocr"), self.model_zoo.get("trocr_processor"), self.device, w, h2w_ratio, ind_tot)
 ocr_textline_in_textregion.append(text_ocr)
 ind_tot = ind_tot +1
 ocr_all_textlines.append(ocr_textline_in_textregion)

File 2 of 4

@@ -1,6 +1,6 @@
 # pyright: reportPossiblyUnboundVariable=false
-from logging import getLogger
+from logging import Logger, getLogger
 from typing import Optional
 from pathlib import Path
 import os
@@ -8,23 +8,31 @@ import json
 import gc
 import sys
 import math
+import cv2
 import time
 from keras.layers import StringLookup
-import cv2
-from eynollah.utils.resize import resize_image
-from eynollah.utils.utils_ocr import break_curved_line_into_small_pieces_and_then_merge, decode_batch_predictions, fit_text_single_line, get_contours_and_bounding_boxes, get_orientation_moments, preprocess_and_resize_image_for_ocrcnn_model, return_textlines_split_if_needed, rotate_image_with_padding
-from .utils import is_image_filename
 import xml.etree.ElementTree as ET
 import tensorflow as tf
 from keras.models import load_model
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
+from eynollah.model_zoo import EynollahModelZoo
 import torch
+from .utils import is_image_filename
+from .utils.resize import resize_image
+from .utils.utils_ocr import (
+break_curved_line_into_small_pieces_and_then_merge,
+decode_batch_predictions,
+fit_text_single_line,
+get_contours_and_bounding_boxes,
+get_orientation_moments,
+preprocess_and_resize_image_for_ocrcnn_model,
+return_textlines_split_if_needed,
+rotate_image_with_padding,
+)
 # cannot use importlib.resources until we move to 3.9+ forimportlib.resources.files
 if sys.version_info < (3, 10):
 import importlib_resources
@@ -43,68 +51,51 @@ class Eynollah_ocr:
 model_name=None,
 dir_xmls=None,
 tr_ocr=False,
-batch_size=None,
-export_textline_images_and_text=False,
-do_not_mask_with_textline_contour=False,
+batch_size: Optional[int]=None,
+export_textline_images_and_text: bool=False,
+do_not_mask_with_textline_contour: bool=False,
 pref_of_dataset=None,
-min_conf_value_of_textline_text : Optional[float]=None,
-logger=None,
+min_conf_value_of_textline_text : float=0.3,
+logger: Optional[Logger]=None,
 ):
-self.model_name = model_name
 self.tr_ocr = tr_ocr
 self.export_textline_images_and_text = export_textline_images_and_text
 self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
 self.pref_of_dataset = pref_of_dataset
 self.logger = logger if logger else getLogger('eynollah')
+self.model_zoo = EynollahModelZoo(basedir=dir_models)
-if not export_textline_images_and_text:
-if min_conf_value_of_textline_text:
-self.min_conf_value_of_textline_text = float(min_conf_value_of_textline_text)
-else:
-self.min_conf_value_of_textline_text = 0.3
-if tr_ocr:
-assert TrOCRProcessor
-self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
-self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-if self.model_name:
-self.model_ocr_dir = self.model_name
-else:
-self.model_ocr_dir = dir_models + "/model_eynollah_ocr_trocr_20250919"
-self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
-self.model_ocr.to(self.device)
-if not batch_size:
-self.b_s = 2
-else:
-self.b_s = int(batch_size)
-else:
-if self.model_name:
-self.model_ocr_dir = self.model_name
-else:
-self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250930"
-model_ocr = load_model(self.model_ocr_dir , compile=False)
-self.prediction_model = tf.keras.models.Model(
-model_ocr.get_layer(name = "image").input,
-model_ocr.get_layer(name = "dense2").output)
-if not batch_size:
-self.b_s = 8
-else:
-self.b_s = int(batch_size)
-with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
-characters = json.load(config_file)
-AUTOTUNE = tf.data.AUTOTUNE
-# Mapping characters to integers.
-char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)
-# Mapping integers back to original characters.
-self.num_to_char = StringLookup(
-vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
-)
-self.end_character = len(characters) + 2
+# TODO: Properly document what 'export_textline_images_and_text' is about
+if export_textline_images_and_text:
+self.logger.info("export_textline_images_and_text was set, so no actual models are loaded")
+return
+self.min_conf_value_of_textline_text = min_conf_value_of_textline_text
+self.b_s = 2 if batch_size is None and tr_ocr else 8 if batch_size is None else batch_size
+if tr_ocr:
+self.model_zoo.load_model('trocr_processor', '')
+if model_name:
+self.model_zoo.load_model('ocr', 'tr', model_name)
+else:
+self.model_zoo.load_model('ocr', 'tr')
+self.model_zoo.get('ocr').to(self.device)
+else:
+if model_name:
+self.model_zoo.load_model('ocr', '', model_name)
+else:
+self.model_zoo.load_model('ocr', '')
+self.model_zoo.load_model('num_to_char')
+self.end_character = len(self.model_zoo.load_model('characters')) + 2
+@property
+def device(self):
+if torch.cuda.is_available():
+self.logger.info("Using GPU acceleration")
+return torch.device("cuda:0")
+else:
+self.logger.info("Using CPU processing")
+return torch.device("cpu")
 def run(self, overwrite: bool = False,
 dir_in: Optional[str] = None,
@@ -119,13 +110,16 @@ class Eynollah_ocr:
 for image_filename in filter(is_image_filename,
 os.listdir(dir_in))]
 else:
+assert image_filename
 ls_imgs = [image_filename]
 if self.tr_ocr:
 tr_ocr_input_height_and_width = 384
 for dir_img in ls_imgs:
 file_name = Path(dir_img).stem
+assert dir_xmls # FIXME: check the logic
 dir_xml = os.path.join(dir_xmls, file_name+'.xml')
+assert dir_out # FIXME: check the logic
 out_file_ocr = os.path.join(dir_out, file_name+'.xml')
 if os.path.exists(out_file_ocr):
@@ -204,10 +198,10 @@ class Eynollah_ocr:
 cropped_lines = []
 indexer_b_s = 0
-pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
 generated_ids_merged = self.model_ocr.generate(
 pixel_values_merged.to(self.device))
-generated_text_merged = self.processor.batch_decode(
+generated_text_merged = self.model_zoo.get('processor').batch_decode(
 generated_ids_merged, skip_special_tokens=True)
 extracted_texts = extracted_texts + generated_text_merged
@@ -227,10 +221,10 @@ class Eynollah_ocr:
 cropped_lines = []
 indexer_b_s = 0
-pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
 generated_ids_merged = self.model_ocr.generate(
 pixel_values_merged.to(self.device))
-generated_text_merged = self.processor.batch_decode(
+generated_text_merged = self.model_zoo.get('processor').batch_decode(
 generated_ids_merged, skip_special_tokens=True)
 extracted_texts = extracted_texts + generated_text_merged
@@ -247,10 +241,10 @@ class Eynollah_ocr:
 cropped_lines = []
 indexer_b_s = 0
-pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
 generated_ids_merged = self.model_ocr.generate(
 pixel_values_merged.to(self.device))
-generated_text_merged = self.processor.batch_decode(
+generated_text_merged = self.model_zoo.get('processor').batch_decode(
 generated_ids_merged, skip_special_tokens=True)
 extracted_texts = extracted_texts + generated_text_merged
@@ -265,10 +259,10 @@ class Eynollah_ocr:
 cropped_lines = []
 indexer_b_s = 0
-pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
 generated_ids_merged = self.model_ocr.generate(
 pixel_values_merged.to(self.device))
-generated_text_merged = self.processor.batch_decode(
+generated_text_merged = self.model_zoo.get('processor').batch_decode(
 generated_ids_merged, skip_special_tokens=True)
 extracted_texts = extracted_texts + generated_text_merged
@@ -282,9 +276,9 @@ class Eynollah_ocr:
 cropped_lines = []
 indexer_b_s = 0
-pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
 generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
-generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+generated_text_merged = self.model_zoo.get('processor').batch_decode(generated_ids_merged, skip_special_tokens=True)
 extracted_texts = extracted_texts + generated_text_merged
@@ -299,10 +293,10 @@ class Eynollah_ocr:
 ####n_start = i*self.b_s
 ####n_end = (i+1)*self.b_s
 ####imgs = cropped_lines[n_start:n_end]
-####pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+####pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
 ####generated_ids_merged = self.model_ocr.generate(
 #### pixel_values_merged.to(self.device))
-####generated_text_merged = self.processor.batch_decode(
+####generated_text_merged = self.model_zoo.get('processor').batch_decode(
 #### generated_ids_merged, skip_special_tokens=True)
 ####extracted_texts = extracted_texts + generated_text_merged
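
For orientation, a hedged usage sketch of the refactored Eynollah_ocr constructor and run() as they look after this commit. The keyword names are taken from the diff above; the module path, the concrete values, and dir_out as a run() parameter are assumptions inferred from usage, not confirmed by the diff.

# Hypothetical usage sketch based on the signature shown in the diff above.
# dir_models, dir_in and dir_out values are placeholders, not part of the commit.
from eynollah.eynollah import Eynollah_ocr  # module path assumed

ocr = Eynollah_ocr(
    dir_models="/path/to/models",         # passed to EynollahModelZoo(basedir=...)
    batch_size=8,                         # defaults to 8 (2 for tr_ocr) when None
    min_conf_value_of_textline_text=0.3,  # new default taken from the signature
)
ocr.run(overwrite=False, dir_in="pages/", dir_out="ocr_out/")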

File 3 of 4

@@ -11,7 +11,7 @@ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from eynollah.patch_encoder import PatchEncoder, Patches
-SomeEynollahModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, Model]
+SomeEynollahModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, Model, List]
 # Dict mapping model_category to dict mapping variant (default is '') to Path
@@ -114,14 +114,19 @@ DEFAULT_MODEL_VERSIONS: Dict[str, Dict[str, str]] = {
 '': "model_eynollah_ocr_cnnrnn_20250930",
 },
-'ocr_tr_processor': {
+'trocr_processor': {
 '': 'microsoft/trocr-base-printed',
 'htr': "microsoft/trocr-base-handwritten",
 },
 'num_to_char': {
-'': 'model_eynollah_ocr_cnnrnn_20250930/characters_org.txt'
+'': 'characters_org.txt'
 },
+'characters': {
+'': 'characters_org.txt'
+},
 }
@@ -142,7 +147,7 @@ class EynollahModelZoo():
 self.model_versions = deepcopy(DEFAULT_MODEL_VERSIONS)
 if model_overrides:
 self.override_models(*model_overrides)
-self._loaded: Dict[Tuple[str, str], SomeEynollahModel] = {}
+self._loaded: Dict[str, SomeEynollahModel] = {}
 def override_models(self, *model_overrides: Tuple[str, str, str]):
 """
@@ -216,7 +221,9 @@ class EynollahModelZoo():
 model = self._load_ocr_model(variant=model_variant)
 elif model_category == 'num_to_char':
 model = self._load_num_to_char()
-elif model_category == 'tr_processor':
+elif model_category == 'characters':
+model = self._load_characters()
+elif model_category == 'trocr_processor':
 return TrOCRProcessor.from_pretrained(self.model_path(...))
 else:
 try:
@@ -225,14 +232,13 @@ class EynollahModelZoo():
 self.logger.exception(e)
 model = load_model(model_path, compile=False, custom_objects={
 "PatchEncoder": PatchEncoder, "Patches": Patches})
-self._loaded[(model_category, model_variant)] = model
+self._loaded[model_category] = model
 return model # type: ignore
-def get_model(self, model_categeory, model_variant) -> SomeEynollahModel:
-needle = (model_categeory, model_variant)
-if needle not in self._loaded:
-raise ValueError('Model/variant "{needle} not previously loaded with "load_model(..)"')
-return self._loaded[needle]
+def get(self, model_category) -> SomeEynollahModel:
+if model_category not in self._loaded:
+raise ValueError(f'Model "{model_category} not previously loaded with "load_model(..)"')
+return self._loaded[model_category]
 def _load_ocr_model(self, variant: str) -> SomeEynollahModel:
 """
@@ -247,15 +253,21 @@ class EynollahModelZoo():
 return Model(
 ocr_model.get_layer(name = "image").input, # type: ignore
 ocr_model.get_layer(name = "dense2").output) # type: ignore
+def _load_characters(self) -> List[str]:
+"""
+Load encoding for OCR
+"""
+with open(self.model_path('ocr') / self.model_path('num_to_char', absolute=False), "r") as config_file:
+return json.load(config_file)
-def _load_num_to_char(self):
+def _load_num_to_char(self) -> StringLookup:
 """
 Load decoder for OCR
 """
-with open(self.model_path('ocr') / self.model_path('ocr', 'num_to_char', absolute=False), "r") as config_file:
-characters = json.load(config_file)
+characters = self._load_characters()
 # Mapping characters to integers.
-char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)
+char_to_num = StringLookup(vocabulary=characters, mask_token=None)
 # Mapping integers back to original characters.
 return StringLookup(
 vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True

File 4 of 4

@@ -393,7 +393,12 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
 z = gaussian_filter1d(regions_without_separators_0, sigma_)
 return np.std(z)
-def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
+def find_num_col(
+regions_without_separators,
+num_col_classifier,
+tables,
+multiplier=3.8,
+):
 if not regions_without_separators.any():
 return 0, []
 #plt.imshow(regions_without_separators)