Introduce model_zoo to Eynollah_ocr

parent d609a532bf
commit 062f317d2e

4 changed files with 149 additions and 138 deletions
@@ -271,12 +271,12 @@ class Eynollah:
if self.ocr:
if self.tr:
loadable.append(('ocr', 'tr'))
- loadable.append(('ocr_tr_processor', 'tr'))
+ loadable.append(('trocr_processor', 'tr'))
else:
loadable.append('ocr')
loadable.append('num_to_char')

- self.models = self.model_zoo.load_models(*loadable)
+ self.model_zoo.load_models(*loadable)

def __del__(self):
if hasattr(self, 'executor') and getattr(self, 'executor'):
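Editor's note (not part of the commit): the hunk above shows the loading pattern the rest of this diff relies on. Categories to load are collected first, either as plain names (default variant) or as (category, variant) tuples, then loaded in one call on the shared zoo; later code fetches each model by category with get() instead of indexing a self.models dict. A minimal sketch, with a placeholder model directory and an illustrative subset of categories:

# Sketch only: the load_models/get pattern introduced by this commit.
from eynollah.model_zoo import EynollahModelZoo

model_zoo = EynollahModelZoo(basedir="/path/to/models")  # placeholder path

use_transformer_ocr = True  # stand-in for the `self.ocr and self.tr` condition above
loadable = ["enhancement", "col_classifier", "binarization"]  # illustrative categories
if use_transformer_ocr:
    loadable.append(("ocr", "tr"))
    loadable.append(("trocr_processor", "tr"))
else:
    loadable.append("ocr")
    loadable.append("num_to_char")

# One call loads everything; plain strings use the default ('') variant,
# tuples select a named variant.
model_zoo.load_models(*loadable)

# Consumers then fetch a loaded model by category:
enhancement_model = model_zoo.get("enhancement")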
@@ -338,8 +338,8 @@ class Eynollah:
def predict_enhancement(self, img):
self.logger.debug("enter predict_enhancement")

- img_height_model = self.models["enhancement"].layers[-1].output_shape[1]
- img_width_model = self.models["enhancement"].layers[-1].output_shape[2]
+ img_height_model = self.model_zoo.get("enhancement").layers[-1].output_shape[1]
+ img_width_model = self.model_zoo.get("enhancement").layers[-1].output_shape[2]
if img.shape[0] < img_height_model:
img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST)
if img.shape[1] < img_width_model:

@@ -380,7 +380,7 @@ class Eynollah:
index_y_d = img_h - img_height_model

img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :]
- label_p_pred = self.models["enhancement"].predict(img_patch, verbose=0)
+ label_p_pred = self.model_zoo.get("enhancement").predict(img_patch, verbose=0)
seg = label_p_pred[0, :, :, :] * 255

if i == 0 and j == 0:

@@ -555,7 +555,7 @@ class Eynollah:
img_in[0, :, :, 1] = img_1ch[:, :]
img_in[0, :, :, 2] = img_1ch[:, :]

- label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0)
+ label_p_pred = self.model_zoo.get("col_classifier").predict(img_in, verbose=0)
num_col = np.argmax(label_p_pred[0]) + 1

self.logger.info("Found %s columns (%s)", num_col, label_p_pred)
@@ -573,7 +573,7 @@ class Eynollah:
self.logger.info("Detected %s DPI", dpi)
if self.input_binary:
img = self.imread()
- prediction_bin = self.do_prediction(True, img, self.models["binarization"], n_batch_inference=5)
+ prediction_bin = self.do_prediction(True, img, self.model_zoo.get("binarization"), n_batch_inference=5)
prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
img= np.copy(prediction_bin)

@@ -613,7 +613,7 @@ class Eynollah:
img_in[0, :, :, 1] = img_1ch[:, :]
img_in[0, :, :, 2] = img_1ch[:, :]

- label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0)
+ label_p_pred = self.model_zoo.get("col_classifier").predict(img_in, verbose=0)
num_col = np.argmax(label_p_pred[0]) + 1

elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower):

@@ -634,7 +634,7 @@ class Eynollah:
img_in[0, :, :, 1] = img_1ch[:, :]
img_in[0, :, :, 2] = img_1ch[:, :]

- label_p_pred = self.models["col_classifier"].predict(img_in, verbose=0)
+ label_p_pred = self.model_zoo.get("col_classifier").predict(img_in, verbose=0)
num_col = np.argmax(label_p_pred[0]) + 1

if num_col > self.num_col_upper:
@@ -1486,7 +1486,7 @@ class Eynollah:
cont_page = []
if not self.ignore_page_extraction:
img = np.copy(self.image)#cv2.GaussianBlur(self.image, (5, 5), 0)
- img_page_prediction = self.do_prediction(False, img, self.models["page"])
+ img_page_prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(imgray, 0, 255, 0)
##thresh = cv2.dilate(thresh, KERNEL, iterations=3)

@@ -1534,7 +1534,7 @@ class Eynollah:
else:
img = self.imread()
img = cv2.GaussianBlur(img, (5, 5), 0)
- img_page_prediction = self.do_prediction(False, img, self.models["page"])
+ img_page_prediction = self.do_prediction(False, img, self.model_zoo.get("page"))

imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(imgray, 0, 255, 0)

@@ -1560,7 +1560,7 @@ class Eynollah:
self.logger.debug("enter extract_text_regions")
img_height_h = img.shape[0]
img_width_h = img.shape[1]
- model_region = self.models["region_fl"] if patches else self.models["region_fl_np"]
+ model_region = self.model_zoo.get("region_fl") if patches else self.model_zoo.get("region_fl_np")

if self.light_version:
thresholding_for_fl_light_version = True

@@ -1595,7 +1595,7 @@ class Eynollah:
self.logger.debug("enter extract_text_regions")
img_height_h = img.shape[0]
img_width_h = img.shape[1]
- model_region = self.models["region_fl"] if patches else self.models["region_fl_np"]
+ model_region = self.model_zoo.get("region_fl") if patches else self.model_zoo.get("region_fl_np")

if not patches:
img = otsu_copy_binary(img)
@@ -1816,14 +1816,14 @@ class Eynollah:
img_w = img_org.shape[1]
img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w))

- prediction_textline = self.do_prediction(use_patches, img, self.models["textline"],
+ prediction_textline = self.do_prediction(use_patches, img, self.model_zoo.get("textline"),
marginal_of_patch_percent=0.15,
n_batch_inference=3,
thresholding_for_artificial_class_in_light_version=self.textline_light,
threshold_art_class_textline=self.threshold_art_class_textline)
#if not self.textline_light:
#if num_col_classifier==1:
- #prediction_textline_nopatch = self.do_prediction(False, img, self.models["textline"])
+ #prediction_textline_nopatch = self.do_prediction(False, img, self.model_zoo.get_model("textline"))
#prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0

prediction_textline = resize_image(prediction_textline, img_h, img_w)

@@ -1894,7 +1894,7 @@ class Eynollah:

#cv2.imwrite('prediction_textline2.png', prediction_textline[:,:,0])

- prediction_textline_longshot = self.do_prediction(False, img, self.models["textline"])
+ prediction_textline_longshot = self.do_prediction(False, img, self.model_zoo.get("textline"))
prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w)

@@ -1927,7 +1927,7 @@ class Eynollah:
img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
img_resized = resize_image(img,img_h_new, img_w_new )

- prediction_regions_org, _ = self.do_prediction_new_concept(True, img_resized, self.models["region"])
+ prediction_regions_org, _ = self.do_prediction_new_concept(True, img_resized, self.model_zoo.get("region"))

prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h )
image_page, page_coord, cont_page = self.extract_page()
@@ -2043,7 +2043,7 @@ class Eynollah:
#if self.input_binary:
#img_bin = np.copy(img_resized)
###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30):
- ###prediction_bin = self.do_prediction(True, img_resized, self.models["binarization"], n_batch_inference=5)
+ ###prediction_bin = self.do_prediction(True, img_resized, self.model_zoo.get_model("binarization"), n_batch_inference=5)

####print("inside bin ", time.time()-t_bin)
###prediction_bin=prediction_bin[:,:,0]

@@ -2058,7 +2058,7 @@ class Eynollah:
###else:
###img_bin = np.copy(img_resized)
if (self.ocr and self.tr) and not self.input_binary:
- prediction_bin = self.do_prediction(True, img_resized, self.models["binarization"], n_batch_inference=5)
+ prediction_bin = self.do_prediction(True, img_resized, self.model_zoo.get("binarization"), n_batch_inference=5)
prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
prediction_bin = prediction_bin.astype(np.uint16)

@@ -2090,14 +2090,14 @@ class Eynollah:
self.logger.debug("resized to %dx%d for %d cols",
img_resized.shape[1], img_resized.shape[0], num_col_classifier)
prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
- True, img_resized, self.models["region_1_2"], n_batch_inference=1,
+ True, img_resized, self.model_zoo.get("region_1_2"), n_batch_inference=1,
thresholding_for_some_classes_in_light_version=True,
threshold_art_class_layout=self.threshold_art_class_layout)
else:
prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3))
confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1]))
prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept(
- False, self.image_page_org_size, self.models["region_1_2"], n_batch_inference=1,
+ False, self.image_page_org_size, self.model_zoo.get("region_1_2"), n_batch_inference=1,
thresholding_for_artificial_class_in_light_version=True,
threshold_art_class_layout=self.threshold_art_class_layout)
ys = slice(*self.page_coord[0:2])
@@ -2111,10 +2111,10 @@ class Eynollah:
self.logger.debug("resized to %dx%d (new_h=%d) for %d cols",
img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier)
prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
- True, img_resized, self.models["region_1_2"], n_batch_inference=2,
+ True, img_resized, self.model_zoo.get("region_1_2"), n_batch_inference=2,
thresholding_for_some_classes_in_light_version=True,
threshold_art_class_layout=self.threshold_art_class_layout)
- ###prediction_regions_org = self.do_prediction(True, img_bin, self.models["region"],
+ ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_zoo.get_model("region"),
###n_batch_inference=3,
###thresholding_for_some_classes_in_light_version=True)
#print("inside 3 ", time.time()-t_in)

@@ -2194,7 +2194,7 @@ class Eynollah:
ratio_x=1

img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
- prediction_regions_org_y = self.do_prediction(True, img, self.models["region"])
+ prediction_regions_org_y = self.do_prediction(True, img, self.model_zoo.get("region"))
prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h )

#plt.imshow(prediction_regions_org_y[:,:,0])

@@ -2209,7 +2209,7 @@ class Eynollah:
_, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1)))

- prediction_regions_org = self.do_prediction(True, img, self.models["region"])
+ prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )

prediction_regions_org=prediction_regions_org[:,:,0]
@@ -2217,7 +2217,7 @@ class Eynollah:

img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]))

- prediction_regions_org2 = self.do_prediction(True, img, self.models["region_p2"], marginal_of_patch_percent=0.2)
+ prediction_regions_org2 = self.do_prediction(True, img, self.model_zoo.get("region_p2"), marginal_of_patch_percent=0.2)
prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h )

mask_zeros2 = (prediction_regions_org2[:,:,0] == 0)

@@ -2241,7 +2241,7 @@ class Eynollah:
if self.input_binary:
prediction_bin = np.copy(img_org)
else:
- prediction_bin = self.do_prediction(True, img_org, self.models["binarization"], n_batch_inference=5)
+ prediction_bin = self.do_prediction(True, img_org, self.model_zoo.get("binarization"), n_batch_inference=5)
prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
prediction_bin = 255 * (prediction_bin[:,:,0]==0)
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)

@@ -2251,7 +2251,7 @@ class Eynollah:

img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))

- prediction_regions_org = self.do_prediction(True, img, self.models["region"])
+ prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
prediction_regions_org=prediction_regions_org[:,:,0]

@@ -2278,7 +2278,7 @@ class Eynollah:
except:
if self.input_binary:
prediction_bin = np.copy(img_org)
- prediction_bin = self.do_prediction(True, img_org, self.models["binarization"], n_batch_inference=5)
+ prediction_bin = self.do_prediction(True, img_org, self.model_zoo.get("binarization"), n_batch_inference=5)
prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
prediction_bin = 255 * (prediction_bin[:,:,0]==0)
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
@@ -2289,14 +2289,14 @@ class Eynollah:

img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
- prediction_regions_org = self.do_prediction(True, img, self.models["region"])
+ prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get("region"))
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
prediction_regions_org=prediction_regions_org[:,:,0]

#mask_lines_only=(prediction_regions_org[:,:]==3)*1
#img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))

- #prediction_regions_org = self.do_prediction(True, img, self.models["region"])
+ #prediction_regions_org = self.do_prediction(True, img, self.model_zoo.get_model("region"))
#prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
#prediction_regions_org = prediction_regions_org[:,:,0]
#prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0

@@ -2667,13 +2667,13 @@ class Eynollah:
img_width_h = img_org.shape[1]
patches = False
if self.light_version:
- prediction_table, _ = self.do_prediction_new_concept(patches, img, self.models["table"])
+ prediction_table, _ = self.do_prediction_new_concept(patches, img, self.model_zoo.get("table"))
prediction_table = prediction_table.astype(np.int16)
return prediction_table[:,:,0]
else:
if num_col_classifier < 4 and num_col_classifier > 2:
- prediction_table = self.do_prediction(patches, img, self.models["table"])
- pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.models["table"])
+ prediction_table = self.do_prediction(patches, img, self.model_zoo.get("table"))
+ pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_zoo.get("table"))
pre_updown = cv2.flip(pre_updown, -1)

prediction_table[:,:,0][pre_updown[:,:,0]==1]=1
@@ -2692,8 +2692,8 @@ class Eynollah:
xs = slice(w_start, w_start + img.shape[1])
img_new[ys, xs] = img

- prediction_ext = self.do_prediction(patches, img_new, self.models["table"])
- pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.models["table"])
+ prediction_ext = self.do_prediction(patches, img_new, self.model_zoo.get("table"))
+ pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_zoo.get("table"))
pre_updown = cv2.flip(pre_updown, -1)

prediction_table = prediction_ext[ys, xs]

@@ -2714,8 +2714,8 @@ class Eynollah:
xs = slice(w_start, w_start + img.shape[1])
img_new[ys, xs] = img

- prediction_ext = self.do_prediction(patches, img_new, self.models["table"])
- pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.models["table"])
+ prediction_ext = self.do_prediction(patches, img_new, self.model_zoo.get("table"))
+ pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_zoo.get("table"))
pre_updown = cv2.flip(pre_updown, -1)

prediction_table = prediction_ext[ys, xs]

@@ -2727,10 +2727,10 @@ class Eynollah:
prediction_table = np.zeros(img.shape)
img_w_half = img.shape[1] // 2

- pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.models["table"])
- pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.models["table"])
- pre_full = self.do_prediction(patches, img[:,:,:], self.models["table"])
- pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.models["table"])
+ pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.model_zoo.get("table"))
+ pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.model_zoo.get("table"))
+ pre_full = self.do_prediction(patches, img[:,:,:], self.model_zoo.get("table"))
+ pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_zoo.get("table"))
pre_updown = cv2.flip(pre_updown, -1)

prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4)
@@ -3522,7 +3522,7 @@ class Eynollah:
tot_counter += 1
batch.append(j)
if tot_counter % inference_bs == 0 or tot_counter == len(ij_list):
- y_pr = self.models["reading_order"].predict(input_1 , verbose=0)
+ y_pr = self.model_zoo.get("reading_order").predict(input_1 , verbose=0)
for jb, j in enumerate(batch):
if y_pr[jb][0]>=0.5:
post_list.append(j)

@@ -4105,7 +4105,7 @@ class Eynollah:
gc.collect()
ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons, np.zeros((len(all_found_textline_polygons), 4)),
- self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], textline_light=True)
+ self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), textline_light=True)
else:
ocr_all_textlines = None
@@ -4614,27 +4614,27 @@ class Eynollah:
if len(all_found_textline_polygons):
ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons, all_box_coord,
- self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
+ self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)

if len(all_found_textline_polygons_marginals_left):
ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons_marginals_left, all_box_coord_marginals_left,
- self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
+ self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)

if len(all_found_textline_polygons_marginals_right):
ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons_marginals_right, all_box_coord_marginals_right,
- self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
+ self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)

if self.full_layout and len(all_found_textline_polygons):
ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons_h, all_box_coord_h,
- self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
+ self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)

if self.full_layout and len(polygons_of_drop_capitals):
ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(
image_page, polygons_of_drop_capitals, np.zeros((len(polygons_of_drop_capitals), 4)),
- self.models["ocr"], self.b_s_ocr, self.models["num_to_char"], self.textline_light, self.curved_line)
+ self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)

else:
if self.light_version:
@@ -4646,7 +4646,7 @@ class Eynollah:
gc.collect()

torch.cuda.empty_cache()
- self.models["ocr"].to(self.device)
+ self.model_zoo.get("ocr").to(self.device)

ind_tot = 0
#cv2.imwrite('./img_out.png', image_page)

@@ -4683,7 +4683,7 @@ class Eynollah:
img_croped = img_poly_on_img[y:y+h, x:x+w, :]
#cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped)
text_ocr = self.return_ocr_of_textline_without_common_section(
- img_croped, self.models["ocr"], self.models['ocr_tr_processor'], self.device, w, h2w_ratio, ind_tot)
+ img_croped, self.model_zoo.get("ocr"), self.model_zoo.get("trocr_processor"), self.device, w, h2w_ratio, ind_tot)
ocr_textline_in_textregion.append(text_ocr)
ind_tot = ind_tot +1
ocr_all_textlines.append(ocr_textline_in_textregion)
@@ -1,6 +1,6 @@
# pyright: reportPossiblyUnboundVariable=false

- from logging import getLogger
+ from logging import Logger, getLogger
from typing import Optional
from pathlib import Path
import os
@@ -8,23 +8,31 @@ import json
import gc
import sys
import math
- import cv2
import time

- from keras.layers import StringLookup
-
- from eynollah.utils.resize import resize_image
- from eynollah.utils.utils_ocr import break_curved_line_into_small_pieces_and_then_merge, decode_batch_predictions, fit_text_single_line, get_contours_and_bounding_boxes, get_orientation_moments, preprocess_and_resize_image_for_ocrcnn_model, return_textlines_split_if_needed, rotate_image_with_padding
-
- from .utils import is_image_filename
-
+ import cv2
import xml.etree.ElementTree as ET
import tensorflow as tf
from keras.models import load_model
from PIL import Image, ImageDraw, ImageFont
import numpy as np
+ from eynollah.model_zoo import EynollahModelZoo
import torch
+
+ from .utils import is_image_filename
+ from .utils.resize import resize_image
+ from .utils.utils_ocr import (
+ break_curved_line_into_small_pieces_and_then_merge,
+ decode_batch_predictions,
+ fit_text_single_line,
+ get_contours_and_bounding_boxes,
+ get_orientation_moments,
+ preprocess_and_resize_image_for_ocrcnn_model,
+ return_textlines_split_if_needed,
+ rotate_image_with_padding,
+ )

# cannot use importlib.resources until we move to 3.9+ forimportlib.resources.files
if sys.version_info < (3, 10):
import importlib_resources
@@ -43,68 +51,51 @@ class Eynollah_ocr:
model_name=None,
dir_xmls=None,
tr_ocr=False,
- batch_size=None,
- export_textline_images_and_text=False,
- do_not_mask_with_textline_contour=False,
+ batch_size: Optional[int]=None,
+ export_textline_images_and_text: bool=False,
+ do_not_mask_with_textline_contour: bool=False,
pref_of_dataset=None,
- min_conf_value_of_textline_text : Optional[float]=None,
- logger=None,
+ min_conf_value_of_textline_text : float=0.3,
+ logger: Optional[Logger]=None,
):
self.model_name = model_name
self.tr_ocr = tr_ocr
self.export_textline_images_and_text = export_textline_images_and_text
self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
self.pref_of_dataset = pref_of_dataset
self.logger = logger if logger else getLogger('eynollah')
+ self.model_zoo = EynollahModelZoo(basedir=dir_models)

- if not export_textline_images_and_text:
- if min_conf_value_of_textline_text:
- self.min_conf_value_of_textline_text = float(min_conf_value_of_textline_text)
+ # TODO: Properly document what 'export_textline_images_and_text' is about
+ if export_textline_images_and_text:
+ self.logger.info("export_textline_images_and_text was set, so no actual models are loaded")
+ return

+ self.min_conf_value_of_textline_text = min_conf_value_of_textline_text
+ self.b_s = 2 if batch_size is None and tr_ocr else 8 if batch_size is None else batch_size

+ if tr_ocr:
+ self.model_zoo.load_model('trocr_processor', '')
+ if model_name:
+ self.model_zoo.load_model('ocr', 'tr', model_name)
+ else:
- self.min_conf_value_of_textline_text = 0.3
- if tr_ocr:
- assert TrOCRProcessor
- self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
- self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
- if self.model_name:
- self.model_ocr_dir = self.model_name
- else:
- self.model_ocr_dir = dir_models + "/model_eynollah_ocr_trocr_20250919"
- self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
- self.model_ocr.to(self.device)
- if not batch_size:
- self.b_s = 2
- else:
- self.b_s = int(batch_size)

+ self.model_zoo.load_model('ocr', 'tr')
+ self.model_zoo.get('ocr').to(self.device)
+ else:
+ if model_name:
+ self.model_zoo.load_model('ocr', '', model_name)
+ else:
- if self.model_name:
- self.model_ocr_dir = self.model_name
- else:
- self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250930"
- model_ocr = load_model(self.model_ocr_dir , compile=False)

- self.prediction_model = tf.keras.models.Model(
- model_ocr.get_layer(name = "image").input,
- model_ocr.get_layer(name = "dense2").output)
- if not batch_size:
- self.b_s = 8
- else:
- self.b_s = int(batch_size)

- with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
- characters = json.load(config_file)

- AUTOTUNE = tf.data.AUTOTUNE
+ self.model_zoo.load_model('ocr', '')
+ self.model_zoo.load_model('num_to_char')
+ self.end_character = len(self.model_zoo.load_model('characters')) + 2

- # Mapping characters to integers.
- char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)

- # Mapping integers back to original characters.
- self.num_to_char = StringLookup(
- vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
- )
- self.end_character = len(characters) + 2
+ @property
+ def device(self):
+ if torch.cuda.is_available():
+ self.logger.info("Using GPU acceleration")
+ return torch.device("cuda:0")
+ else:
+ self.logger.info("Using CPU processing")
+ return torch.device("cpu")

def run(self, overwrite: bool = False,
dir_in: Optional[str] = None,
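Editor's sketch of how the refactored constructor is typically driven. The import path, directory arguments and run() keywords below are assumptions pieced together from the signature context in this hunk and the run() hunk that follows, not values taken from the commit:

# Sketch only: exercising Eynollah_ocr after the model_zoo refactor.
from eynollah.eynollah_ocr import Eynollah_ocr  # module location assumed

ocr = Eynollah_ocr(
    dir_models="/path/to/models",   # forwarded to EynollahModelZoo(basedir=dir_models)
    model_name=None,                # None: use the zoo's default model version
    tr_ocr=False,                   # False: CNN/RNN OCR plus 'num_to_char'/'characters'
    batch_size=None,                # None: falls back to 8 (CNN/RNN) or 2 (TrOCR)
)
ocr.run(
    overwrite=False,
    dir_in="/path/to/page_images",  # placeholder directories
    dir_out="/path/to/output",
)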
@@ -119,13 +110,16 @@ class Eynollah_ocr:
for image_filename in filter(is_image_filename,
os.listdir(dir_in))]
else:
+ assert image_filename
ls_imgs = [image_filename]

if self.tr_ocr:
tr_ocr_input_height_and_width = 384
for dir_img in ls_imgs:
file_name = Path(dir_img).stem
+ assert dir_xmls # FIXME: check the logic
dir_xml = os.path.join(dir_xmls, file_name+'.xml')
+ assert dir_out # FIXME: check the logic
out_file_ocr = os.path.join(dir_out, file_name+'.xml')

if os.path.exists(out_file_ocr):
@@ -204,10 +198,10 @@ class Eynollah_ocr:
cropped_lines = []
indexer_b_s = 0

- pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+ pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_ocr.generate(
pixel_values_merged.to(self.device))
- generated_text_merged = self.processor.batch_decode(
+ generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)

extracted_texts = extracted_texts + generated_text_merged
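Editor's sketch (not part of the commit): the decode step above, written as a standalone helper. The lookup key 'processor' mirrors exactly what the changed lines use; elsewhere in this commit the zoo registers the category as 'trocr_processor':

# Sketch only: TrOCR batch decoding with the processor taken from the model zoo.
from typing import List, Sequence
import torch

def trocr_decode_batch(zoo, model, device: torch.device, imgs: Sequence) -> List[str]:
    processor = zoo.get('processor')  # key as spelled in this hunk
    pixel_values = processor(imgs, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values.to(device))
    return processor.batch_decode(generated_ids, skip_special_tokens=True)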
@@ -227,10 +221,10 @@ class Eynollah_ocr:
cropped_lines = []
indexer_b_s = 0

- pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+ pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_ocr.generate(
pixel_values_merged.to(self.device))
- generated_text_merged = self.processor.batch_decode(
+ generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)

extracted_texts = extracted_texts + generated_text_merged

@@ -247,10 +241,10 @@ class Eynollah_ocr:
cropped_lines = []
indexer_b_s = 0

- pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+ pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_ocr.generate(
pixel_values_merged.to(self.device))
- generated_text_merged = self.processor.batch_decode(
+ generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)

extracted_texts = extracted_texts + generated_text_merged

@@ -265,10 +259,10 @@ class Eynollah_ocr:
cropped_lines = []
indexer_b_s = 0

- pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+ pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_ocr.generate(
pixel_values_merged.to(self.device))
- generated_text_merged = self.processor.batch_decode(
+ generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)

extracted_texts = extracted_texts + generated_text_merged
@@ -282,9 +276,9 @@ class Eynollah_ocr:
cropped_lines = []
indexer_b_s = 0

- pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+ pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
- generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+ generated_text_merged = self.model_zoo.get('processor').batch_decode(generated_ids_merged, skip_special_tokens=True)

extracted_texts = extracted_texts + generated_text_merged

@@ -299,10 +293,10 @@ class Eynollah_ocr:
####n_start = i*self.b_s
####n_end = (i+1)*self.b_s
####imgs = cropped_lines[n_start:n_end]
- ####pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+ ####pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
####generated_ids_merged = self.model_ocr.generate(
#### pixel_values_merged.to(self.device))
- ####generated_text_merged = self.processor.batch_decode(
+ ####generated_text_merged = self.model_zoo.get('processor').batch_decode(
#### generated_ids_merged, skip_special_tokens=True)

####extracted_texts = extracted_texts + generated_text_merged
@@ -11,7 +11,7 @@ from transformers import TrOCRProcessor, VisionEncoderDecoderModel

from eynollah.patch_encoder import PatchEncoder, Patches

- SomeEynollahModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, Model]
+ SomeEynollahModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, Model, List]


# Dict mapping model_category to dict mapping variant (default is '') to Path
@@ -114,14 +114,19 @@ DEFAULT_MODEL_VERSIONS: Dict[str, Dict[str, str]] = {
'': "model_eynollah_ocr_cnnrnn_20250930",
},

- 'ocr_tr_processor': {
+ 'trocr_processor': {
'': 'microsoft/trocr-base-printed',
'htr': "microsoft/trocr-base-handwritten",
},

'num_to_char': {
- '': 'model_eynollah_ocr_cnnrnn_20250930/characters_org.txt'
+ '': 'characters_org.txt'
},

+ 'characters': {
+ '': 'characters_org.txt'
+ },
+
}

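Editor's sketch of how these defaults are consumed. The model_overrides keyword and the (category, variant, value) triple shape are inferred from the constructor and override_models signature in the next hunks; the paths are placeholders:

# Sketch only: picking a non-default model version per (category, variant).
from eynollah.model_zoo import EynollahModelZoo

zoo = EynollahModelZoo(
    basedir="/path/to/models",  # placeholder
    model_overrides=[
        ("trocr_processor", "htr", "microsoft/trocr-base-handwritten"),  # handwritten processor
        ("ocr", "", "my_finetuned_ocr_dir"),                             # hypothetical local model
    ],
)
zoo.load_model("trocr_processor", "htr")
processor = zoo.get("trocr_processor")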
@@ -142,7 +147,7 @@ class EynollahModelZoo():
self.model_versions = deepcopy(DEFAULT_MODEL_VERSIONS)
if model_overrides:
self.override_models(*model_overrides)
- self._loaded: Dict[Tuple[str, str], SomeEynollahModel] = {}
+ self._loaded: Dict[str, SomeEynollahModel] = {}

def override_models(self, *model_overrides: Tuple[str, str, str]):
"""

@@ -216,7 +221,9 @@ class EynollahModelZoo():
model = self._load_ocr_model(variant=model_variant)
elif model_category == 'num_to_char':
model = self._load_num_to_char()
- elif model_category == 'tr_processor':
+ elif model_category == 'characters':
+ model = self._load_characters()
+ elif model_category == 'trocr_processor':
return TrOCRProcessor.from_pretrained(self.model_path(...))
else:
try:
@@ -225,14 +232,13 @@ class EynollahModelZoo():
self.logger.exception(e)
model = load_model(model_path, compile=False, custom_objects={
"PatchEncoder": PatchEncoder, "Patches": Patches})
- self._loaded[(model_category, model_variant)] = model
+ self._loaded[model_category] = model
return model # type: ignore

- def get_model(self, model_categeory, model_variant) -> SomeEynollahModel:
- needle = (model_categeory, model_variant)
- if needle not in self._loaded:
- raise ValueError('Model/variant "{needle} not previously loaded with "load_model(..)"')
- return self._loaded[needle]
+ def get(self, model_category) -> SomeEynollahModel:
+ if model_category not in self._loaded:
+ raise ValueError(f'Model "{model_category} not previously loaded with "load_model(..)"')
+ return self._loaded[model_category]

def _load_ocr_model(self, variant: str) -> SomeEynollahModel:
"""
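Editor's note: with the cache keyed by category alone, only one variant per category is held at a time, and get() no longer needs the variant. A sketch, assuming a zoo built over a populated model directory:

# Sketch only: behaviour of the narrowed cache keying.
from eynollah.model_zoo import EynollahModelZoo

zoo = EynollahModelZoo(basedir="/path/to/models")  # placeholder
zoo.load_model('ocr', 'tr')   # cached under 'ocr'
zoo.load_model('ocr', '')     # re-caches 'ocr', replacing the 'tr' variant
model = zoo.get('ocr')        # whichever variant was loaded last
zoo.get('reading_order')      # raises ValueError if that category was never loaded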
@@ -247,15 +253,21 @@ class EynollahModelZoo():
return Model(
ocr_model.get_layer(name = "image").input, # type: ignore
ocr_model.get_layer(name = "dense2").output) # type: ignore

+ def _load_characters(self) -> List[str]:
+ """
+ Load encoding for OCR
+ """
+ with open(self.model_path('ocr') / self.model_path('num_to_char', absolute=False), "r") as config_file:
+ return json.load(config_file)
+
- def _load_num_to_char(self):
+ def _load_num_to_char(self) -> StringLookup:
"""
Load decoder for OCR
"""
- with open(self.model_path('ocr') / self.model_path('ocr', 'num_to_char', absolute=False), "r") as config_file:
- characters = json.load(config_file)
+ characters = self._load_characters()
# Mapping characters to integers.
- char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)
+ char_to_num = StringLookup(vocabulary=characters, mask_token=None)
# Mapping integers back to original characters.
return StringLookup(
vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
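Editor's sketch of what the new _load_characters / _load_num_to_char pair provides: the character list is read from characters_org.txt next to the OCR model, and num_to_char is the inverted StringLookup used to turn integer predictions back into text. A self-contained round trip with a toy vocabulary:

# Sketch only: the encode/decode round trip behind num_to_char,
# with a toy vocabulary standing in for json.load(characters_org.txt).
from keras.layers import StringLookup

characters = ["a", "b", "c"]
char_to_num = StringLookup(vocabulary=characters, mask_token=None)
num_to_char = StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True)

ids = char_to_num(["a", "c"])                       # integer ids; index 0 is the OOV token
decoded = [c.numpy().decode() for c in num_to_char(ids)]
print(decoded)                                      # ['a', 'c']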
@@ -393,7 +393,12 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
z = gaussian_filter1d(regions_without_separators_0, sigma_)
return np.std(z)

- def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
+ def find_num_col(
+ regions_without_separators,
+ num_col_classifier,
+ tables,
+ multiplier=3.8,
+ ):
if not regions_without_separators.any():
return 0, []
#plt.imshow(regions_without_separators)