|
|
@ -1073,8 +1073,9 @@ class Eynollah:
|
|
|
|
poly.put(poly_sub)
|
|
|
|
poly.put(poly_sub)
|
|
|
|
box_sub.put(boxes_sub_new)
|
|
|
|
box_sub.put(boxes_sub_new)
|
|
|
|
|
|
|
|
|
|
|
|
def get_regions_from_xy_2models(self,img,is_image_enhanced):
|
|
|
|
def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier):
|
|
|
|
self.logger.debug("enter get_regions_from_xy_2models")
|
|
|
|
self.logger.debug("enter get_regions_from_xy_2models")
|
|
|
|
|
|
|
|
erosion_hurts = False
|
|
|
|
img_org = np.copy(img)
|
|
|
|
img_org = np.copy(img)
|
|
|
|
img_height_h = img_org.shape[0]
|
|
|
|
img_height_h = img_org.shape[0]
|
|
|
|
img_width_h = img_org.shape[1]
|
|
|
|
img_width_h = img_org.shape[1]
|
|
|
@ -1094,6 +1095,13 @@ class Eynollah:
|
|
|
|
prediction_regions_org_y = prediction_regions_org_y[:,:,0]
|
|
|
|
prediction_regions_org_y = prediction_regions_org_y[:,:,0]
|
|
|
|
mask_zeros_y = (prediction_regions_org_y[:,:]==0)*1
|
|
|
|
mask_zeros_y = (prediction_regions_org_y[:,:]==0)*1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
img_only_regions_with_sep = ( (prediction_regions_org_y[:,:] != 3) & (prediction_regions_org_y[:,:] != 0) )*1
|
|
|
|
|
|
|
|
img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6)
|
|
|
|
|
|
|
|
_, _ = find_num_col(img_only_regions, multiplier=6.0)
|
|
|
|
|
|
|
|
|
|
|
|
img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1)))
|
|
|
|
img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1)))
|
|
|
|
|
|
|
|
|
|
|
|
prediction_regions_org = self.do_prediction(True, img, model_region)
|
|
|
|
prediction_regions_org = self.do_prediction(True, img, model_region)
|
|
|
@ -1156,7 +1164,49 @@ class Eynollah:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
K.clear_session()
|
|
|
|
K.clear_session()
|
|
|
|
return text_regions_p_true
|
|
|
|
return text_regions_p_true, erosion_hurts
|
|
|
|
|
|
|
|
except:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prediction_regions_org = self.do_prediction(True, img, model_region)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prediction_regions_org = prediction_regions_org[:,:,0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0
|
|
|
|
|
|
|
|
session_region.close()
|
|
|
|
|
|
|
|
del model_region
|
|
|
|
|
|
|
|
del session_region
|
|
|
|
|
|
|
|
gc.collect()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
mask_lines_only = (prediction_regions_org[:,:] ==3)*1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
mask_texts_only = (prediction_regions_org[:,:] ==1)*1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
mask_images_only=(prediction_regions_org[:,:] ==2)*1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
text_regions_p_true = np.zeros(prediction_regions_org.shape)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
erosion_hurts = True
|
|
|
|
|
|
|
|
K.clear_session()
|
|
|
|
|
|
|
|
return text_regions_p_true, erosion_hurts
|
|
|
|
|
|
|
|
|
|
|
|
def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
|
|
|
|
def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
|
|
|
|
self.logger.debug("enter do_order_of_regions_full_layout")
|
|
|
|
self.logger.debug("enter do_order_of_regions_full_layout")
|
|
|
@ -1406,7 +1456,7 @@ class Eynollah:
|
|
|
|
return self.do_order_of_regions_full_layout(*args, **kwargs)
|
|
|
|
return self.do_order_of_regions_full_layout(*args, **kwargs)
|
|
|
|
return self.do_order_of_regions_no_full_layout(*args, **kwargs)
|
|
|
|
return self.do_order_of_regions_no_full_layout(*args, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified):
|
|
|
|
def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts):
|
|
|
|
img_g = self.imread(grayscale=True, uint8=True)
|
|
|
|
img_g = self.imread(grayscale=True, uint8=True)
|
|
|
|
|
|
|
|
|
|
|
|
img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3))
|
|
|
|
img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3))
|
|
|
@ -1427,7 +1477,14 @@ class Eynollah:
|
|
|
|
mask_lines = mask_lines.astype(np.uint8)
|
|
|
|
mask_lines = mask_lines.astype(np.uint8)
|
|
|
|
img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1
|
|
|
|
img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1
|
|
|
|
img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8)
|
|
|
|
img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8)
|
|
|
|
img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], KERNEL, iterations=6)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if erosion_hurts:
|
|
|
|
|
|
|
|
img_only_regions = np.copy(img_only_regions_with_sep[:,:])
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
num_col, _ = find_num_col(img_only_regions, multiplier=6.0)
|
|
|
|
num_col, _ = find_num_col(img_only_regions, multiplier=6.0)
|
|
|
|
num_col = num_col + 1
|
|
|
|
num_col = num_col + 1
|
|
|
@ -1628,12 +1685,12 @@ class Eynollah:
|
|
|
|
self.logger.info("Enhancing took %ss ", str(time.time() - t0))
|
|
|
|
self.logger.info("Enhancing took %ss ", str(time.time() - t0))
|
|
|
|
|
|
|
|
|
|
|
|
t1 = time.time()
|
|
|
|
t1 = time.time()
|
|
|
|
text_regions_p_1 = self.get_regions_from_xy_2models(img_res, is_image_enhanced)
|
|
|
|
text_regions_p_1 ,erosion_hurts = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier)
|
|
|
|
self.logger.info("Textregion detection took %ss ", str(time.time() - t1))
|
|
|
|
self.logger.info("Textregion detection took %ss ", str(time.time() - t1))
|
|
|
|
|
|
|
|
|
|
|
|
t1 = time.time()
|
|
|
|
t1 = time.time()
|
|
|
|
num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page = \
|
|
|
|
num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page = \
|
|
|
|
self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified)
|
|
|
|
self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts)
|
|
|
|
self.logger.info("Graphics detection took %ss ", str(time.time() - t1))
|
|
|
|
self.logger.info("Graphics detection took %ss ", str(time.time() - t1))
|
|
|
|
self.logger.info('cont_page %s', cont_page)
|
|
|
|
self.logger.info('cont_page %s', cont_page)
|
|
|
|
|
|
|
|
|
|
|
|