PR #173 has been reverted. Additionally, for TrOCR the cropped text lines are no longer accumulated in a single list before prediction; instead, text-line images are collected up to the batch size and prediction is run directly on each full batch.

vahidrezanezhad 2025-09-03 19:18:11 +02:00
parent d9ae7bd12c
commit 8c949cec71
6 changed files with 442 additions and 316 deletions
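A minimal sketch of the batch-wise TrOCR flow described in the commit message, assuming a Hugging Face TrOCR checkpoint and processor; the helper names predict_batch and ocr_textlines and the batch size b_s are illustrative and not identifiers from the repository.

    # Sketch only: batch-wise TrOCR prediction (assumed HF checkpoint; helper
    # names are illustrative, not taken from the code changed below).
    from transformers import TrOCRProcessor, VisionEncoderDecoderModel

    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")

    def predict_batch(images):
        # Run TrOCR on one batch of cropped text-line images (PIL images or RGB arrays).
        pixel_values = processor(images, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        return processor.batch_decode(generated_ids, skip_special_tokens=True)

    def ocr_textlines(textline_images, b_s=8):
        extracted_texts, batch = [], []
        for img in textline_images:
            batch.append(img)
            if len(batch) == b_s:          # a full batch: predict immediately,
                extracted_texts += predict_batch(batch)
                batch = []                 # instead of keeping every crop in one list
        if batch:                          # flush the final, possibly smaller batch
            extracted_texts += predict_batch(batch)
        return extracted_texts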

@@ -73,8 +73,6 @@ from .utils.contour import (
     return_contours_of_interested_region_by_min_size,
     return_contours_of_interested_textline,
     return_parent_contours,
-    dilate_textregion_contours,
-    dilate_textline_contours,
 )
 from .utils.rotate import (
     rotate_image,
@@ -112,8 +110,6 @@ from .utils.resize import resize_image
 from .utils import (
     boosting_headers_by_longshot_region_segmentation,
     crop_image_inside_box,
-    box2rect,
-    box2slice,
     find_num_col,
     otsu_copy_binary,
     put_drop_out_from_only_drop_model,
@@ -1750,7 +1746,7 @@ class Eynollah:
         self.logger.debug("exit extract_text_regions")
         return prediction_regions, prediction_regions2

-    def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
+    def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
         polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001)
         M_main_tot = [cv2.moments(polygons_of_textlines[j])
@@ -1773,17 +1769,18 @@ class Eynollah:
             all_found_textline_polygons.append(textlines_ins[::-1])
             slopes.append(slope_deskew)

-            crop_coor = box2rect(boxes[index])
+            _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated)
             all_box_coord.append(crop_coor)

         return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes

-    def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
+    def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new_light")
         results = self.executor.map(partial(do_work_of_slopes_new_light,
                                             textline_mask_tot_ea=textline_mask_tot,
+                                            image_page_rotated=image_page_rotated,
                                             slope_deskew=slope_deskew,textline_light=self.textline_light,
                                             logger=self.logger,),
                                     boxes, contours, contours_par, range(len(contours_par)))
@@ -1791,12 +1788,13 @@ class Eynollah:
         self.logger.debug("exit get_slopes_and_deskew_new_light")
         return tuple(zip(*results))

-    def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
+    def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new")
         results = self.executor.map(partial(do_work_of_slopes_new,
                                             textline_mask_tot_ea=textline_mask_tot,
+                                            image_page_rotated=image_page_rotated,
                                             slope_deskew=slope_deskew,
                                             MAX_SLOPE=MAX_SLOPE,
                                             KERNEL=KERNEL,
@@ -1807,12 +1805,13 @@ class Eynollah:
         self.logger.debug("exit get_slopes_and_deskew_new")
         return tuple(zip(*results))

-    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
+    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new_curved")
         results = self.executor.map(partial(do_work_of_slopes_new_curved,
                                             textline_mask_tot_ea=textline_mask_tot,
+                                            image_page_rotated=image_page_rotated,
                                             mask_texts_only=mask_texts_only,
                                             num_col=num_col,
                                             scale_par=scale_par,
@@ -1994,9 +1993,9 @@ class Eynollah:
         mask_texts_only = (prediction_regions_org[:,:] ==1)*1
         mask_images_only=(prediction_regions_org[:,:] ==2)*1

-        polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
-        polygons_seplines = filter_contours_area_of_image(
-            mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+        polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
+        polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(
+            mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)

         polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
         polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
@@ -2035,7 +2034,7 @@ class Eynollah:
                 ##polygons_of_images_fin.append(ploy_img_ind)

                 box = cv2.boundingRect(ploy_img_ind)
-                page_coord_img = box2rect(box)
+                _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
                 # cont_page.append(np.array([[page_coord[2], page_coord[0]],
                 #                            [page_coord[3], page_coord[0]],
                 #                            [page_coord[3], page_coord[1]],
@@ -2049,7 +2048,7 @@ class Eynollah:
                 if h < 150 or w < 150:
                     pass
                 else:
-                    page_coord_img = box2rect(box)
+                    _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
                     # cont_page.append(np.array([[page_coord[2], page_coord[0]],
                     #                            [page_coord[3], page_coord[0]],
                     #                            [page_coord[3], page_coord[1]],
@@ -2060,7 +2059,7 @@ class Eynollah:
                                                       [page_coord_img[2], page_coord_img[1]]]))

         self.logger.debug("exit get_regions_extract_images_only")
-        return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_images_fin, image_page, page_coord, cont_page
+        return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page

     def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False):
         self.logger.debug("enter get_regions_light_v")
@@ -2176,31 +2175,31 @@ class Eynollah:
             mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1)
             mask_images_only=(prediction_regions_org[:,:] ==2)*1

-            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
+            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)

             test_khat = np.zeros(prediction_regions_org.shape)
-            test_khat = cv2.fillPoly(test_khat, pts=polygons_seplines, color=(1,1,1))
+            test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1))

             #plt.imshow(test_khat[:,:])
             #plt.show()
             #for jv in range(1):
-            #print(jv, hir_seplines[0][232][3])
+            #print(jv, hir_lines_xml[0][232][3])
             #test_khat = np.zeros(prediction_regions_org.shape)
-            #test_khat = cv2.fillPoly(test_khat, pts = [polygons_seplines[232]], color=(1,1,1))
+            #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1))
             #plt.imshow(test_khat[:,:])
             #plt.show()

-            polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+            polygons_lines_xml = filter_contours_area_of_image(
+                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)

             test_khat = np.zeros(prediction_regions_org.shape)
-            test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1))
+            test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1))

             #plt.imshow(test_khat[:,:])
             #plt.show()
             #sys.exit()

             polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
-            ##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts)
+            ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts)
             polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)

             text_regions_p_true = np.zeros(prediction_regions_org.shape)
@@ -2218,7 +2217,7 @@ class Eynollah:
             #plt.show()
             #print("inside 4 ", time.time()-t_in)
             self.logger.debug("exit get_regions_light_v")
-            return text_regions_p_true, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin, confidence_matrix
+            return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin, confidence_matrix
         else:
             img_bin = resize_image(img_bin,img_height_h, img_width_h )
             self.logger.debug("exit get_regions_light_v")
@@ -2301,9 +2300,9 @@ class Eynollah:
             mask_texts_only=(prediction_regions_org[:,:]==1)*1
             mask_images_only=(prediction_regions_org[:,:]==2)*1

-            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
-            polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
+            polygons_lines_xml = filter_contours_area_of_image(
+                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)

             polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
             polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)
@@ -2315,7 +2314,7 @@ class Eynollah:
             text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))

             self.logger.debug("exit get_regions_from_xy_2models")
-            return text_regions_p_true, erosion_hurts, polygons_seplines
+            return text_regions_p_true, erosion_hurts, polygons_lines_xml
         except:
             if self.input_binary:
                 prediction_bin = np.copy(img_org)
@@ -2350,9 +2349,9 @@ class Eynollah:
             mask_texts_only = (prediction_regions_org == 1)*1
             mask_images_only= (prediction_regions_org == 2)*1

-            polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only)
-            polygons_seplines = filter_contours_area_of_image(
-                mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
+            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
+            polygons_lines_xml = filter_contours_area_of_image(
+                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)

             polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
             polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
@@ -2365,7 +2364,7 @@ class Eynollah:
             erosion_hurts = True

         self.logger.debug("exit get_regions_from_xy_2models")
-        return text_regions_p_true, erosion_hurts, polygons_seplines
+        return text_regions_p_true, erosion_hurts, polygons_lines_xml

     def do_order_of_regions_full_layout(
             self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
@@ -3233,13 +3232,13 @@ class Eynollah:
         if self.plotter:
             self.plotter.save_deskewed_image(slope_deskew)
         self.logger.info("slope_deskew: %.2f°", slope_deskew)
-        return slope_deskew
+        return slope_deskew, slope_first

     def run_marginals(
-            self, textline_mask_tot_ea, mask_images, mask_lines,
+            self, image_page, textline_mask_tot_ea, mask_images, mask_lines,
             num_col_classifier, slope_deskew, text_regions_p_1, table_prediction):

-        textline_mask_tot = textline_mask_tot_ea[:, :]
+        image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :]
         textline_mask_tot[mask_images[:, :] == 1] = 0
         text_regions_p_1[mask_lines[:, :] == 1] = 3
@@ -3257,7 +3256,10 @@ class Eynollah:
         except Exception as e:
             self.logger.error("exception %s", e)

-        return textline_mask_tot, text_regions_p
+        if self.plotter:
+            self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page)
+            self.plotter.save_plot_of_layout_main(text_regions_p, image_page)
+
+        return textline_mask_tot, text_regions_p, image_page_rotated

     def run_boxes_no_full_layout(
             self, image_page, textline_mask_tot, text_regions_p,
@@ -3409,7 +3411,7 @@ class Eynollah:
             text_regions_p[:,:][table_prediction[:,:]==1] = 10
             img_revised_tab = text_regions_p[:,:]
             if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
+                image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
                     rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)

                 text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1])
@@ -3429,7 +3431,7 @@ class Eynollah:
             else:
                 if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                    _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
+                    image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
                         rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)

                     text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1])
@@ -4280,9 +4282,7 @@ class Eynollah:

     def filter_contours_without_textline_inside(
-            self, contours, text_con_org, contours_textline,
-            contours_only_text_parent_d_ordered,
-            conf_contours_textregions):
+            self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered, conf_contours_textregions):
         ###contours_txtline_of_all_textregions = []
         ###for jj in range(len(contours_textline)):
         ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj]
@@ -4306,21 +4306,138 @@ class Eynollah:
         ###if np.any(results==1):
         ###contours_with_textline.append(con_tr)

-        textregion_index_to_del = set()
+        textregion_index_to_del = []
         for index_textregion, textlines_textregion in enumerate(contours_textline):
-            if len(textlines_textregion) == 0:
-                textregion_index_to_del.add(index_textregion)
-
-        def filterfun(lis):
-            if len(lis) == 0:
-                return []
-            return list(np.delete(lis, list(textregion_index_to_del)))
-
-        return (filterfun(contours),
-                filterfun(text_con_org),
-                filterfun(conf_contours_textregions),
-                filterfun(contours_textline),
-                filterfun(contours_only_text_parent_d_ordered),
-                np.arange(len(contours) - len(textregion_index_to_del)))
+            if len(textlines_textregion)==0:
+                textregion_index_to_del.append(index_textregion)
+
+        uniqe_args_trs = np.unique(textregion_index_to_del)
+        uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1]
+
+        for ind_u_a_trs in uniqe_args_trs_sorted:
+            conf_contours_textregions.pop(ind_u_a_trs)
+            contours.pop(ind_u_a_trs)
+            contours_textline.pop(ind_u_a_trs)
+            text_con_org.pop(ind_u_a_trs)
+            if len(contours_only_text_parent_d_ordered) > 0:
+                contours_only_text_parent_d_ordered.pop(ind_u_a_trs)
+
+        return contours, text_con_org, conf_contours_textregions, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours)))
+
+    def dilate_textlines(self, all_found_textline_polygons):
+        for j in range(len(all_found_textline_polygons)):
+            for i in range(len(all_found_textline_polygons[j])):
+                con_ind = all_found_textline_polygons[j][i]
+                con_ind = con_ind.astype(float)
+
+                x_differential = np.diff( con_ind[:,0,0])
+                y_differential = np.diff( con_ind[:,0,1])
+
+                x_min = float(np.min( con_ind[:,0,0] ))
+                y_min = float(np.min( con_ind[:,0,1] ))
+
+                x_max = float(np.max( con_ind[:,0,0] ))
+                y_max = float(np.max( con_ind[:,0,1] ))
+
+                if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70:
+                    x_biger_than_x = np.abs(x_differential) > np.abs(y_differential)
+                    mult = x_biger_than_x*x_differential
+
+                    arg_min_mult = np.argmin(mult)
+                    arg_max_mult = np.argmax(mult)
+
+                    if y_differential[0]==0:
+                        y_differential[0] = 0.1
+                    if y_differential[-1]==0:
+                        y_differential[-1]= 0.1
+                    y_differential = [y_differential[ind] if y_differential[ind] != 0
+                                      else 0.5 * (y_differential[ind-1] + y_differential[ind+1])
+                                      for ind in range(len(y_differential))]
+
+                    if y_differential[0]==0.1:
+                        y_differential[0] = y_differential[1]
+                    if y_differential[-1]==0.1:
+                        y_differential[-1] = y_differential[-2]
+                    y_differential.append(y_differential[0])
+
+                    y_differential = [-1 if y_differential[ind] < 0 else 1
+                                      for ind in range(len(y_differential))]
+                    y_differential = self.return_it_in_two_groups(y_differential)
+                    y_differential = np.array(y_differential)
+
+                    con_scaled = con_ind*1
+                    con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential
+                    con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8
+                    con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8
+
+                    try:
+                        con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5
+                        con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5
+                    except:
+                        pass
+
+                    con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8
+                    con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8
+
+                    try:
+                        con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5
+                        con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5
+                    except:
+                        pass
+
+                else:
+                    y_biger_than_x = np.abs(y_differential) > np.abs(x_differential)
+                    mult = y_biger_than_x*y_differential
+
+                    arg_min_mult = np.argmin(mult)
+                    arg_max_mult = np.argmax(mult)
+
+                    if x_differential[0]==0:
+                        x_differential[0] = 0.1
+                    if x_differential[-1]==0:
+                        x_differential[-1]= 0.1
+                    x_differential = [x_differential[ind] if x_differential[ind] != 0
+                                      else 0.5 * (x_differential[ind-1] + x_differential[ind+1])
+                                      for ind in range(len(x_differential))]
+
+                    if x_differential[0]==0.1:
+                        x_differential[0] = x_differential[1]
+                    if x_differential[-1]==0.1:
+                        x_differential[-1] = x_differential[-2]
+                    x_differential.append(x_differential[0])
+
+                    x_differential = [-1 if x_differential[ind] < 0 else 1
+                                      for ind in range(len(x_differential))]
+                    x_differential = self.return_it_in_two_groups(x_differential)
+                    x_differential = np.array(x_differential)
+
+                    con_scaled = con_ind*1
+                    con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential
+                    con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8
+                    con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8
+
+                    try:
+                        con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5
+                        con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5
+                    except:
+                        pass
+
+                    con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8
+                    con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8
+
+                    try:
+                        con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5
+                        con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5
+                    except:
+                        pass
+
+                con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0
+                con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0
+
+                all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1]
+                all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0]
+
+        return all_found_textline_polygons

     def delete_regions_without_textlines(
             self, slopes, all_found_textline_polygons, boxes_text, txt_con_org,
@@ -4431,7 +4548,7 @@ class Eynollah:
             self.logger.info("Enhancing took %.1fs ", time.time() - t0)

         if self.extract_only_images:
-            text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, image_page, page_coord, cont_page = \
+            text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \
                 self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
             pcgts = self.writer.build_pagexml_no_full_layout(
                 [], page_coord, [], [], [], [],
@@ -4459,7 +4576,8 @@ class Eynollah:
                 all_found_textline_polygons=[ all_found_textline_polygons ]

-                all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
+                all_found_textline_polygons = self.dilate_textregions_contours_textline_version(
+                    all_found_textline_polygons)
                 all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                     all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline")
@@ -4477,7 +4595,7 @@ class Eynollah:
             all_found_textline_polygons_marginals_right = []
             all_box_coord_marginals_left = []
             all_box_coord_marginals_right = []
-            polygons_seplines = []
+            polygons_lines_xml = []
             contours_tables = []
             conf_contours_textregions =[0]
@@ -4491,13 +4609,13 @@ class Eynollah:
                 cont_page, page_coord, order_text_new, id_of_texts_tot,
                 all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
                 all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right,
-                cont_page, polygons_seplines, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order)
+                cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order)
             return pcgts

         #print("text region early -1 in %.1fs", time.time() - t0)
         t1 = time.time()
         if self.light_version:
-            text_regions_p_1, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
+            text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
                 self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
             #print("text region early -2 in %.1fs", time.time() - t0)
@@ -4510,9 +4628,9 @@ class Eynollah:
                 textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )

-                slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew)
+                slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew)
             else:
-                slope_deskew = self.run_deskew(textline_mask_tot_ea)
+                slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
             #print("text region early -2,5 in %.1fs", time.time() - t0)
             #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
             num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
@@ -4524,7 +4642,7 @@ class Eynollah:
             textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
             #print("text region early -4 in %.1fs", time.time() - t0)
         else:
-            text_regions_p_1, erosion_hurts, polygons_seplines = \
+            text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \
                 self.get_regions_from_xy_2models(img_res, is_image_enhanced,
                                                  num_col_classifier)
             self.logger.info("Textregion detection took %.1fs ", time.time() - t1)
@@ -4551,7 +4669,7 @@ class Eynollah:
                 textline_mask_tot_ea = self.run_textline(image_page)
                 self.logger.info("textline detection took %.1fs", time.time() - t1)
                 t1 = time.time()
-                slope_deskew = self.run_deskew(textline_mask_tot_ea)
+                slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
                 self.logger.info("deskewing took %.1fs", time.time() - t1)
             elif num_col_classifier in (1,2):
                 org_h_l_m = textline_mask_tot_ea.shape[0]
@@ -4569,12 +4687,9 @@ class Eynollah:
                 text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new )
                 table_prediction = resize_image(table_prediction,img_h_new, img_w_new )

-        textline_mask_tot, text_regions_p = \
-            self.run_marginals(textline_mask_tot_ea, mask_images, mask_lines,
+        textline_mask_tot, text_regions_p, image_page_rotated = \
+            self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines,
                                num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
-        if self.plotter:
-            self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page)
-            self.plotter.save_plot_of_layout_main(text_regions_p, image_page)

         if self.light_version and num_col_classifier in (1,2):
             image_page = resize_image(image_page,org_h_l_m, org_w_l_m )
@@ -4583,6 +4698,7 @@ class Eynollah:
             textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m )
             text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m )
             table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m )
+            image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m )

         self.logger.info("detection of marginals took %.1fs", time.time() - t1)
         #print("text region early 2 marginal in %.1fs", time.time() - t0)
@@ -4593,14 +4709,14 @@ class Eynollah:
                 boxes, boxes_d, polygons_of_marginals, contours_tables = \
                     self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                                   num_col_classifier, table_prediction, erosion_hurts)
-                ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
+                ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
             else:
                 polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \
                     regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
                     self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                                num_col_classifier, img_only_regions, table_prediction, erosion_hurts,
                                                img_bin_light if self.light_version else None)
-                ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
+                ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
             if self.light_version:
                 drop_label_in_full_layout = 4
                 textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0
@@ -4724,30 +4840,31 @@ class Eynollah:
                     [], [], page_coord, [], [], [], [], [], [],
                     polygons_of_images, contours_tables, [],
                     polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], [],
-                    cont_page, polygons_seplines)
+                    cont_page, polygons_lines_xml)
             else:
                 pcgts = self.writer.build_pagexml_no_full_layout(
                     [], page_coord, [], [], [], [],
                     polygons_of_images,
                     polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [],
-                    cont_page, polygons_seplines, contours_tables)
+                    cont_page, polygons_lines_xml, contours_tables)
             return pcgts

         #print("text region early 3 in %.1fs", time.time() - t0)
         if self.light_version:
-            contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
+            contours_only_text_parent = self.dilate_textregions_contours(
+                contours_only_text_parent)
             contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one(
                 contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals)
             #print("text region early 3.5 in %.1fs", time.time() - t0)
             txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
-                contours_only_text_parent, self.image, confidence_matrix, map=self.executor.map)
-            #txt_con_org = dilate_textregion_contours(txt_con_org)
-            #contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
+                contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map)
+            #txt_con_org = self.dilate_textregions_contours(txt_con_org)
+            #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent)
         else:
             txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
-                contours_only_text_parent, self.image, confidence_matrix, map=self.executor.map)
+                contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map)
         #print("text region early 4 in %.1fs", time.time() - t0)
         boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
         boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)
@@ -4759,11 +4876,11 @@ class Eynollah:
                 all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                     all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2(
                         txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org,
-                        boxes_text, slope_deskew)
+                        image_page_rotated, boxes_text, slope_deskew)
                 all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                     all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2(
                         polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org,
-                        boxes_marginals, slope_deskew)
+                        image_page_rotated, boxes_marginals, slope_deskew)

                 #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \
                 #    self.delete_regions_without_textlines(slopes, all_found_textline_polygons,
@@ -4771,10 +4888,14 @@ class Eynollah:
                 #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \
                 #    self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals,
                 #        boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
-                all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
+                #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
+                #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
+                all_found_textline_polygons = self.dilate_textregions_contours_textline_version(
+                    all_found_textline_polygons)
                 all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                     all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline")
-                all_found_textline_polygons_marginals = dilate_textline_contours(all_found_textline_polygons_marginals)
+                all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(
+                    all_found_textline_polygons_marginals)

                 contours_only_text_parent, txt_con_org, conf_contours_textregions, all_found_textline_polygons, contours_only_text_parent_d_ordered, \
                     index_by_text_par_con = self.filter_contours_without_textline_inside(
                         contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions)
@@ -4783,11 +4904,11 @@ class Eynollah:
                 all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \
                     index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light(
                         txt_con_org, contours_only_text_parent, textline_mask_tot_ea,
-                        boxes_text, slope_deskew)
+                        image_page_rotated, boxes_text, slope_deskew)
                 all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                     all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light(
                         polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
-                        boxes_marginals, slope_deskew)
+                        image_page_rotated, boxes_marginals, slope_deskew)
                 #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                 #    all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
             else:
@@ -4795,25 +4916,25 @@ class Eynollah:
                 all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                     all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new(
                         txt_con_org, contours_only_text_parent, textline_mask_tot_ea,
-                        boxes_text, slope_deskew)
+                        image_page_rotated, boxes_text, slope_deskew)
                 all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                     all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new(
                         polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
-                        boxes_marginals, slope_deskew)
+                        image_page_rotated, boxes_marginals, slope_deskew)
             else:
                 scale_param = 1
                 textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2)
                 all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                     all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(
                         txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode,
-                        boxes_text, text_only,
+                        image_page_rotated, boxes_text, text_only,
                         num_col_classifier, scale_param, slope_deskew)
                 all_found_textline_polygons = small_textlines_to_parent_adherence2(
                     all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier)
                 all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                     all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(
                         polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode,
-                        boxes_marginals, text_only,
+                        image_page_rotated, boxes_marginals, text_only,
                         num_col_classifier, scale_param, slope_deskew)
                 all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(
                     all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
@@ -4950,7 +5071,7 @@ class Eynollah:
                 all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
                 polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right,
                 all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
-                cont_page, polygons_seplines, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h)
+                cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h)
             return pcgts

         contours_only_text_parent_h = None
@@ -5042,7 +5163,7 @@ class Eynollah:
                 txt_con_org, page_coord, order_text_new, id_of_texts_tot,
                 all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
                 all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right,
-                cont_page, polygons_seplines, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions)
+                cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions)
             return pcgts
@@ -5178,8 +5299,12 @@ class Eynollah_ocr:
                cropped_lines = []
                cropped_lines_region_indexer = []
                cropped_lines_meging_indexing = []
+
+                extracted_texts = []

                indexer_text_region = 0
+                indexer_b_s = 0
+
                for nn in root1.iter(region_tags):
                    for child_textregion in nn:
                        if child_textregion.tag.endswith("TextLine"):
@@ -5204,40 +5329,105 @@ class Eynollah_ocr:
                            img_crop = img_poly_on_img[y:y+h, x:x+w, :]
                            img_crop[mask_poly==0] = 255

                            if h2w_ratio > 0.1:
                                cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width) )
                                cropped_lines_meging_indexing.append(0)
+                                indexer_b_s+=1
+
+                                if indexer_b_s==self.b_s:
+                                    imgs = cropped_lines[:]
+                                    cropped_lines = []
+                                    indexer_b_s = 0
+
+                                    pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+                                    generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
+                                    generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+
+                                    extracted_texts = extracted_texts + generated_text_merged
                            else:
                                splited_images, _ = return_textlines_split_if_needed(img_crop, None)
                                #print(splited_images)
                                if splited_images:
                                    cropped_lines.append(resize_image(splited_images[0], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
                                    cropped_lines_meging_indexing.append(1)
+                                    indexer_b_s+=1
+
+                                    if indexer_b_s==self.b_s:
+                                        imgs = cropped_lines[:]
+                                        cropped_lines = []
+                                        indexer_b_s = 0
+
+                                        pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+                                        generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
+                                        generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+
+                                        extracted_texts = extracted_texts + generated_text_merged
+
                                    cropped_lines.append(resize_image(splited_images[1], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
                                    cropped_lines_meging_indexing.append(-1)
+                                    indexer_b_s+=1
+
+                                    if indexer_b_s==self.b_s:
+                                        imgs = cropped_lines[:]
+                                        cropped_lines = []
+                                        indexer_b_s = 0
+
+                                        pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+                                        generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
+                                        generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+
+                                        extracted_texts = extracted_texts + generated_text_merged
                                else:
                                    cropped_lines.append(img_crop)
                                    cropped_lines_meging_indexing.append(0)
+                                    indexer_b_s+=1
+
+                                    if indexer_b_s==self.b_s:
+                                        imgs = cropped_lines[:]
+                                        cropped_lines = []
+                                        indexer_b_s = 0
+
+                                        pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+                                        generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
+                                        generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+
+                                        extracted_texts = extracted_texts + generated_text_merged

                    indexer_text_region = indexer_text_region +1

-                extracted_texts = []
-                n_iterations = math.ceil(len(cropped_lines) / self.b_s)
-
-                for i in range(n_iterations):
-                    if i==(n_iterations-1):
-                        n_start = i*self.b_s
-                        imgs = cropped_lines[n_start:]
-                    else:
-                        n_start = i*self.b_s
-                        n_end = (i+1)*self.b_s
-                        imgs = cropped_lines[n_start:n_end]
+                if indexer_b_s!=0:
+                    imgs = cropped_lines[:]
+                    cropped_lines = []
+                    indexer_b_s = 0
+
                    pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
                    generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
                    generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)

                    extracted_texts = extracted_texts + generated_text_merged

+                ####extracted_texts = []
+                ####n_iterations = math.ceil(len(cropped_lines) / self.b_s)
+
+                ####for i in range(n_iterations):
+                    ####if i==(n_iterations-1):
+                        ####n_start = i*self.b_s
+                        ####imgs = cropped_lines[n_start:]
+                    ####else:
+                        ####n_start = i*self.b_s
+                        ####n_end = (i+1)*self.b_s
+                        ####imgs = cropped_lines[n_start:n_end]
+                    ####pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
+                    ####generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
+                    ####generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                    ####extracted_texts = extracted_texts + generated_text_merged

                del cropped_lines
                gc.collect()
@@ -5288,31 +5478,71 @@ class Eynollah_ocr:
                #print(time.time() - t0 ,'elapsed time')

                indexer = 0
                indexer_textregion = 0
                for nn in root1.iter(region_tags):
-                    text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
-                    unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')
+                    #id_textregion = nn.attrib['id']
+                    #id_textregions.append(id_textregion)
+                    #textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
+
+                    is_textregion_text = False
+                    for childtest in nn:
+                        if childtest.tag.endswith("TextEquiv"):
+                            is_textregion_text = True
+
+                    if not is_textregion_text:
+                        text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
+                        unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')

                    has_textline = False
                    for child_textregion in nn:
                        if child_textregion.tag.endswith("TextLine"):
-                            text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
-                            unicode_textline = ET.SubElement(text_subelement, 'Unicode')
-                            unicode_textline.text = extracted_texts_merged[indexer]
+
+                            is_textline_text = False
+                            for childtest2 in child_textregion:
+                                if childtest2.tag.endswith("TextEquiv"):
+                                    is_textline_text = True
+
+                            if not is_textline_text:
+                                text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
+                                ##text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
+                                unicode_textline = ET.SubElement(text_subelement, 'Unicode')
+                                unicode_textline.text = extracted_texts_merged[indexer]
+                            else:
+                                for childtest3 in child_textregion:
+                                    if childtest3.tag.endswith("TextEquiv"):
+                                        for child_uc in childtest3:
+                                            if child_uc.tag.endswith("Unicode"):
+                                                ##childtest3.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
+                                                child_uc.text = extracted_texts_merged[indexer]
+
                            indexer = indexer + 1
                            has_textline = True
                    if has_textline:
-                        unicode_textregion.text = text_by_textregion[indexer_textregion]
+                        if is_textregion_text:
+                            for child4 in nn:
+                                if child4.tag.endswith("TextEquiv"):
+                                    for childtr_uc in child4:
+                                        if childtr_uc.tag.endswith("Unicode"):
+                                            childtr_uc.text = text_by_textregion[indexer_textregion]
+                        else:
+                            unicode_textregion.text = text_by_textregion[indexer_textregion]
                    indexer_textregion = indexer_textregion + 1

+                ###sample_order = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order]
+                ##ordered_texts_sample = [text for _, text in sorted(sample_order)]
+                ##tot_page_text = ' '.join(ordered_texts_sample)
+                ##for page_element in root1.iter(link+'Page'):
+                    ##text_page = ET.SubElement(page_element, 'TextEquiv')
+                    ##unicode_textpage = ET.SubElement(text_page, 'Unicode')
+                    ##unicode_textpage.text = tot_page_text
+
                ET.register_namespace("",name_space)
                tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None)
+                #print("Job done in %.1fs", time.time() - t0)
            else:
                ###max_len = 280#512#280#512
                ###padding_token = 1500#299#1500#299

@ -1,4 +1,3 @@
from typing import Tuple
import time import time
import math import math
@ -299,17 +298,9 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
x_end_with_child_without_mother, x_end_with_child_without_mother,
new_main_sep_y) new_main_sep_y)
def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]:
return (box[1], box[1] + box[3],
box[0], box[0] + box[2])
def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]:
return (slice(box[1], box[1] + box[3]),
slice(box[0], box[0] + box[2]))
def crop_image_inside_box(box, img_org_copy): def crop_image_inside_box(box, img_org_copy):
image_box = img_org_copy[box2slice(box)]
image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]]
return image_box, box2rect(box)
return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]]
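As an illustration of what the reverted helpers expressed, a small sketch (assuming the box2rect/box2slice definitions shown above) of how an OpenCV-style (x, y, w, h) box maps to the explicit indexing that crop_image_inside_box uses again; the values are made up:

import numpy as np

img = np.zeros((100, 200), dtype=np.uint8)
box = (10, 20, 50, 30)  # x, y, w, h, as returned by cv2.boundingRect
# box2slice(box) == (slice(20, 50), slice(10, 60)), so both crops are identical:
crop_a = img[box2slice(box)]
crop_b = img[box[1] : box[1] + box[3], box[0] : box[0] + box[2]]
assert np.array_equal(crop_a, crop_b)
# box2rect(box) returns the same bounds as a flat (y0, y1, x0, x1) tuple:
assert box2rect(box) == (20, 50, 10, 60)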
def otsu_copy_binary(img): def otsu_copy_binary(img):
img_r = np.zeros((img.shape[0], img.shape[1], 3)) img_r = np.zeros((img.shape[0], img.shape[1], 3))
@ -860,8 +851,7 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop
all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1) all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1)
percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels) percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels)
if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.6 and
if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.7 and
percent_text_to_all_in_drop >= 0.3): percent_text_to_all_in_drop >= 0.3):
layout_in_patch[box0] = drop_capital_label layout_in_patch[box0] = drop_capital_label
else: else:
@ -965,11 +955,11 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom, regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom,
regions_model_full.shape[0] // zoom), regions_model_full.shape[0] // zoom),
interpolation=cv2.INTER_NEAREST) interpolation=cv2.INTER_NEAREST)
contours_only_text_parent_z = [(cnt / zoom).astype(int) for cnt in contours_only_text_parent]
contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent]
### ###
cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \ cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \
find_new_features_of_contours(contours_only_text_parent_z) find_new_features_of_contours(contours_only_text_parent)
length_con=x_max_main-x_min_main length_con=x_max_main-x_min_main
height_con=y_max_main-y_min_main height_con=y_max_main-y_min_main
@ -992,7 +982,8 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
contours_only_text_parent_main_d=[] contours_only_text_parent_main_d=[]
contours_only_text_parent_head_d=[] contours_only_text_parent_head_d=[]
for ii, con in enumerate(contours_only_text_parent_z):
for ii in range(len(contours_only_text_parent)):
con=contours_only_text_parent[ii]
img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3))
img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255))
@ -1003,22 +994,23 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ): if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ):
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2
contours_only_text_parent_head.append(contours_only_text_parent[ii])
contours_only_text_parent_head.append(con)
conf_contours_head.append(None) # why not conf_contours[ii], too?
if contours_only_text_parent_d_ordered is not None: if contours_only_text_parent_d_ordered is not None:
contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
all_box_coord_head.append(all_box_coord[ii]) all_box_coord_head.append(all_box_coord[ii])
slopes_head.append(slopes[ii]) slopes_head.append(slopes[ii])
all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
conf_contours_head.append(None)
else: else:
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1
contours_only_text_parent_main.append(contours_only_text_parent[ii])
contours_only_text_parent_main.append(con)
conf_contours_main.append(conf_contours[ii]) conf_contours_main.append(conf_contours[ii])
if contours_only_text_parent_d_ordered is not None: if contours_only_text_parent_d_ordered is not None:
contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii])
all_box_coord_main.append(all_box_coord[ii]) all_box_coord_main.append(all_box_coord[ii])
slopes_main.append(slopes[ii]) slopes_main.append(slopes[ii])
all_found_textline_polygons_main.append(all_found_textline_polygons[ii]) all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
#print(all_pixels,pixels_main,pixels_header) #print(all_pixels,pixels_main,pixels_header)
### to make it faster ### to make it faster
@ -1026,6 +1018,8 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
# regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom, # regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom,
# regions_model_full.shape[0] // zoom), # regions_model_full.shape[0] // zoom),
# interpolation=cv2.INTER_NEAREST) # interpolation=cv2.INTER_NEAREST)
contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head]
contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main]
### ###
return (regions_model_1, return (regions_model_1,
@ -1748,7 +1742,6 @@ def return_boxes_of_images_by_order_of_reading_new(
x_ending = np.array(x_ending) x_ending = np.array(x_ending)
y_type_2 = np.array(y_type_2) y_type_2 = np.array(y_type_2)
y_diff_type_2 = np.array(y_diff_type_2) y_diff_type_2 = np.array(y_diff_type_2)
all_columns = set(range(len(peaks_neg_tot) - 1))
if ((reading_order_type==1) or if ((reading_order_type==1) or
(reading_order_type==0 and (reading_order_type==0 and
@ -1870,7 +1863,7 @@ def return_boxes_of_images_by_order_of_reading_new(
x_end_by_order.append(len(peaks_neg_tot)-2) x_end_by_order.append(len(peaks_neg_tot)-2)
else: else:
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
columns_covered_by_mothers = set()
columns_covered_by_mothers = []
for dj in range(len(x_start_without_mother)): for dj in range(len(x_start_without_mother)):
columns_covered_by_mothers = columns_covered_by_mothers + \ columns_covered_by_mothers = columns_covered_by_mothers + \
list(range(int(x_start_without_mother[dj]), list(range(int(x_start_without_mother[dj]),
@ -1882,7 +1875,7 @@ def return_boxes_of_images_by_order_of_reading_new(
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
x_starting = np.append(x_starting, columns_not_covered)
x_starting = np.append(x_starting, x_start_without_mother) x_starting = np.append(x_starting, x_start_without_mother)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
x_ending = np.append(x_ending, x_end_without_mother) x_ending = np.append(x_ending, x_end_without_mother)
@ -1913,7 +1906,7 @@ def return_boxes_of_images_by_order_of_reading_new(
x_end_by_order.append(x_end_column_sort[ii]-1) x_end_by_order.append(x_end_column_sort[ii]-1)
else: else:
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
columns_covered_by_mothers = set()
columns_covered_by_mothers = []
for dj in range(len(x_start_without_mother)): for dj in range(len(x_start_without_mother)):
columns_covered_by_mothers = columns_covered_by_mothers + \ columns_covered_by_mothers = columns_covered_by_mothers + \
list(range(int(x_start_without_mother[dj]), list(range(int(x_start_without_mother[dj]),
@ -1925,12 +1918,12 @@ def return_boxes_of_images_by_order_of_reading_new(
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
x_starting = np.append(x_starting, columns_not_covered)
x_starting = np.append(x_starting, x_start_without_mother) x_starting = np.append(x_starting, x_start_without_mother)
x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
x_ending = np.append(x_ending, x_end_without_mother) x_ending = np.append(x_ending, x_end_without_mother)
columns_covered_by_with_child_no_mothers = set()
columns_covered_by_with_child_no_mothers = []
for dj in range(len(x_end_with_child_without_mother)): for dj in range(len(x_end_with_child_without_mother)):
columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \ columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \
list(range(int(x_start_with_child_without_mother[dj]), list(range(int(x_start_with_child_without_mother[dj]),
@ -1974,7 +1967,7 @@ def return_boxes_of_images_by_order_of_reading_new(
if len(x_diff_all_between_nm_wc)>0: if len(x_diff_all_between_nm_wc)>0:
biggest=np.argmax(x_diff_all_between_nm_wc) biggest=np.argmax(x_diff_all_between_nm_wc)
columns_covered_by_mothers = set()
columns_covered_by_mothers = []
for dj in range(len(x_starting_all_between_nm_wc)): for dj in range(len(x_starting_all_between_nm_wc)):
columns_covered_by_mothers = columns_covered_by_mothers + \ columns_covered_by_mothers = columns_covered_by_mothers + \
list(range(int(x_starting_all_between_nm_wc[dj]), list(range(int(x_starting_all_between_nm_wc[dj]),
@ -2099,7 +2092,8 @@ def return_boxes_of_images_by_order_of_reading_new(
x_start_by_order=[] x_start_by_order=[]
x_end_by_order=[] x_end_by_order=[]
if len(x_starting)>0: if len(x_starting)>0:
columns_covered_by_lines_covered_more_than_2col = set()
all_columns = np.arange(len(peaks_neg_tot)-1)
columns_covered_by_lines_covered_more_than_2col = []
for dj in range(len(x_starting)): for dj in range(len(x_starting)):
if set(list(range(int(x_starting[dj]),int(x_ending[dj]) ))) == set(all_columns): if set(list(range(int(x_starting[dj]),int(x_ending[dj]) ))) == set(all_columns):
pass pass
@ -2112,21 +2106,22 @@ def return_boxes_of_images_by_order_of_reading_new(
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1)) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
x_starting = np.append(x_starting, columns_not_covered)
x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
if len(new_main_sep_y) > 0: if len(new_main_sep_y) > 0:
x_starting = np.append(x_starting, 0) x_starting = np.append(x_starting, 0)
x_ending = np.append(x_ending, len(peaks_neg_tot) - 1) x_ending = np.append(x_ending, len(peaks_neg_tot)-1)
else: else:
x_starting = np.append(x_starting, x_starting[0]) x_starting = np.append(x_starting, x_starting[0])
x_ending = np.append(x_ending, x_ending[0]) x_ending = np.append(x_ending, x_ending[0])
else: else:
columns_not_covered = list(all_columns)
all_columns = np.arange(len(peaks_neg_tot)-1)
columns_not_covered = list(set(all_columns))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered)) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
x_starting = np.append(x_starting, columns_not_covered)
x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
ind_args=np.array(range(len(y_type_2))) ind_args=np.array(range(len(y_type_2)))

@ -1,15 +1,7 @@
from typing import Sequence, Union
from numbers import Number
from functools import partial from functools import partial
import itertools
import cv2 import cv2
import numpy as np import numpy as np
from scipy.sparse.csgraph import minimum_spanning_tree
from shapely import geometry
from shapely.geometry import Polygon, LineString
from shapely.geometry.polygon import orient
from shapely import set_precision
from shapely.ops import unary_union, nearest_points
from .rotate import rotate_image, rotation_image_new from .rotate import rotate_image, rotation_image_new
@ -45,28 +37,29 @@ def get_text_region_boxes_by_given_contours(contours):
return boxes, contours_new return boxes, contours_new
def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area):
found_polygons_early = [] found_polygons_early = []
for jv, contour in enumerate(contours): for jv,c in enumerate(contours):
if len(contour) < 3: # A polygon cannot have less than 3 points if len(c) < 3: # A polygon cannot have less than 3 points
continue continue
polygon = contour2polygon(contour, dilate=dilate)
polygon = geometry.Polygon([point[0] for point in c])
area = polygon.area area = polygon.area
if (area >= min_area * np.prod(image.shape[:2]) and if (area >= min_area * np.prod(image.shape[:2]) and
area <= max_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and
hierarchy[0][jv][3] == -1): hierarchy[0][jv][3] == -1):
found_polygons_early.append(polygon2contour(polygon))
found_polygons_early.append(np.array([[point]
for point in polygon.exterior.coords], dtype=np.uint))
return found_polygons_early return found_polygons_early
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0):
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
found_polygons_early = [] found_polygons_early = []
for jv, contour in enumerate(contours): for jv,c in enumerate(contours):
if len(contour) < 3: # A polygon cannot have less than 3 points if len(c) < 3: # A polygon cannot have less than 3 points
continue continue
polygon = contour2polygon(contour, dilate=dilate)
polygon = geometry.Polygon([point[0] for point in c])
# area = cv2.contourArea(contour) # area = cv2.contourArea(c)
area = polygon.area area = polygon.area
##print(np.prod(thresh.shape[:2])) ##print(np.prod(thresh.shape[:2]))
# Check that polygon has area greater than minimal area # Check that polygon has area greater than minimal area
@ -75,8 +68,9 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.
area <= max_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and
# hierarchy[0][jv][3]==-1 # hierarchy[0][jv][3]==-1
True): True):
# print(contour[0][0][1]) # print(c[0][0][1])
found_polygons_early.append(polygon2contour(polygon))
found_polygons_early.append(np.array([[point]
for point in polygon.exterior.coords], dtype=np.int32))
return found_polygons_early return found_polygons_early
def find_new_features_of_contours(contours_main): def find_new_features_of_contours(contours_main):
@ -141,12 +135,12 @@ def return_parent_contours(contours, hierarchy):
if hierarchy[0][i][3] == -1] if hierarchy[0][i][3] == -1]
return contours_parent return contours_parent
def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002): def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == label) * 1 cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
else: else:
cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -253,23 +247,30 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
return cont_int[0], index_r_con, confidence_contour return cont_int[0], index_r_con, confidence_contour
def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix, map=map):
def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map):
if not len(cnts): if not len(cnts):
return [], [] return [], []
confidence_matrix = cv2.resize(confidence_matrix, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
#cnts = cnts/2
cnts = [(i/6).astype(int) for i in cnts]
results = map(partial(do_back_rotation_and_get_cnt_back,
img=img,
slope_first=slope_first,
confidence_matrix=confidence_matrix,
),
cnts, range(len(cnts)))
contours, indexes, conf_contours = tuple(zip(*results))
return [i*6 for i in contours], list(conf_contours)
confs = []
def return_contours_of_interested_textline(region_pre_p, pixel):
for cnt in cnts:
cnt_mask = np.zeros(confidence_matrix.shape)
cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt], color=1.0)
confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
return cnts, confs
def return_contours_of_interested_textline(region_pre_p, label):
# pixels of images are identified by 5
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == label) * 1 cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
else: else:
cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -292,12 +293,12 @@ def return_contours_of_image(image):
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
return contours, hierarchy return contours, hierarchy
def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003): def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == label) * 1 cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
else: else:
cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -310,12 +311,12 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_si
return contours_imgs return contours_imgs
def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area): def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area):
# pixels of images are identified by 5 # pixels of images are identified by 5
if len(region_pre_p.shape) == 3: if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == label) * 1 cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
else: else:
cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = cnts_images.astype(np.uint8) cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
@ -331,97 +332,3 @@ def return_contours_of_interested_region_by_size(region_pre_p, label, min_area,
return img_ret[:, :, 0] return img_ret[:, :, 0]
def dilate_textline_contours(all_found_textline_polygons):
return [[polygon2contour(contour2polygon(contour, dilate=6))
for contour in region]
for region in all_found_textline_polygons]
def dilate_textregion_contours(all_found_textline_polygons):
return [polygon2contour(contour2polygon(contour, dilate=6))
for contour in all_found_textline_polygons]
def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0):
polygon = Polygon([point[0] for point in contour])
if dilate:
polygon = polygon.buffer(dilate)
if polygon.geom_type == 'GeometryCollection':
# heterogeneous result: filter zero-area shapes (LineString, Point)
polygon = unary_union([geom for geom in polygon.geoms if geom.area > 0])
if polygon.geom_type == 'MultiPolygon':
# homogeneous result: construct convex hull to connect
polygon = join_polygons(polygon.geoms)
return make_valid(polygon)
def polygon2contour(polygon: Polygon) -> np.ndarray:
polygon = np.array(polygon.exterior.coords[:-1], dtype=int)
return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis]
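A small sketch of the contour/polygon round trip implemented by contour2polygon and polygon2contour above (dilation omitted); the square contour is made up for illustration:

import numpy as np
from shapely.geometry import Polygon

contour = np.array([[[0, 0]], [[10, 0]], [[10, 5]], [[0, 5]]])  # OpenCV shape (N, 1, 2)
polygon = Polygon([point[0] for point in contour])              # as in contour2polygon
assert polygon.area == 50.0
# back to the OpenCV layout, as in polygon2contour (exterior ring minus closing point)
back = np.array(polygon.exterior.coords[:-1], dtype=int)[:, np.newaxis]
assert back.shape == (4, 1, 2)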
def make_valid(polygon: Polygon) -> Polygon:
"""Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement."""
def isint(x):
return isinstance(x, int) or int(x) == x
# make sure rounding does not invalidate
if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0:
polygon = Polygon(np.round(polygon.exterior.coords))
points = list(polygon.exterior.coords[:-1])
# try by re-arranging points
for split in range(1, len(points)):
if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
break
# simplification may not be possible (at all) due to ordering
# in that case, try another starting point
polygon = Polygon(points[-split:]+points[:-split])
# try by simplification
for tolerance in range(int(polygon.area + 1.5)):
if polygon.is_valid:
break
# simplification may require a larger tolerance
polygon = polygon.simplify(tolerance + 1)
# try by enlarging
for tolerance in range(1, int(polygon.area + 2.5)):
if polygon.is_valid:
break
# enlargement may require a larger tolerance
polygon = polygon.buffer(tolerance)
assert polygon.is_valid, polygon.wkt
return polygon
def join_polygons(polygons: Sequence[Polygon], scale=20) -> Polygon:
"""construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points"""
# ensure input polygons are simply typed and all oriented equally
polygons = [orient(poly)
for poly in itertools.chain.from_iterable(
[poly.geoms
if poly.geom_type in ['MultiPolygon', 'GeometryCollection']
else [poly]
for poly in polygons])]
npoly = len(polygons)
if npoly == 1:
return polygons[0]
# find min-dist path through all polygons (travelling salesman)
pairs = itertools.combinations(range(npoly), 2)
dists = np.zeros((npoly, npoly), dtype=float)
for i, j in pairs:
dist = polygons[i].distance(polygons[j])
if dist < 1e-5:
dist = 1e-5 # if pair merely touches, we still need to get an edge
dists[i, j] = dist
dists[j, i] = dist
dists = minimum_spanning_tree(dists, overwrite=True)
# add bridge polygons (where necessary)
for prevp, nextp in zip(*dists.nonzero()):
prevp = polygons[prevp]
nextp = polygons[nextp]
nearest = nearest_points(prevp, nextp)
bridgep = orient(LineString(nearest).buffer(max(1, scale/5), resolution=1), -1)
polygons.append(bridgep)
jointp = unary_union(polygons)
assert jointp.geom_type == 'Polygon', jointp.wkt
# follow-up calculations will necessarily be integer;
# so anticipate rounding here and then ensure validity
jointp2 = set_precision(jointp, 1.0)
if jointp2.geom_type != 'Polygon' or not jointp2.is_valid:
jointp2 = Polygon(np.round(jointp.exterior.coords))
jointp2 = make_valid(jointp2)
assert jointp2.geom_type == 'Polygon', jointp2.wkt
return jointp2

@ -99,8 +99,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
except: except:
point_left=first_nonzero point_left=first_nonzero
if point_left == first_nonzero and point_right == last_nonzero:
return text_regions
if point_right>=mask_marginals.shape[1]: if point_right>=mask_marginals.shape[1]:

@ -20,8 +20,6 @@ from .contour import (
from . import ( from . import (
find_num_col_deskew, find_num_col_deskew,
crop_image_inside_box, crop_image_inside_box,
box2rect,
box2slice,
) )
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
@ -1349,26 +1347,24 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
return contours_rotated_clean return contours_rotated_clean
def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None): def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None):
if logger is None: if logger is None:
logger = getLogger(__package__) logger = getLogger(__package__)
if not np.prod(img_crop.shape):
return img_crop
if num_col == 1: if num_col == 1:
num_patches = int(img_crop.shape[1] / 200.0) num_patches = int(img_path.shape[1] / 200.0)
else: else:
num_patches = int(img_crop.shape[1] / 140.0) num_patches = int(img_path.shape[1] / 140.0)
# num_patches=int(img_crop.shape[1]/200.) # num_patches=int(img_path.shape[1]/200.)
if num_patches == 0: if num_patches == 0:
num_patches = 1 num_patches = 1
img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
# plt.imshow(img_patch_interest) # plt.imshow(img_patch_ineterst)
# plt.show() # plt.show()
length_x = int(img_crop.shape[1] / float(num_patches)) length_x = int(img_path.shape[1] / float(num_patches))
# margin = int(0.04 * length_x) just recently this was changed because it break lines into 2 # margin = int(0.04 * length_x) just recently this was changed because it break lines into 2
margin = int(0.04 * length_x) margin = int(0.04 * length_x)
# if margin<=4: # if margin<=4:
@ -1376,7 +1372,7 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
# margin=0 # margin=0
width_mid = length_x - 2 * margin width_mid = length_x - 2 * margin
nxf = img_crop.shape[1] / float(width_mid) nxf = img_path.shape[1] / float(width_mid)
if nxf > int(nxf): if nxf > int(nxf):
nxf = int(nxf) + 1 nxf = int(nxf) + 1
@ -1392,12 +1388,12 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
index_x_d = i * width_mid index_x_d = i * width_mid
index_x_u = index_x_d + length_x index_x_u = index_x_d + length_x
if index_x_u > img_crop.shape[1]: if index_x_u > img_path.shape[1]:
index_x_u = img_crop.shape[1] index_x_u = img_path.shape[1]
index_x_d = img_crop.shape[1] - length_x index_x_d = img_path.shape[1] - length_x
# img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
img_xline = img_patch_interest[:, index_x_d:index_x_u] img_xline = img_patch_ineterst[:, index_x_d:index_x_u]
try: try:
assert img_xline.any() assert img_xline.any()
@ -1413,9 +1409,9 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
img_line_rotated = rotate_image(img_xline, slope_xline) img_line_rotated = rotate_image(img_xline, slope_xline)
img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1
img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
img_patch_interest_revised = np.zeros(img_patch_interest.shape) img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape)
for i in range(nxf): for i in range(nxf):
if i == 0: if i == 0:
@ -1425,11 +1421,11 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
index_x_d = i * width_mid index_x_d = i * width_mid
index_x_u = index_x_d + length_x index_x_u = index_x_d + length_x
if index_x_u > img_crop.shape[1]: if index_x_u > img_path.shape[1]:
index_x_u = img_crop.shape[1] index_x_u = img_path.shape[1]
index_x_d = img_crop.shape[1] - length_x index_x_d = img_path.shape[1] - length_x
img_xline = img_patch_interest[:, index_x_d:index_x_u] img_xline = img_patch_ineterst[:, index_x_d:index_x_u]
img_int = np.zeros((img_xline.shape[0], img_xline.shape[1])) img_int = np.zeros((img_xline.shape[0], img_xline.shape[1]))
img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0]
@ -1452,9 +1448,9 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]]
img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin] img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin]
img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
return img_patch_interest_revised return img_patch_ineterst_revised
def do_image_rotation(angle, img, sigma_des, logger=None): def do_image_rotation(angle, img, sigma_des, logger=None):
if logger is None: if logger is None:
@ -1635,7 +1631,7 @@ def get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=None):
def do_work_of_slopes_new( def do_work_of_slopes_new(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, slope_deskew,
textline_mask_tot_ea, image_page_rotated, slope_deskew,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
): ):
if KERNEL is None: if KERNEL is None:
@ -1645,7 +1641,7 @@ def do_work_of_slopes_new(
logger.debug('enter do_work_of_slopes_new') logger.debug('enter do_work_of_slopes_new')
x, y, w, h = box_text x, y, w, h = box_text
crop_coor = box2rect(box_text)
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = np.zeros(textline_mask_tot_ea.shape)
mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
all_text_region_raw = textline_mask_tot_ea * mask_textline all_text_region_raw = textline_mask_tot_ea * mask_textline
@ -1653,7 +1649,7 @@ def do_work_of_slopes_new(
img_int_p = all_text_region_raw[:,:] img_int_p = all_text_region_raw[:,:]
img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2) img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2)
if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
if img_int_p.shape[0] /img_int_p.shape[1] < 0.1:
slope = 0 slope = 0
slope_for_all = slope_deskew slope_for_all = slope_deskew
all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w] all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w]
@ -1693,7 +1689,7 @@ def do_work_of_slopes_new(
def do_work_of_slopes_new_curved( def do_work_of_slopes_new_curved(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, mask_texts_only, num_col, scale_par, slope_deskew,
textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
): ):
if KERNEL is None: if KERNEL is None:
@ -1710,7 +1706,7 @@ def do_work_of_slopes_new_curved(
# plt.imshow(img_int_p) # plt.imshow(img_int_p)
# plt.show() # plt.show()
if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
if img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
slope = 0 slope = 0
slope_for_all = slope_deskew slope_for_all = slope_deskew
else: else:
@ -1736,7 +1732,7 @@ def do_work_of_slopes_new_curved(
slope_for_all = slope_deskew slope_for_all = slope_deskew
slope = slope_for_all slope = slope_for_all
crop_coor = box2rect(box_text)
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
if abs(slope_for_all) < 45: if abs(slope_for_all) < 45:
textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) textline_region_in_image = np.zeros(textline_mask_tot_ea.shape)
@ -1782,7 +1778,7 @@ def do_work_of_slopes_new_curved(
def do_work_of_slopes_new_light( def do_work_of_slopes_new_light(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, slope_deskew, textline_light,
textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light,
logger=None logger=None
): ):
if logger is None: if logger is None:
@ -1790,7 +1786,7 @@ def do_work_of_slopes_new_light(
logger.debug('enter do_work_of_slopes_new_light') logger.debug('enter do_work_of_slopes_new_light')
x, y, w, h = box_text x, y, w, h = box_text
crop_coor = box2rect(box_text)
_, crop_coor = crop_image_inside_box(box_text, image_page_rotated)
mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = np.zeros(textline_mask_tot_ea.shape)
mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
all_text_region_raw = textline_mask_tot_ea * mask_textline all_text_region_raw = textline_mask_tot_ea * mask_textline

@ -289,7 +289,7 @@ class EynollahXmlWriter():
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
for mm in range(len(found_polygons_text_region_h)): for mm in range(len(found_polygons_text_region_h)):
textregion = TextRegionType(id=counter.next_region_id, type_='heading',
textregion = TextRegionType(id=counter.next_region_id, type_='header',
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord)))
page.add_TextRegion(textregion) page.add_TextRegion(textregion)
@ -335,7 +335,7 @@ class EynollahXmlWriter():
page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
for mm in range(len(polygons_lines_to_be_written_in_xml)): for mm in range(len(polygons_lines_to_be_written_in_xml)):
page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
for mm in range(len(found_polygons_tables)): for mm in range(len(found_polygons_tables)):
page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord)))) page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))