mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-09 20:29:55 +02:00
Merge pull request #102 from qurator-spk/right2left_reading_order
Right2left reading order
This commit is contained in:
commit
68923e0a5d
3 changed files with 57 additions and 11 deletions
|
@ -97,6 +97,12 @@ from qurator.eynollah.eynollah import Eynollah
|
||||||
is_flag=True,
|
is_flag=True,
|
||||||
help="if this parameter set to true, this tool will try to detect tables.",
|
help="if this parameter set to true, this tool will try to detect tables.",
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
"--right2left/--left2right",
|
||||||
|
"-r2l/-l2r",
|
||||||
|
is_flag=True,
|
||||||
|
help="if this parameter set to true, this tool will extract right-to-left reading order.",
|
||||||
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"--input_binary/--input-RGB",
|
"--input_binary/--input-RGB",
|
||||||
"-ib/-irgb",
|
"-ib/-irgb",
|
||||||
|
@ -149,6 +155,7 @@ def main(
|
||||||
textline_light,
|
textline_light,
|
||||||
full_layout,
|
full_layout,
|
||||||
tables,
|
tables,
|
||||||
|
right2left,
|
||||||
input_binary,
|
input_binary,
|
||||||
allow_scaling,
|
allow_scaling,
|
||||||
headers_off,
|
headers_off,
|
||||||
|
@ -184,6 +191,7 @@ def main(
|
||||||
textline_light=textline_light,
|
textline_light=textline_light,
|
||||||
full_layout=full_layout,
|
full_layout=full_layout,
|
||||||
tables=tables,
|
tables=tables,
|
||||||
|
right2left=right2left,
|
||||||
input_binary=input_binary,
|
input_binary=input_binary,
|
||||||
allow_scaling=allow_scaling,
|
allow_scaling=allow_scaling,
|
||||||
headers_off=headers_off,
|
headers_off=headers_off,
|
||||||
|
|
|
@ -158,6 +158,7 @@ class Eynollah:
|
||||||
textline_light=False,
|
textline_light=False,
|
||||||
full_layout=False,
|
full_layout=False,
|
||||||
tables=False,
|
tables=False,
|
||||||
|
right2left=False,
|
||||||
input_binary=False,
|
input_binary=False,
|
||||||
allow_scaling=False,
|
allow_scaling=False,
|
||||||
headers_off=False,
|
headers_off=False,
|
||||||
|
@ -189,6 +190,7 @@ class Eynollah:
|
||||||
self.textline_light = textline_light
|
self.textline_light = textline_light
|
||||||
self.full_layout = full_layout
|
self.full_layout = full_layout
|
||||||
self.tables = tables
|
self.tables = tables
|
||||||
|
self.right2left = right2left
|
||||||
self.input_binary = input_binary
|
self.input_binary = input_binary
|
||||||
self.allow_scaling = allow_scaling
|
self.allow_scaling = allow_scaling
|
||||||
self.headers_off = headers_off
|
self.headers_off = headers_off
|
||||||
|
@ -2069,6 +2071,7 @@ class Eynollah:
|
||||||
arg_text_con = []
|
arg_text_con = []
|
||||||
for ii in range(len(cx_text_only)):
|
for ii in range(len(cx_text_only)):
|
||||||
for jj in range(len(boxes)):
|
for jj in range(len(boxes)):
|
||||||
|
print(cx_text_only[ii],cy_text_only[ii],'markaz')
|
||||||
if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located
|
if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located
|
||||||
arg_text_con.append(jj)
|
arg_text_con.append(jj)
|
||||||
break
|
break
|
||||||
|
@ -2104,6 +2107,9 @@ class Eynollah:
|
||||||
ref_point += len(id_of_texts)
|
ref_point += len(id_of_texts)
|
||||||
|
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = []
|
||||||
|
print(len(contours_only_text_parent),'contours_only_text_parent')
|
||||||
|
print(len(order_by_con_main),'order_by_con_main')
|
||||||
|
|
||||||
for tj1 in range(len(contours_only_text_parent)):
|
for tj1 in range(len(contours_only_text_parent)):
|
||||||
order_of_texts_tot.append(int(order_by_con_main[tj1]))
|
order_of_texts_tot.append(int(order_by_con_main[tj1]))
|
||||||
|
|
||||||
|
@ -2618,7 +2624,7 @@ class Eynollah:
|
||||||
regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
|
regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables)
|
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left)
|
||||||
boxes_d = None
|
boxes_d = None
|
||||||
self.logger.debug("len(boxes): %s", len(boxes))
|
self.logger.debug("len(boxes): %s", len(boxes))
|
||||||
|
|
||||||
|
@ -2628,7 +2634,7 @@ class Eynollah:
|
||||||
img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line)
|
img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line)
|
||||||
img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier)
|
img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier)
|
||||||
else:
|
else:
|
||||||
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables)
|
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left)
|
||||||
boxes = None
|
boxes = None
|
||||||
self.logger.debug("len(boxes): %s", len(boxes_d))
|
self.logger.debug("len(boxes): %s", len(boxes_d))
|
||||||
|
|
||||||
|
@ -2713,7 +2719,7 @@ class Eynollah:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables)
|
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left)
|
||||||
text_regions_p_tables = np.copy(text_regions_p)
|
text_regions_p_tables = np.copy(text_regions_p)
|
||||||
text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10
|
text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10
|
||||||
pixel_line = 3
|
pixel_line = 3
|
||||||
|
@ -2722,7 +2728,7 @@ class Eynollah:
|
||||||
img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier)
|
img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables)
|
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left)
|
||||||
text_regions_p_tables = np.copy(text_regions_p_1_n)
|
text_regions_p_tables = np.copy(text_regions_p_1_n)
|
||||||
text_regions_p_tables = np.round(text_regions_p_tables)
|
text_regions_p_tables = np.round(text_regions_p_tables)
|
||||||
text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10
|
text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10
|
||||||
|
@ -3065,10 +3071,17 @@ class Eynollah:
|
||||||
|
|
||||||
|
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables)
|
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left)
|
||||||
else:
|
else:
|
||||||
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables)
|
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left)
|
||||||
|
|
||||||
|
#print(boxes_d,'boxes_d')
|
||||||
|
#img_once = np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1]))
|
||||||
|
#for box_i in boxes_d:
|
||||||
|
#img_once[int(box_i[2]):int(box_i[3]),int(box_i[0]):int(box_i[1]) ] =1
|
||||||
|
#plt.imshow(img_once)
|
||||||
|
#plt.show()
|
||||||
|
#print(np.unique(img_once),'img_once')
|
||||||
if self.plotter:
|
if self.plotter:
|
||||||
self.plotter.write_images_into_directory(polygons_of_images, image_page)
|
self.plotter.write_images_into_directory(polygons_of_images, image_page)
|
||||||
t_order = time.time()
|
t_order = time.time()
|
||||||
|
|
|
@ -1672,7 +1672,9 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
|
||||||
return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n
|
return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n
|
||||||
|
|
||||||
|
|
||||||
def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, tables):
|
def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, tables, right2left_readingorder):
|
||||||
|
if right2left_readingorder:
|
||||||
|
regions_without_separators = cv2.flip(regions_without_separators,1)
|
||||||
boxes=[]
|
boxes=[]
|
||||||
peaks_neg_tot_tables = []
|
peaks_neg_tot_tables = []
|
||||||
|
|
||||||
|
@ -1763,6 +1765,13 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
|
||||||
cy_hor_diff=matrix_new[:,7][ (matrix_new[:,9]==0) ]
|
cy_hor_diff=matrix_new[:,7][ (matrix_new[:,9]==0) ]
|
||||||
arg_org_hor_some=matrix_new[:,0][ (matrix_new[:,9]==0) ]
|
arg_org_hor_some=matrix_new[:,0][ (matrix_new[:,9]==0) ]
|
||||||
|
|
||||||
|
if right2left_readingorder:
|
||||||
|
x_max_hor_some_new = regions_without_separators.shape[1] - x_min_hor_some
|
||||||
|
x_min_hor_some_new = regions_without_separators.shape[1] - x_max_hor_some
|
||||||
|
|
||||||
|
x_min_hor_some =list(np.copy(x_min_hor_some_new))
|
||||||
|
x_max_hor_some =list(np.copy(x_max_hor_some_new))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1774,7 +1783,6 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
|
||||||
reading_order_type,x_starting,x_ending,y_type_2,y_diff_type_2,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother,new_main_sep_y=return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x_max_hor_some,cy_hor_some,peaks_neg_tot,cy_hor_diff)
|
reading_order_type,x_starting,x_ending,y_type_2,y_diff_type_2,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother,new_main_sep_y=return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x_max_hor_some,cy_hor_some,peaks_neg_tot,cy_hor_diff)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (reading_order_type==1) or (reading_order_type==0 and (len(y_lines_without_mother)>=2 or there_is_sep_with_child==1)):
|
if (reading_order_type==1) or (reading_order_type==0 and (len(y_lines_without_mother)>=2 or there_is_sep_with_child==1)):
|
||||||
|
|
||||||
|
|
||||||
|
@ -2028,6 +2036,7 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
|
||||||
columns_not_covered_child_no_mother=np.sort(columns_not_covered_child_no_mother)
|
columns_not_covered_child_no_mother=np.sort(columns_not_covered_child_no_mother)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ind_args=np.array(range(len(y_type_2)))
|
ind_args=np.array(range(len(y_type_2)))
|
||||||
|
|
||||||
|
|
||||||
|
@ -2281,7 +2290,6 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
|
||||||
|
|
||||||
ind_args=np.array(range(len(y_type_2)))
|
ind_args=np.array(range(len(y_type_2)))
|
||||||
#ind_args=np.array(ind_args)
|
#ind_args=np.array(ind_args)
|
||||||
#print(ind_args,'ind_args')
|
|
||||||
for column in range(len(peaks_neg_tot)-1):
|
for column in range(len(peaks_neg_tot)-1):
|
||||||
#print(column,'column')
|
#print(column,'column')
|
||||||
ind_args_in_col=ind_args[x_starting==column]
|
ind_args_in_col=ind_args[x_starting==column]
|
||||||
|
@ -2337,4 +2345,21 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
|
||||||
|
|
||||||
#else:
|
#else:
|
||||||
#boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]])
|
#boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]])
|
||||||
|
|
||||||
|
if right2left_readingorder:
|
||||||
|
peaks_neg_tot_tables_new = []
|
||||||
|
if len(peaks_neg_tot_tables)>=1:
|
||||||
|
for peaks_tab_ind in peaks_neg_tot_tables:
|
||||||
|
peaks_neg_tot_tables_ind = regions_without_separators.shape[1] - np.array(peaks_tab_ind)
|
||||||
|
peaks_neg_tot_tables_ind = list(peaks_neg_tot_tables_ind[::-1])
|
||||||
|
peaks_neg_tot_tables_new.append(peaks_neg_tot_tables_ind)
|
||||||
|
|
||||||
|
|
||||||
|
for i in range(len(boxes)):
|
||||||
|
x_start_new = regions_without_separators.shape[1] - boxes[i][1]
|
||||||
|
x_end_new = regions_without_separators.shape[1] - boxes[i][0]
|
||||||
|
boxes[i][0] = x_start_new
|
||||||
|
boxes[i][1] = x_end_new
|
||||||
|
return boxes, peaks_neg_tot_tables_new
|
||||||
|
else:
|
||||||
return boxes, peaks_neg_tot_tables
|
return boxes, peaks_neg_tot_tables
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue