@ -158,6 +158,7 @@ class Eynollah:
textline_light = False ,
textline_light = False ,
full_layout = False ,
full_layout = False ,
tables = False ,
tables = False ,
right2left = False ,
input_binary = False ,
input_binary = False ,
allow_scaling = False ,
allow_scaling = False ,
headers_off = False ,
headers_off = False ,
@ -189,6 +190,7 @@ class Eynollah:
self . textline_light = textline_light
self . textline_light = textline_light
self . full_layout = full_layout
self . full_layout = full_layout
self . tables = tables
self . tables = tables
self . right2left = right2left
self . input_binary = input_binary
self . input_binary = input_binary
self . allow_scaling = allow_scaling
self . allow_scaling = allow_scaling
self . headers_off = headers_off
self . headers_off = headers_off
@ -2069,6 +2071,7 @@ class Eynollah:
arg_text_con = [ ]
arg_text_con = [ ]
for ii in range ( len ( cx_text_only ) ) :
for ii in range ( len ( cx_text_only ) ) :
for jj in range ( len ( boxes ) ) :
for jj in range ( len ( boxes ) ) :
print ( cx_text_only [ ii ] , cy_text_only [ ii ] , ' markaz ' )
if cx_text_only [ ii ] > = boxes [ jj ] [ 0 ] and cx_text_only [ ii ] < boxes [ jj ] [ 1 ] and cy_text_only [ ii ] > = boxes [ jj ] [ 2 ] and cy_text_only [ ii ] < boxes [ jj ] [ 3 ] : # this is valid if the center of region identify in which box it is located
if cx_text_only [ ii ] > = boxes [ jj ] [ 0 ] and cx_text_only [ ii ] < boxes [ jj ] [ 1 ] and cy_text_only [ ii ] > = boxes [ jj ] [ 2 ] and cy_text_only [ ii ] < boxes [ jj ] [ 3 ] : # this is valid if the center of region identify in which box it is located
arg_text_con . append ( jj )
arg_text_con . append ( jj )
break
break
@ -2104,6 +2107,9 @@ class Eynollah:
ref_point + = len ( id_of_texts )
ref_point + = len ( id_of_texts )
order_of_texts_tot = [ ]
order_of_texts_tot = [ ]
print ( len ( contours_only_text_parent ) , ' contours_only_text_parent ' )
print ( len ( order_by_con_main ) , ' order_by_con_main ' )
for tj1 in range ( len ( contours_only_text_parent ) ) :
for tj1 in range ( len ( contours_only_text_parent ) ) :
order_of_texts_tot . append ( int ( order_by_con_main [ tj1 ] ) )
order_of_texts_tot . append ( int ( order_by_con_main [ tj1 ] ) )
@ -2618,7 +2624,7 @@ class Eynollah:
regions_without_separators_d = cv2 . erode ( regions_without_separators_d [ : , : ] , KERNEL , iterations = 6 )
regions_without_separators_d = cv2 . erode ( regions_without_separators_d [ : , : ] , KERNEL , iterations = 6 )
t1 = time . time ( )
t1 = time . time ( )
if np . abs ( slope_deskew ) < SLOPE_THRESHOLD :
if np . abs ( slope_deskew ) < SLOPE_THRESHOLD :
boxes , peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new , regions_without_separators , matrix_of_lines_ch , num_col_classifier , erosion_hurts , self . tables )
boxes , peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new , regions_without_separators , matrix_of_lines_ch , num_col_classifier , erosion_hurts , self . tables , self . right2left )
boxes_d = None
boxes_d = None
self . logger . debug ( " len(boxes): %s " , len ( boxes ) )
self . logger . debug ( " len(boxes): %s " , len ( boxes ) )
@ -2628,7 +2634,7 @@ class Eynollah:
img_revised_tab2 = self . add_tables_heuristic_to_layout ( text_regions_p_tables , boxes , 0 , splitter_y_new , peaks_neg_tot_tables , text_regions_p_tables , num_col_classifier , 0.000005 , pixel_line )
img_revised_tab2 = self . add_tables_heuristic_to_layout ( text_regions_p_tables , boxes , 0 , splitter_y_new , peaks_neg_tot_tables , text_regions_p_tables , num_col_classifier , 0.000005 , pixel_line )
img_revised_tab2 , contoures_tables = self . check_iou_of_bounding_box_and_contour_for_tables ( img_revised_tab2 , table_prediction , 10 , num_col_classifier )
img_revised_tab2 , contoures_tables = self . check_iou_of_bounding_box_and_contour_for_tables ( img_revised_tab2 , table_prediction , 10 , num_col_classifier )
else :
else :
boxes_d , peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new_d , regions_without_separators_d , matrix_of_lines_ch_d , num_col_classifier , erosion_hurts , self . tables )
boxes_d , peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new_d , regions_without_separators_d , matrix_of_lines_ch_d , num_col_classifier , erosion_hurts , self . tables , self . right2left )
boxes = None
boxes = None
self . logger . debug ( " len(boxes): %s " , len ( boxes_d ) )
self . logger . debug ( " len(boxes): %s " , len ( boxes_d ) )
@ -2713,7 +2719,7 @@ class Eynollah:
pass
pass
if np . abs ( slope_deskew ) < SLOPE_THRESHOLD :
if np . abs ( slope_deskew ) < SLOPE_THRESHOLD :
boxes , peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new , regions_without_separators , matrix_of_lines_ch , num_col_classifier , erosion_hurts , self . tables )
boxes , peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new , regions_without_separators , matrix_of_lines_ch , num_col_classifier , erosion_hurts , self . tables , self . right2left )
text_regions_p_tables = np . copy ( text_regions_p )
text_regions_p_tables = np . copy ( text_regions_p )
text_regions_p_tables [ : , : ] [ ( table_prediction [ : , : ] == 1 ) ] = 10
text_regions_p_tables [ : , : ] [ ( table_prediction [ : , : ] == 1 ) ] = 10
pixel_line = 3
pixel_line = 3
@ -2722,7 +2728,7 @@ class Eynollah:
img_revised_tab2 , contoures_tables = self . check_iou_of_bounding_box_and_contour_for_tables ( img_revised_tab2 , table_prediction , 10 , num_col_classifier )
img_revised_tab2 , contoures_tables = self . check_iou_of_bounding_box_and_contour_for_tables ( img_revised_tab2 , table_prediction , 10 , num_col_classifier )
else :
else :
boxes_d , peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new_d , regions_without_separators_d , matrix_of_lines_ch_d , num_col_classifier , erosion_hurts , self . tables )
boxes_d , peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new_d , regions_without_separators_d , matrix_of_lines_ch_d , num_col_classifier , erosion_hurts , self . tables , self . right2left )
text_regions_p_tables = np . copy ( text_regions_p_1_n )
text_regions_p_tables = np . copy ( text_regions_p_1_n )
text_regions_p_tables = np . round ( text_regions_p_tables )
text_regions_p_tables = np . round ( text_regions_p_tables )
text_regions_p_tables [ : , : ] [ ( text_regions_p_tables [ : , : ] != 3 ) & ( table_prediction_n [ : , : ] == 1 ) ] = 10
text_regions_p_tables [ : , : ] [ ( text_regions_p_tables [ : , : ] != 3 ) & ( table_prediction_n [ : , : ] == 1 ) ] = 10
@ -3065,10 +3071,17 @@ class Eynollah:
if np . abs ( slope_deskew ) < SLOPE_THRESHOLD :
if np . abs ( slope_deskew ) < SLOPE_THRESHOLD :
boxes , peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new , regions_without_separators , matrix_of_lines_ch , num_col_classifier , erosion_hurts , self . tables )
boxes , peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new , regions_without_separators , matrix_of_lines_ch , num_col_classifier , erosion_hurts , self . tables , self . right2left )
else :
else :
boxes_d , peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new_d , regions_without_separators_d , matrix_of_lines_ch_d , num_col_classifier , erosion_hurts , self . tables )
boxes_d , peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new ( splitter_y_new_d , regions_without_separators_d , matrix_of_lines_ch_d , num_col_classifier , erosion_hurts , self . tables , self . right2left )
#print(boxes_d,'boxes_d')
#img_once = np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1]))
#for box_i in boxes_d:
#img_once[int(box_i[2]):int(box_i[3]),int(box_i[0]):int(box_i[1]) ] =1
#plt.imshow(img_once)
#plt.show()
#print(np.unique(img_once),'img_once')
if self . plotter :
if self . plotter :
self . plotter . write_images_into_directory ( polygons_of_images , image_page )
self . plotter . write_images_into_directory ( polygons_of_images , image_page )
t_order = time . time ( )
t_order = time . time ( )