From 08c8c260285ffefd82bd01409b7c1dbac1993aaf Mon Sep 17 00:00:00 2001
From: Robert Sachunsky
Date: Tue, 30 Sep 2025 03:52:19 +0200
Subject: [PATCH] indent extremely long lines

---
 src/eynollah/eynollah.py             | 750 ++++++++++++++++++---------
 src/eynollah/utils/__init__.py       |  30 +-
 src/eynollah/utils/separate_lines.py | 136 +++--
 src/eynollah/utils/utils_ocr.py      |  25 +-
 4 files changed, 652 insertions(+), 289 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 0c9692e..2e31433 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -272,7 +272,6 @@ class Eynollah:
         else:
             self.threshold_art_class_textline = 0.1
 
-        self.dir_models = dir_models
         self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
         self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425"
         self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425"
@@ -289,8 +288,17 @@ class Eynollah:
         self.model_page_dir = dir_models + "/model_eynollah_page_extraction_20250915"
         self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
         self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
-        self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18"
-        self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"#"/model_mb_ro_aug_ens_11"#"/model_step_3200000_mb_ro"#"/model_ens_reading_order_machine_based"#"/model_mb_ro_aug_ens_8"#"/model_ens_reading_order_machine_based"
+        self.model_region_dir_p_ens_light_only_images_extraction = (dir_models +
+            "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18"
+        )
+        self.model_reading_order_dir = (dir_models +
+            "/model_eynollah_reading_order_20250824"
+            #"/model_mb_ro_aug_ens_11"
+            #"/model_step_3200000_mb_ro"
+            #"/model_ens_reading_order_machine_based"
+            #"/model_mb_ro_aug_ens_8"
+            #"/model_ens_reading_order_machine_based"
+        )
         #"/modelens_12sp_elay_0_3_4__3_6_n"
         #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"
         #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"
@@ -379,11 +387,9 @@ class Eynollah:
                 self.b_s_ocr = 8
             else:
                 self.b_s_ocr = int(batch_size_ocr)
-
             with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
                 characters = json.load(config_file)
-
             AUTOTUNE = tf.data.AUTOTUNE
@@ -840,7 +846,9 @@ class Eynollah:
             self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1,
             thresholding_for_some_classes_in_light_version=False,
-            thresholding_for_artificial_class_in_light_version=False, thresholding_for_fl_light_version=False, threshold_art_class_textline=0.1):
+            thresholding_for_artificial_class_in_light_version=False,
+            thresholding_for_fl_light_version=False,
+            threshold_art_class_textline=0.1):
 
         self.logger.debug("enter do_prediction")
         img_height_model = model.layers[-1].output_shape[1]
@@ -1254,7 +1262,9 @@ class Eynollah:
             self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1,
             thresholding_for_some_classes_in_light_version=False,
-            thresholding_for_artificial_class_in_light_version=False, threshold_art_class_textline=0.1, threshold_art_class_layout=0.1):
+            thresholding_for_artificial_class_in_light_version=False,
+            threshold_art_class_textline=0.1,
+            threshold_art_class_layout=0.1):
 
         self.logger.debug("enter do_prediction_new_concept")
         img_height_model = model.layers[-1].output_shape[1]
@@ -1384,7 +1394,8 @@ class Eynollah:
                     for i_batch, j_batch in zip(list_i_s, list_j_s):
                        seg_in = seg[indexer_inside_batch]
-                        if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                        if (thresholding_for_artificial_class_in_light_version or
+                            thresholding_for_some_classes_in_light_version):
                             seg_in_art = seg_art[indexer_inside_batch]
 
                         index_y_u_in = list_y_u[indexer_inside_batch]
@@ -1404,7 +1415,8 @@ class Eynollah:
                                 label_p_pred[0, 0:-margin or None,
                                                 0:-margin or None,
                                                 1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                                 index_x_d_in + 0:index_x_u_in - margin, 1] = \
                                                     seg_in_art[0:-margin or None,
@@ -1421,7 +1433,8 @@ class Eynollah:
                                 label_p_pred[0, margin:,
                                                 margin:,
                                                 1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                                 index_x_d_in + margin:index_x_u_in - 0, 1] = \
                                                     seg_in_art[margin:,
@@ -1439,7 +1452,8 @@ class Eynollah:
                                                 0:-margin or None,
                                                 1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                                 index_x_d_in + 0:index_x_u_in - margin, 1] = \
                                                     seg_in_art[margin:,
@@ -1456,7 +1470,8 @@ class Eynollah:
                                 label_p_pred[0, 0:-margin or None,
                                                 margin:,
                                                 1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                                 index_x_d_in + margin:index_x_u_in - 0, 1] = \
                                                     seg_in_art[0:-margin or None,
@@ -1473,7 +1488,8 @@ class Eynollah:
                                 label_p_pred[0, margin:-margin or None,
                                                 0:-margin or None,
                                                 1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                                 index_x_d_in + 0:index_x_u_in - margin, 1] = \
                                                     seg_in_art[margin:-margin or None,
@@ -1489,7 +1505,8 @@ class Eynollah:
                                 label_p_pred[0, margin:-margin or None,
                                                 margin:,
                                                 1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                                 index_x_d_in + margin:index_x_u_in - 0, 1] = \
                                                     seg_in_art[margin:-margin or None,
@@ -1505,7 +1522,8 @@ class Eynollah:
                                 label_p_pred[0, 0:-margin or None,
                                                 margin:-margin or None,
                                                 1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                                 index_x_d_in + margin:index_x_u_in - margin, 1] = \
                                                     seg_in_art[0:-margin or None,
@@ -1521,7 +1539,8 @@ class Eynollah:
                                 label_p_pred[0, margin:,
                                                 margin:-margin or None,
                                                 1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                                 index_x_d_in + margin:index_x_u_in - margin, 1] = \
                                                     seg_in_art[margin:,
@@ -1537,7 +1556,8 @@ class Eynollah:
                                 label_p_pred[0, margin:-margin or None,
                                                 margin:-margin or None,
                                                 1]
-                            if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version:
+                            if (thresholding_for_artificial_class_in_light_version or
+                                thresholding_for_some_classes_in_light_version):
                                 prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                                 index_x_d_in + margin:index_x_u_in - margin, 1] = \
                                                     seg_in_art[margin:-margin or None,
@@ -1686,7 +1706,10 @@ class Eynollah:
         else:
             img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500).astype(np.uint8)
 
-        prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1, n_batch_inference=3, thresholding_for_fl_light_version=thresholding_for_fl_light_version)
+        prediction_regions = self.do_prediction(patches, img, model_region,
+                                                marginal_of_patch_percent=0.1,
+                                                n_batch_inference=3,
+                                                thresholding_for_fl_light_version=thresholding_for_fl_light_version)
         prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
         self.logger.debug("exit extract_text_regions")
         return prediction_regions, prediction_regions
@@ -1839,7 +1862,10 @@ class Eynollah:
                 cy_textline_in = [cy_main_tot[ind] for ind in indexes_in]
                 w_h_textlines_in = [w_h_textlines[ind][0] / float(w_h_textlines[ind][1]) for ind in indexes_in]
 
-                textlines_ins = self.get_textlines_of_a_textregion_sorted(textlines_ins, cx_textline_in, cy_textline_in, w_h_textlines_in)
+                textlines_ins = self.get_textlines_of_a_textregion_sorted(textlines_ins,
+                                                                          cx_textline_in,
+                                                                          cy_textline_in,
+                                                                          w_h_textlines_in)
                 all_found_textline_polygons.append(textlines_ins)#[::-1])
                 slopes.append(slope_deskew)
@@ -1847,7 +1873,13 @@ class Eynollah:
             crop_coor = box2rect(boxes[index])
             all_box_coord.append(crop_coor)
 
-        return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes
+        return (all_found_textline_polygons,
+                boxes,
+                contours,
+                contours_par,
+                all_box_coord,
+                np.array(range(len(contours_par))),
+                slopes)
 
     def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
         if not len(contours):
@@ -1883,7 +1915,8 @@ class Eynollah:
         self.logger.debug("exit get_slopes_and_deskew_new")
         return tuple(zip(*results))
 
-    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
+    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes,
+                                         mask_texts_only, num_col, scale_par, slope_deskew):
         if not len(contours):
             return [], [], [], [], [], [], []
         self.logger.debug("enter get_slopes_and_deskew_new_curved")
@@ -1914,10 +1947,11 @@ class Eynollah:
         img_w = img_org.shape[1]
         img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w))
 
-        prediction_textline = self.do_prediction(
-            use_patches, img, self.model_textline,
-            marginal_of_patch_percent=0.15, n_batch_inference=3,
-            thresholding_for_artificial_class_in_light_version=self.textline_light, threshold_art_class_textline=self.threshold_art_class_textline)
+        prediction_textline = self.do_prediction(use_patches, img, self.model_textline,
+                                                 marginal_of_patch_percent=0.15,
+                                                 n_batch_inference=3,
+                                                 thresholding_for_artificial_class_in_light_version=self.textline_light,
+                                                 threshold_art_class_textline=self.threshold_art_class_textline)
         #if not self.textline_light:
         #if num_col_classifier==1:
         #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline)
@@ -2009,12 +2043,14 @@ class Eynollah:
         boxes_sub_new = []
         poly_sub = []
         for mv in range(len(boxes_per_process)):
-            crop_img, _ = crop_image_inside_box(boxes_per_process[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
+            crop_img, _ = crop_image_inside_box(boxes_per_process[mv],
+                                                np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
             crop_img = crop_img[:, :, 0]
             crop_img = cv2.erode(crop_img, KERNEL, iterations=2)
             try:
                 textline_con, hierarchy = return_contours_of_image(crop_img)
-                textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy, max_area=1, min_area=0.0008)
+                textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy,
+                                                                 max_area=1, min_area=0.0008)
                 y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
                 sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0)))
                 crop_img[crop_img > 0] = 1
@@ -2139,7 +2175,13 @@ class Eynollah:
                                        [page_coord_img[2], page_coord_img[1]]]))
 
         self.logger.debug("exit get_regions_extract_images_only")
-        return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_images_fin, image_page, page_coord, cont_page
+        return (text_regions_p_true,
+                erosion_hurts,
+                polygons_seplines,
+                polygons_of_images_fin,
+                image_page,
+                page_coord,
+                cont_page)
 
     def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False):
         self.logger.debug("enter get_regions_light_v")
@@ -2197,7 +2239,8 @@ class Eynollah:
         #print("inside 1 ", time.time()-t_in)
 
         ###textline_mask_tot_ea = self.run_textline(img_bin)
-        self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), len(np.unique(img_resized)))
+        self.logger.debug("detecting textlines on %s with %d colors",
+                          str(img_resized.shape), len(np.unique(img_resized)))
         textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier)
         textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h )
@@ -2214,13 +2257,15 @@ class Eynollah:
                     img_resized.shape[1], img_resized.shape[0], num_col_classifier)
                 prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
                     True, img_resized, self.model_region_1_2, n_batch_inference=1,
-                    thresholding_for_some_classes_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout)
+                    thresholding_for_some_classes_in_light_version=True,
+                    threshold_art_class_layout=self.threshold_art_class_layout)
             else:
                 prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3))
                 confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1]))
                 prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept(
                     False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1,
-                    thresholding_for_artificial_class_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout)
+                    thresholding_for_artificial_class_in_light_version=True,
+                    threshold_art_class_layout=self.threshold_art_class_layout)
                 ys = slice(*self.page_coord[0:2])
                 xs = slice(*self.page_coord[2:4])
                 prediction_regions_org[ys, xs] = prediction_regions_page
@@ -2233,8 +2278,11 @@ class Eynollah:
                         img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier)
                 prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
                     True, img_resized, self.model_region_1_2, n_batch_inference=2,
-                    thresholding_for_some_classes_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout)
-                ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True)
+                    thresholding_for_some_classes_in_light_version=True,
+                    threshold_art_class_layout=self.threshold_art_class_layout)
+                ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region,
+                ###n_batch_inference=3,
+                ###thresholding_for_some_classes_in_light_version=True)
             #print("inside 3 ", time.time()-t_in)
             #plt.imshow(prediction_regions_org[:,:,0])
             #plt.show()
@@ -2297,7 +2345,12 @@ class Eynollah:
             #plt.show()
             #print("inside 4 ", time.time()-t_in)
             self.logger.debug("exit get_regions_light_v")
-            return text_regions_p_true, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin, confidence_matrix
+            return (text_regions_p_true,
+                    erosion_hurts,
+                    polygons_seplines,
+                    textline_mask_tot_ea,
+                    img_bin,
+                    confidence_matrix)
         else:
             img_bin = resize_image(img_bin,img_height_h, img_width_h )
             self.logger.debug("exit get_regions_light_v")
@@ -2417,14 +2470,10 @@ class Eynollah:
         #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
 
         #prediction_regions_org = self.do_prediction(True, img, self.model_region)
-
        #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
-
        #prediction_regions_org = prediction_regions_org[:,:,0]
-
        #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0
-
         mask_lines_only = (prediction_regions_org == 3)*1
         mask_texts_only = (prediction_regions_org == 1)*1
         mask_images_only= (prediction_regions_org == 2)*1
@@ -2843,7 +2892,8 @@ class Eynollah:
                     contours_new.append(contours_sep[ji])
                     if num_col_classifier>=2:
                         only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1]))
-                        only_recent_contour_image= cv2.fillPoly(only_recent_contour_image, pts=[contours_sep[ji]], color=(1,1,1))
+                        only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,
+                                                                pts=[contours_sep[ji]], color=(1,1,1))
                         table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early
                         iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum()
                         #print(iou_in,'iou_in_in1')
@@ -2928,9 +2978,11 @@ class Eynollah:
             contours,hirarchy=cv2.findContours(thresh.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
 
             if indiv==pixel_table:
-                main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = 0.001)
+                main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy,
+                                                                     max_area=1, min_area=0.001)
             else:
-                main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = min_area)
+                main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy,
+                                                                     max_area=1, min_area=min_area)
 
             img_comm = cv2.fillPoly(img_comm, pts = main_contours, color = (indiv, indiv, indiv))
             img_comm = img_comm.astype(np.uint8)
@@ -2965,8 +3017,14 @@ class Eynollah:
                 y_min_main_line ,y_max_main_line=find_features_of_contours(contours_line)
                 y_min_main_tab ,y_max_main_tab=find_features_of_contours(contours_tab)
 
-                cx_tab_m_text,cy_tab_m_text ,x_min_tab_m_text , x_max_tab_m_text, y_min_tab_m_text ,y_max_tab_m_text, _= find_new_features_of_contours(contours_table_m_text)
-                cx_tabl,cy_tabl ,x_min_tabl , x_max_tabl, y_min_tabl ,y_max_tabl,_= find_new_features_of_contours(contours_tab)
+                (cx_tab_m_text, cy_tab_m_text,
+                 x_min_tab_m_text, x_max_tab_m_text,
+                 y_min_tab_m_text, y_max_tab_m_text,
+                 _) = find_new_features_of_contours(contours_table_m_text)
+                (cx_tabl, cy_tabl,
+                 x_min_tabl, x_max_tabl,
+                 y_min_tabl, y_max_tabl,
+                 _) = find_new_features_of_contours(contours_tab)
 
                 if len(y_min_main_tab )>0:
                     y_down_tabs=[]
@@ -2976,9 +3034,15 @@ class Eynollah:
                         y_down_tab=[]
                         y_up_tab=[]
                         for i_l in range(len(y_min_main_line)):
-                            if y_min_main_tab[i_t]>y_min_main_line[i_l] and y_max_main_tab[i_t]>y_min_main_line[i_l] and y_min_main_tab[i_t]>y_max_main_line[i_l] and y_max_main_tab[i_t]>y_min_main_line[i_l]:
+                            if (y_min_main_tab[i_t] > y_min_main_line[i_l] and
+                                y_max_main_tab[i_t] > y_min_main_line[i_l] and
+                                y_min_main_tab[i_t] > y_max_main_line[i_l] and
+                                y_max_main_tab[i_t] > y_min_main_line[i_l]):
                                 pass
-                            elif y_min_main_tab[i_t]
             if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                 _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
-                    rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
+                    rotation_not_90_func(image_page, textline_mask_tot, text_regions_p,
+                                         table_prediction, slope_deskew)
 
-                text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1])
-                textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1])
-                table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1])
+                text_regions_p_1_n = resize_image(text_regions_p_1_n,
+                                                  text_regions_p.shape[0],
+                                                  text_regions_p.shape[1])
+                textline_mask_tot_d = resize_image(textline_mask_tot_d,
+                                                   text_regions_p.shape[0],
+                                                   text_regions_p.shape[1])
+                table_prediction_n = resize_image(table_prediction_n,
+                                                  text_regions_p.shape[0],
+                                                  text_regions_p.shape[1])
 
                 regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1
                 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
@@ -3502,11 +3580,18 @@ class Eynollah:
         else:
             if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                 _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
-                    rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
+                    rotation_not_90_func(image_page, textline_mask_tot, text_regions_p,
+                                         table_prediction, slope_deskew)
 
-                text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1])
-                textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1])
-                table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1])
+                text_regions_p_1_n = resize_image(text_regions_p_1_n,
+                                                  text_regions_p.shape[0],
+                                                  text_regions_p.shape[1])
+                textline_mask_tot_d = resize_image(textline_mask_tot_d,
+                                                   text_regions_p.shape[0],
+                                                   text_regions_p.shape[1])
+                table_prediction_n = resize_image(table_prediction_n,
+                                                  text_regions_p.shape[0],
+                                                  text_regions_p.shape[1])
 
                 regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1
                 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
@@ -3565,7 +3650,8 @@ class Eynollah:
                 pixel_line = 3
                 img_revised_tab2 = self.add_tables_heuristic_to_layout(
-                    text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables,
+                    text_regions_p_tables, boxes_d, 0, splitter_y_new_d,
+                    peaks_neg_tot_tables_d, text_regions_p_tables,
                     num_col_classifier, 0.000005, pixel_line)
 
                 img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(
@@ -3574,8 +3660,9 @@ class Eynollah:
                 img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated)
                 img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8)
-
-                img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1])
+                img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated,
+                                                          text_regions_p.shape[0],
+                                                          text_regions_p.shape[1])
 
             if np.abs(slope_deskew) < 0.13:
                 img_revised_tab = np.copy(img_revised_tab2[:,:,0])
@@ -3646,7 +3733,8 @@ class Eynollah:
             ##else:
             ##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p)
-            ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions)
+            ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully,
+            ###                                                                 regions_fully_np, img_only_regions)
             # plt.imshow(regions_fully[:,:,0])
             # plt.show()
             text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4
@@ -3709,7 +3797,10 @@ class Eynollah:
         min_cont_size_to_be_dilated = 10
         if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version:
-            cx_conts, cy_conts, x_min_conts, x_max_conts, y_min_conts, y_max_conts, _ = find_new_features_of_contours(contours_only_text_parent)
+            (cx_conts, cy_conts,
+             x_min_conts, x_max_conts,
+             y_min_conts, y_max_conts,
+             _) = find_new_features_of_contours(contours_only_text_parent)
             args_cont_located = np.array(range(len(contours_only_text_parent)))
 
             diff_y_conts = np.abs(y_max_conts[:]-y_min_conts)
@@ -3724,15 +3815,31 @@ class Eynollah:
             args_cont_located_excluded = args_cont_located[diff_x_ratio>=1.3]
             args_cont_located_included = args_cont_located[diff_x_ratio<1.3]
 
-            contours_only_text_parent_excluded = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]>=1.3]#contours_only_text_parent[diff_x_ratio>=1.3]
-            contours_only_text_parent_included = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]<1.3]#contours_only_text_parent[diff_x_ratio<1.3]
+            contours_only_text_parent_excluded = [contours_only_text_parent[ind]
+                                                  #contours_only_text_parent[diff_x_ratio>=1.3]
+                                                  for ind in range(len(contours_only_text_parent))
+                                                  if diff_x_ratio[ind]>=1.3]
+            contours_only_text_parent_included = [contours_only_text_parent[ind]
+                                                  #contours_only_text_parent[diff_x_ratio<1.3]
+                                                  for ind in range(len(contours_only_text_parent))
+                                                  if diff_x_ratio[ind]<1.3]
 
-
-            cx_conts_excluded = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]>=1.3]#cx_conts[diff_x_ratio>=1.3]
-            cx_conts_included = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]<1.3]#cx_conts[diff_x_ratio<1.3]
-
-            cy_conts_excluded = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]>=1.3]#cy_conts[diff_x_ratio>=1.3]
-            cy_conts_included = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]<1.3]#cy_conts[diff_x_ratio<1.3]
+            cx_conts_excluded = [cx_conts[ind]
+                                 #cx_conts[diff_x_ratio>=1.3]
+                                 for ind in range(len(cx_conts))
+                                 if diff_x_ratio[ind]>=1.3]
+            cx_conts_included = [cx_conts[ind]
+                                 #cx_conts[diff_x_ratio<1.3]
+                                 for ind in range(len(cx_conts))
+                                 if diff_x_ratio[ind]<1.3]
+            cy_conts_excluded = [cy_conts[ind]
+                                 #cy_conts[diff_x_ratio>=1.3]
+                                 for ind in range(len(cy_conts))
+                                 if diff_x_ratio[ind]>=1.3]
+            cy_conts_included = [cy_conts[ind]
+                                 #cy_conts[diff_x_ratio<1.3]
+                                 for ind in range(len(cy_conts))
+                                 if diff_x_ratio[ind]<1.3]
 
             #print(diff_x_ratio, 'ratio')
             text_regions_p = text_regions_p.astype('uint8')
@@ -3754,7 +3861,10 @@ class Eynollah:
             contours_only_dilated, hir_on_text_dilated = return_contours_of_image(text_regions_p_textregions_dilated)
             contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated)
 
-            indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = self.return_indexes_of_contours_loctaed_inside_another_list_of_contours(contours_only_dilated, contours_only_text_parent_included, cx_conts_included, cy_conts_included, args_cont_located_included)
+            indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = \
+                self.return_indexes_of_contours_located_inside_another_list_of_contours(
+                    contours_only_dilated, contours_only_text_parent_included,
+                    cx_conts_included, cy_conts_included, args_cont_located_included)
 
             if len(args_cont_located_excluded)>0:
@@ -3767,7 +3877,7 @@ class Eynollah:
                 flattened_array = np.concatenate([arr.ravel() for arr in array_list])
                 #print(len( np.unique(flattened_array)), 'indexes_of_located_cont uniques')
 
-                missing_textregions = list( set(np.array(range(len(contours_only_text_parent))) ) - set(np.unique(flattened_array)) )
+                missing_textregions = list( set(range(len(contours_only_text_parent))) - set(flattened_array) )
                 #print(missing_textregions, 'missing_textregions')
 
                 for ind in missing_textregions:
@@ -3887,12 +3997,13 @@ class Eynollah:
                     region_with_curr_order = ordered[ind]
                     if region_with_curr_order < len(contours_only_dilated):
                         if np.isscalar(indexes_of_located_cont[region_with_curr_order]):
-                            org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]]
+                            org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]])
                         else:
                             arg_sort_located_cont = np.argsort(center_y_coordinates_of_located[region_with_curr_order])
-                            org_contours_indexes = org_contours_indexes + list(np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont]) ##org_contours_indexes + list (
+                            org_contours_indexes.extend(
+                                np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont])
                     else:
-                        org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]]
+                        org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]])
 
                 region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))]
                 return org_contours_indexes, region_ids
@@ -3915,17 +4026,13 @@ class Eynollah:
         if len(peaks_real)>70:
             print(len(peaks_real), 'len(peaks_real)')
-
             peaks_real = peaks_real[(peaks_realwidth1)]
 
             arg_sort = np.argsort(sum_smoothed[peaks_real])
-
             arg_sort4 =arg_sort[::-1][:4]
-
             peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
 
             argsort_sorted = np.argsort(peaks_sort_4)
-
             first_4_sorted = peaks_sort_4[argsort_sorted]
             y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
             #print(first_4_sorted,'first_4_sorted')
@@ -4109,7 +4216,8 @@ class Eynollah:
         return x_differential_new
 
-    def filter_contours_inside_a_bigger_one(self,contours, contours_d_ordered, image, marginal_cnts=None, type_contour="textregion"):
+    def filter_contours_inside_a_bigger_one(self, contours, contours_d_ordered, image,
+                                            marginal_cnts=None, type_contour="textregion"):
         if type_contour=="textregion":
             areas = [cv2.contourArea(contours[j]) for j in range(len(contours))]
             area_tot = image.shape[0]*image.shape[1]
@@ -4129,7 +4237,10 @@ class Eynollah:
                     results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False)
                                for ind in contours_index_big]
                     if marginal_cnts:
-                        results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False)
+                        results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind],
+                                                                 (cx_main[ind_small],
+                                                                  cy_main[ind_small]),
+                                                                 False)
                                             for ind in range(len(marginal_cnts))]
                         results_marginal = np.array(results_marginal)
@@ -4184,7 +4295,10 @@ class Eynollah:
                     args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest]
 
                     if len(args_with_bigger_area)>0:
-                        results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False)
+                        results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind],
+                                                        (cx_main_tot[ij],
+                                                         cy_main_tot[ij]),
+                                                        False)
                                    for ind in args_with_bigger_area ]
                         results = np.array(results)
                         if np.any(results==1):
@@ -4196,14 +4310,16 @@ class Eynollah:
                 textregion_index_to_del = np.array(textregion_index_to_del)
                 textline_in_textregion_index_to_del = np.array(textline_in_textregion_index_to_del)
                 for ind_u_a_trs in np.unique(textregion_index_to_del):
-                    textline_in_textregion_index_to_del_ind = textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs]
+                    textline_in_textregion_index_to_del_ind = \
+                        textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs]
                    textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1]
                     for ittrd in textline_in_textregion_index_to_del_ind:
                         contours[ind_u_a_trs].pop(ittrd)
 
            return contours
 
-    def return_indexes_of_contours_loctaed_inside_another_list_of_contours(self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc):
+    def return_indexes_of_contours_located_inside_another_list_of_contours(
+            self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc):
         indexes_of_located_cont = []
         center_x_coordinates_of_located = []
         center_y_coordinates_of_located = []
@@ -4217,7 +4333,8 @@ class Eynollah:
                        for ind in range(len(cy_main_loc)) ]
             results = np.array(results)
             indexes_in = np.where((results == 0) | (results == 1))
-            indexes = indexes_loc[indexes_in]# [(results == 0) | (results == 1)]#np.where((results == 0) | (results == 1))
+            # [(results == 0) | (results == 1)]#np.where((results == 0) | (results == 1))
+            indexes = indexes_loc[indexes_in]
 
             indexes_of_located_cont.append(indexes)
             center_x_coordinates_of_located.append(np.array(cx_main_loc)[indexes_in] )
@@ -4247,7 +4364,10 @@ class Eynollah:
         ###contours_with_textline = []
         ###for ind_tr, con_tr in enumerate(contours):
-            ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False)
+            ###results = [cv2.pointPolygonTest(con_tr,
+            ###                                (cx_main_textline[index_textline_con],
+            ###                                 cy_main_textline[index_textline_con]),
+            ###                                False)
             ###           for index_textline_con in range(len(contours_txtline_of_all_textregions)) ]
             ###results = np.array(results)
             ###if np.any(results==1):
@@ -4300,7 +4420,9 @@ class Eynollah:
         return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem,
                 contours_only_text_parent_rem, index_by_text_par_con_rem_sort)
 
-    def separate_marginals_to_left_and_right_and_order_from_top_to_down(self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width):
+    def separate_marginals_to_left_and_right_and_order_from_top_to_down(
+            self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals,
+            slopes_marginals, mid_point_of_page_width):
         cx_marg, cy_marg, _, _, _, _, _ = find_new_features_of_contours(
             polygons_of_marginals)
@@ -4310,8 +4432,10 @@ class Eynollah:
         poly_marg_left = list( np.array(polygons_of_marginals)[cx_marg < mid_point_of_page_width] )
         poly_marg_right = list( np.array(polygons_of_marginals)[cx_marg >= mid_point_of_page_width] )
 
-        all_found_textline_polygons_marginals_left = list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] )
-        all_found_textline_polygons_marginals_right = list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] )
+        all_found_textline_polygons_marginals_left = \
+            list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] )
+        all_found_textline_polygons_marginals_right = \
+            list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] )
 
         all_box_coord_marginals_left = list( np.array(all_box_coord_marginals)[cx_marg < mid_point_of_page_width] )
         all_box_coord_marginals_right = list( np.array(all_box_coord_marginals)[cx_marg >= mid_point_of_page_width] )
@@ -4322,20 +4446,38 @@ class Eynollah:
         cy_marg_left = cy_marg[cx_marg < mid_point_of_page_width]
         cy_marg_right = cy_marg[cx_marg >= mid_point_of_page_width]
 
-        ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left), key=lambda x: x[0])]
-        ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right), key=lambda x: x[0])]
+        ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left),
+                                                             key=lambda x: x[0])]
+        ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right),
+                                                              key=lambda x: x[0])]
 
-        ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left, all_found_textline_polygons_marginals_left), key=lambda x: x[0])]
-        ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right, all_found_textline_polygons_marginals_right), key=lambda x: x[0])]
+        ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left,
+                                                                          all_found_textline_polygons_marginals_left),
+                                                                      key=lambda x: x[0])]
+        ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right,
+                                                                           all_found_textline_polygons_marginals_right),
+                                                                       key=lambda x: x[0])]
 
-        ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left, all_box_coord_marginals_left), key=lambda x: x[0])]
-        ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right, all_box_coord_marginals_right), key=lambda x: x[0])]
+        ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left,
+                                                                      all_box_coord_marginals_left),
+                                                                  key=lambda x: x[0])]
+        ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right,
+                                                                       all_box_coord_marginals_right),
+                                                                   key=lambda x: x[0])]
 
-        ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left), key=lambda x: x[0])]
-        ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right), key=lambda x: x[0])]
+        ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left),
+                                                                    key=lambda x: x[0])]
+        ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right),
+                                                                     key=lambda x: x[0])]
 
-        return ordered_left_marginals, ordered_right_marginals, ordered_left_marginals_textline, ordered_right_marginals_textline, ordered_left_marginals_bbox, ordered_right_marginals_bbox, ordered_left_slopes_marginals, ordered_right_slopes_marginals
-
+        return (ordered_left_marginals,
+                ordered_right_marginals,
+                ordered_left_marginals_textline,
+                ordered_right_marginals_textline,
+                ordered_left_marginals_bbox,
+                ordered_right_marginals_bbox,
+                ordered_left_slopes_marginals,
+                ordered_right_slopes_marginals)
 
     def run(self,
             overwrite: bool = False,
@@ -4420,9 +4562,11 @@ class Eynollah:
         self.logger.info(f"Processing file: {self.writer.image_filename}")
         self.logger.info("Step 1/5: Image Enhancement")
 
-        img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
+        img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = \
+            self.run_enhancement(self.light_version)
 
-        self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, {self.dpi} DPI, {num_col_classifier} columns")
+        self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, "
+                         f"{self.dpi} DPI, {num_col_classifier} columns")
         if is_image_enhanced:
             self.logger.info("Enhancement applied")
@@ -4433,7 +4577,8 @@ class Eynollah:
         if self.extract_only_images:
             self.logger.info("Step 2/5: Image Extraction Mode")
 
-            text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, image_page, page_coord, cont_page = \
+            text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, \
+                image_page, page_coord, cont_page = \
                 self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
             pcgts = self.writer.build_pagexml_no_full_layout(
                 [], page_coord, [], [], [], [],
@@ -4465,20 +4610,20 @@ class Eynollah:
             M_main_tot = [cv2.moments(all_found_textline_polygons[j])
                           for j in range(len(all_found_textline_polygons))]
-            w_h_textlines = [cv2.boundingRect(all_found_textline_polygons[j])[2:] for j in range(len(all_found_textline_polygons))]
+            w_h_textlines = [cv2.boundingRect(all_found_textline_polygons[j])[2:]
+                             for j in range(len(all_found_textline_polygons))]
             w_h_textlines = [w_h_textlines[j][0] / float(w_h_textlines[j][1]) for j in range(len(w_h_textlines))]
             cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]
             cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]
 
-            all_found_textline_polygons = self.get_textlines_of_a_textregion_sorted(all_found_textline_polygons, cx_main_tot, cy_main_tot, w_h_textlines)#all_found_textline_polygons[::-1]
-
-            all_found_textline_polygons=[ all_found_textline_polygons ]
-
+            all_found_textline_polygons = self.get_textlines_of_a_textregion_sorted(
+                #all_found_textline_polygons[::-1]
+                all_found_textline_polygons, cx_main_tot, cy_main_tot, w_h_textlines)
+            all_found_textline_polygons = [ all_found_textline_polygons ]
             all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
             all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                 all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline")
-
             order_text_new = [0]
             slopes =[0]
             id_of_texts_tot =['region_0001']
@@ -4498,15 +4643,23 @@ class Eynollah:
             if self.ocr and not self.tr:
                 gc.collect()
-                ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, textline_light=True)
+                ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
+                    image_page, all_found_textline_polygons, self.prediction_model,
+                    self.b_s_ocr, self.num_to_char, textline_light=True)
             else:
                 ocr_all_textlines = None
 
             pcgts = self.writer.build_pagexml_no_full_layout(
                 cont_page, page_coord, order_text_new, id_of_texts_tot,
-                all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
-                all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right,
-                cont_page, polygons_seplines, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order)
+                all_found_textline_polygons, page_coord, polygons_of_images,
+                polygons_of_marginals_left, polygons_of_marginals_right,
+                all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
+                all_box_coord_marginals_left, all_box_coord_marginals_right,
+                slopes, slopes_marginals_left, slopes_marginals_right,
+                cont_page, polygons_seplines, contours_tables,
+                ocr_all_textlines=ocr_all_textlines,
+                conf_contours_textregion=conf_contours_textregions,
+                skip_layout_reading_order=self.skip_layout_and_reading_order)
 
             self.logger.info("Basic processing complete")
             return pcgts
@@ -4516,7 +4669,8 @@ class Eynollah:
         if self.light_version:
             self.logger.info("Using light version processing")
-            text_regions_p_1 ,erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
+            text_regions_p_1 ,erosion_hurts, polygons_seplines, textline_mask_tot_ea, \
+                img_bin_light, confidence_matrix = \
                 self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
             #print("text region early -2 in %.1fs", time.time() - t0)
@@ -4528,7 +4682,6 @@ class Eynollah:
                 img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1]
                 textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )
-
                 slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew)
             else:
                 slope_deskew = self.run_deskew(textline_mask_tot_ea)
@@ -4537,7 +4690,8 @@ class Eynollah:
             num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
                 text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \
                 self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea,
-                                                    num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light)
+                                                    num_col_classifier, num_column_is_classified,
+                                                    erosion_hurts, img_bin_light)
             #self.logger.info("run graphics %.1fs ", time.time() - t1t)
             #print("text region early -3 in %.1fs", time.time() - t0)
             textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
@@ -4552,7 +4706,8 @@ class Eynollah:
             t1 = time.time()
             num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
                 text_regions_p_1, cont_page, table_prediction = \
-                self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts)
+                self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified,
+                                              erosion_hurts)
             self.logger.info(f"Graphics detection took {time.time() - t1:.1f}s")
             #self.logger.info('cont_page %s', cont_page)
             #plt.imshow(table_prediction)
@@ -4617,13 +4772,15 @@ class Eynollah:
         ## birdan sora chock chakir
         t1 = time.time()
         if not self.full_layout:
-            polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \
+            polygons_of_images, img_revised_tab, text_regions_p_1_n, \
+                textline_mask_tot_d, regions_without_separators_d, \
                 boxes, boxes_d, polygons_of_marginals, contours_tables = \
                 self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                               num_col_classifier, table_prediction, erosion_hurts)
             ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
         else:
-            polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \
+            polygons_of_images, img_revised_tab, text_regions_p_1_n, \
+                textline_mask_tot_d, regions_without_separators_d, \
                 regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
                 self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                            num_col_classifier, img_only_regions, table_prediction, erosion_hurts,
@@ -4690,8 +4847,10 @@ class Eynollah:
                     areas_cnt_text_d = self.return_list_of_contours_with_desired_order(
                         areas_cnt_text_d, index_con_parents_d)
 
-                    cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d])
-                    cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d)
+                    cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = \
+                        find_new_features_of_contours([contours_biggest_d])
+                    cx_bigest_d, cy_biggest_d, _, _, _, _, _ = \
+                        find_new_features_of_contours(contours_only_text_parent_d)
                    try:
                         if len(cx_bigest_d) >= 5:
                             cx_bigest_d_last5 = cx_bigest_d[-5:]
@@ -4751,13 +4910,19 @@ class Eynollah:
                 pcgts = self.writer.build_pagexml_full_layout(
                     [], [], page_coord, [], [], [], [], [], [],
                     polygons_of_images, contours_tables, [],
-                    polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], [],
+                    polygons_of_marginals, polygons_of_marginals,
+                    empty_marginals, empty_marginals,
+                    empty_marginals, empty_marginals,
+                    [], [], [], [],
                     cont_page, polygons_seplines)
             else:
                 pcgts = self.writer.build_pagexml_no_full_layout(
                     [], page_coord, [], [], [], [],
                     polygons_of_images,
-                    polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [],
+                    polygons_of_marginals, polygons_of_marginals,
+                    empty_marginals, empty_marginals,
+                    empty_marginals, empty_marginals,
+                    [], [], [],
                     cont_page, polygons_seplines, contours_tables)
             return pcgts
@@ -4767,7 +4932,8 @@ class Eynollah:
         if self.light_version:
             contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
             contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one(
-                contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals)
+                contours_only_text_parent, contours_only_text_parent_d_ordered, text_only,
+                marginal_cnts=polygons_of_marginals)
             #print("text region early 3.5 in %.1fs", time.time() - t0)
             txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light(
                 contours_only_text_parent, self.image, confidence_matrix)
@@ -4793,19 +4959,26 @@ class Eynollah:
                     polygons_of_marginals, polygons_of_marginals,
                     textline_mask_tot_ea_org, boxes_marginals, slope_deskew)
 
-            #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \
+            #slopes, all_found_textline_polygons, boxes_text, txt_con_org, \
+            #    contours_only_text_parent, index_by_text_par_con = \
             #    self.delete_regions_without_textlines(slopes, all_found_textline_polygons,
             #        boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con)
-            #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \
+            #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, \
+            #    polygons_of_marginals, polygons_of_marginals, _ = \
             #    self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals,
-            #        boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
-            all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
+            #        boxes_marginals, polygons_of_marginals, polygons_of_marginals,
+            #        np.array(range(len(polygons_of_marginals))))
+            all_found_textline_polygons = dilate_textline_contours(
+                all_found_textline_polygons)
             all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                 all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline")
-            all_found_textline_polygons_marginals = dilate_textline_contours(all_found_textline_polygons_marginals)
-            contours_only_text_parent, txt_con_org, conf_contours_textregions, all_found_textline_polygons, contours_only_text_parent_d_ordered, \
+            all_found_textline_polygons_marginals = dilate_textline_contours(
+                all_found_textline_polygons_marginals)
+            contours_only_text_parent, txt_con_org, conf_contours_textregions, \
+                all_found_textline_polygons, contours_only_text_parent_d_ordered, \
                 index_by_text_par_con = self.filter_contours_without_textline_inside(
-                    contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions)
+                    contours_only_text_parent, txt_con_org, all_found_textline_polygons,
+                    contours_only_text_parent_d_ordered, conf_contours_textregions)
         else:
             textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
             all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \
@@ -4847,7 +5020,13 @@ class Eynollah:
                 all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
 
             mid_point_of_page_width = text_regions_p.shape[1] / 2.
-            polygons_of_marginals_left, polygons_of_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes_marginals_left, slopes_marginals_right = self.separate_marginals_to_left_and_right_and_order_from_top_to_down(polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width)
+            (polygons_of_marginals_left, polygons_of_marginals_right,
+             all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
+             all_box_coord_marginals_left, all_box_coord_marginals_right,
+             slopes_marginals_left, slopes_marginals_right) = \
+                self.separate_marginals_to_left_and_right_and_order_from_top_to_down(
+                    polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals,
+                    slopes_marginals, mid_point_of_page_width)
 
             #print(len(polygons_of_marginals), len(ordered_left_marginals), len(ordered_right_marginals), 'marginals ordred')
         if self.full_layout:
@@ -4871,40 +5050,41 @@ class Eynollah:
             all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \
                 contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, \
                 conf_contours_textregions, conf_contours_textregions_h = fun(
-                text_regions_p, regions_fully, contours_only_text_parent,
-                all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered, conf_contours_textregions)
+                    text_regions_p, regions_fully, contours_only_text_parent,
+                    all_box_coord, all_found_textline_polygons,
+                    slopes, contours_only_text_parent_d_ordered, conf_contours_textregions)
 
             if self.plotter:
                 self.plotter.save_plot_of_layout(text_regions_p, image_page)
                 self.plotter.save_plot_of_layout_all(text_regions_p, image_page)
 
-            pixel_img = 4
-            polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img)
+            label_img = 4
+            polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, label_img)
             ##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
             ##text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
             ##all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
             ##kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light)
 
             if not self.reading_order_machine_based:
-                pixel_seps = 6
+                label_seps = 6
                 if not self.headers_off:
                     if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                         num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                             np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2),
-                            num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h)
+                            num_col_classifier, self.tables, label_seps, contours_only_text_parent_h)
                     else:
                         _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
                             np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),
-                            num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h_d_ordered)
+                            num_col_classifier, self.tables, label_seps, contours_only_text_parent_h_d_ordered)
                 elif self.headers_off:
                     if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                         num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                             np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2),
-                            num_col_classifier, self.tables, pixel_seps)
+                            num_col_classifier, self.tables, label_seps)
                     else:
                         _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
                             np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),
-                            num_col_classifier, self.tables, pixel_seps)
+                            num_col_classifier, self.tables, label_seps)
 
                 if num_col_classifier >= 3:
                     if np.abs(slope_deskew) < SLOPE_THRESHOLD:
@@ -4949,7 +5129,8 @@ class Eynollah:
                     contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
             else:
                 order_text_new, id_of_texts_tot = self.do_order_of_regions(
-                    contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d)
+                    contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered,
+                    boxes_d, textline_mask_tot_d)
             self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
 
             if self.ocr and not self.tr:
@@ -4962,27 +5143,37 @@ class Eynollah:
                 gc.collect()
 
                 if len(all_found_textline_polygons)>0:
-                    ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                    ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
+                        image_page, all_found_textline_polygons, self.prediction_model,
+                        self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 else:
                     ocr_all_textlines = None
 
                 if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0:
-                    ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                    ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(
+                        image_page, all_found_textline_polygons_marginals_left, self.prediction_model,
+                        self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 else:
                     ocr_all_textlines_marginals_left = None
 
                 if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0:
-                    ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                    ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(
+                        image_page, all_found_textline_polygons_marginals_right, self.prediction_model,
+                        self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 else:
                     ocr_all_textlines_marginals_right = None
 
                 if all_found_textline_polygons_h and len(all_found_textline_polygons)>0:
-                    ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_h, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                    ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(
+                        image_page, all_found_textline_polygons_h, self.prediction_model,
+                        self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 else:
                     ocr_all_textlines_h = None
 
                 if polygons_of_drop_capitals and len(polygons_of_drop_capitals)>0:
-                    ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(image_page, polygons_of_drop_capitals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                    ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(
+                        image_page, polygons_of_drop_capitals, self.prediction_model,
+                        self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
                 else:
                     ocr_all_textlines_drop = None
             else:
@@ -4997,9 +5188,15 @@ class Eynollah:
             pcgts = self.writer.build_pagexml_full_layout(
                 contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
                 all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
-                polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right,
-                all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
-                cont_page, polygons_seplines, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h)
+                polygons_of_images, contours_tables, polygons_of_drop_capitals,
+                polygons_of_marginals_left, polygons_of_marginals_right,
+                all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
+                all_box_coord_marginals_left, all_box_coord_marginals_right,
+                slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
+                cont_page, polygons_seplines, ocr_all_textlines, ocr_all_textlines_h,
+                ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right,
+                ocr_all_textlines_drop,
+                conf_contours_textregions, conf_contours_textregions_h)
             return pcgts
@@ -5034,18 +5231,14 @@ class Eynollah:
         if self.ocr and self.tr:
             self.logger.info("Step 4.5/5: OCR Processing")
-
             if torch.cuda.is_available():
                 self.logger.info("Using GPU acceleration")
             else:
                 self.logger.info("Using CPU processing")
-
             if self.light_version:
                 self.logger.info("Using light version OCR")
-
            if self.textline_light:
                 self.logger.info("Using light text line detection for OCR")
-
             self.logger.info("Processing text lines...")
 
             device = cuda.get_current_device()
@@ -5090,7 +5283,8 @@ class Eynollah:
                         img_croped = img_poly_on_img[y:y+h, x:x+w, :]
                         #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped)
-                        text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot)
+                        text_ocr = self.return_ocr_of_textline_without_common_section(
+                            img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot)
                         ocr_textline_in_textregion.append(text_ocr)
                         ind_tot = ind_tot +1
                 ocr_all_textlines.append(ocr_textline_in_textregion)
@@ -5098,13 +5292,19 @@ class Eynollah:
         elif self.ocr and not self.tr:
             gc.collect()
             if len(all_found_textline_polygons)>0:
-                ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
+                    image_page, all_found_textline_polygons, self.prediction_model,
+                    self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
 
             if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0:
-                ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(
+                    image_page, all_found_textline_polygons_marginals_left, self.prediction_model,
+                    self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
 
            if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0:
-                ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(
+                    image_page, all_found_textline_polygons_marginals_right, self.prediction_model,
+                    self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
         else:
             ocr_all_textlines = None
@@ -5117,9 +5317,14 @@ class Eynollah:
         pcgts = self.writer.build_pagexml_no_full_layout(
             txt_con_org, page_coord, order_text_new, id_of_texts_tot,
-            all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
-            all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right,
-            cont_page, polygons_seplines, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions)
+            all_found_textline_polygons, all_box_coord, polygons_of_images,
+            polygons_of_marginals_left, polygons_of_marginals_right,
+            all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
+            all_box_coord_marginals_left, all_box_coord_marginals_right,
+            slopes, slopes_marginals_left, slopes_marginals_right,
+            cont_page, polygons_seplines, contours_tables, ocr_all_textlines,
+            ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right,
+            conf_contours_textregions)
         return pcgts
@@ -5138,7 +5343,6 @@ class Eynollah_ocr:
             min_conf_value_of_textline_text : Optional[float]=None,
             logger=None,
     ):
-        self.dir_models = dir_models
        self.model_name = model_name
         self.tr_ocr = tr_ocr
         self.export_textline_images_and_text = export_textline_images_and_text
@@ -5261,7 +5465,9 @@ class Eynollah_ocr:
                                 if child_textlines.tag.endswith("Coords"):
                                     cropped_lines_region_indexer.append(indexer_text_region)
                                     p_h=child_textlines.attrib['points'].split(' ')
-                                    textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] )
+                                    textline_coords = np.array( [ [int(x.split(',')[0]),
+                                                                   int(x.split(',')[1]) ]
+                                                                  for x in p_h] )
                                     x,y,w,h = cv2.boundingRect(textline_coords)
 
                                     if dir_out_image_text:
@@ -5277,9 +5483,12 @@ class Eynollah_ocr:
                                         img_crop = img_poly_on_img[y:y+h, x:x+w, :]
                                         img_crop[mask_poly==0] = 255
 
-                                    self.logger.debug("processing %d lines for '%s'", len(cropped_lines), nn.attrib['id'])
+                                    self.logger.debug("processing %d lines for '%s'",
+                                                      len(cropped_lines), nn.attrib['id'])
                                     if h2w_ratio > 0.1:
-                                        cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width) )
+                                        cropped_lines.append(resize_image(img_crop,
+                                                                          tr_ocr_input_height_and_width,
+                                                                          tr_ocr_input_height_and_width) )
                                         cropped_lines_meging_indexing.append(0)
                                         indexer_b_s+=1
 
                                         if indexer_b_s==self.b_s:
@@ -5288,8 +5497,10 @@ class Eynollah_ocr:
                                             indexer_b_s = 0
 
                                             pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
-                                            generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
-                                            generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                            generated_ids_merged = self.model_ocr.generate(
+                                                pixel_values_merged.to(self.device))
+                                            generated_text_merged = self.processor.batch_decode(
+                                                generated_ids_merged, skip_special_tokens=True)
 
                                             extracted_texts = extracted_texts + generated_text_merged
@@ -5297,7 +5508,9 @@ class Eynollah_ocr:
                                         splited_images, _ = return_textlines_split_if_needed(img_crop, None)
                                         #print(splited_images)
                                         if splited_images:
-                                            cropped_lines.append(resize_image(splited_images[0], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
+                                            cropped_lines.append(resize_image(splited_images[0],
+                                                                              tr_ocr_input_height_and_width,
+                                                                              tr_ocr_input_height_and_width))
                                             cropped_lines_meging_indexing.append(1)
                                             indexer_b_s+=1
@@ -5307,13 +5520,17 @@ class Eynollah_ocr:
                                                 indexer_b_s = 0
 
                                                 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
-                                                generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
-                                                generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                                generated_ids_merged = self.model_ocr.generate(
+                                                    pixel_values_merged.to(self.device))
+                                                generated_text_merged = self.processor.batch_decode(
+                                                    generated_ids_merged, skip_special_tokens=True)
 
                                                 extracted_texts = extracted_texts + generated_text_merged
 
-                                            cropped_lines.append(resize_image(splited_images[1], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
+                                            cropped_lines.append(resize_image(splited_images[1],
+                                                                              tr_ocr_input_height_and_width,
+                                                                              tr_ocr_input_height_and_width))
                                             cropped_lines_meging_indexing.append(-1)
                                             indexer_b_s+=1
@@ -5323,8 +5540,10 @@ class Eynollah_ocr:
                                                 indexer_b_s = 0
 
                                                 pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
-                                                generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
-                                                generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                                generated_ids_merged = self.model_ocr.generate(
+                                                    pixel_values_merged.to(self.device))
+                                                generated_text_merged = self.processor.batch_decode(
+                                                    generated_ids_merged, skip_special_tokens=True)
 
                                                 extracted_texts = extracted_texts + generated_text_merged
@@ -5339,8 +5558,10 @@ class Eynollah_ocr:
                                             indexer_b_s = 0
 
                                             pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
-                                            generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
-                                            generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                                            generated_ids_merged = self.model_ocr.generate(
+                                                pixel_values_merged.to(self.device))
+                                            generated_text_merged = self.processor.batch_decode(
+                                                generated_ids_merged, skip_special_tokens=True)
 
                                             extracted_texts = extracted_texts + generated_text_merged
@@ -5371,15 +5592,22 @@ class Eynollah_ocr:
                     ####n_end = (i+1)*self.b_s
                     ####imgs = cropped_lines[n_start:n_end]
                     ####pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
-                    ####generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
-                    ####generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                    ####generated_ids_merged = self.model_ocr.generate(
+                    ####    pixel_values_merged.to(self.device))
+                    ####generated_text_merged = self.processor.batch_decode(
+                    ####    generated_ids_merged, skip_special_tokens=True)
 
                     ####extracted_texts = extracted_texts + generated_text_merged
 
                 del cropped_lines
                 gc.collect()
 
-                extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))]
+                extracted_texts_merged = [extracted_texts[ind]
+                                          if
cropped_lines_meging_indexing[ind]==0 + else extracted_texts[ind]+" "+extracted_texts[ind+1] + if cropped_lines_meging_indexing[ind]==1 + else None + for ind in range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] #print(extracted_texts_merged, len(extracted_texts_merged)) @@ -5401,7 +5629,8 @@ class Eynollah_ocr: w_bb = bb_ind[2] h_bb = bb_ind[3] - font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) ) + font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], + font.path, w_bb, int(h_bb*0.4) ) ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2) @@ -5419,25 +5648,27 @@ class Eynollah_ocr: #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer') #######text_by_textregion = [] #######for ind in unique_cropped_lines_region_indexer: - #######extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] - + #######ind = np.array(cropped_lines_region_indexer)==ind + #######extracted_texts_merged_un = np.array(extracted_texts_merged)[ind] #######text_by_textregion.append(" ".join(extracted_texts_merged_un)) text_by_textregion = [] for ind in unique_cropped_lines_region_indexer: - extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] + ind = np.array(cropped_lines_region_indexer) == ind + extracted_texts_merged_un = np.array(extracted_texts_merged)[ind] if len(extracted_texts_merged_un)>1: text_by_textregion_ind = "" next_glue = "" for indt in range(len(extracted_texts_merged_un)): - if extracted_texts_merged_un[indt].endswith('⸗') or extracted_texts_merged_un[indt].endswith('-') or extracted_texts_merged_un[indt].endswith('¬'): - text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt][:-1] + if (extracted_texts_merged_un[indt].endswith('⸗') or + extracted_texts_merged_un[indt].endswith('-') or + extracted_texts_merged_un[indt].endswith('¬')): + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1] next_glue = "" else: - text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt] + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt] next_glue = " " text_by_textregion.append(text_by_textregion_ind) - else: text_by_textregion.append(" ".join(extracted_texts_merged_un)) @@ -5495,7 +5726,9 @@ class Eynollah_ocr: unicode_textregion.text = text_by_textregion[indexer_textregion] indexer_textregion = indexer_textregion + 1 - ###sample_order = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order] + ###sample_order = [(id_to_order[tid], text) + ### for tid, text in zip(id_textregions, textregions_by_existing_ids) + ### if tid in id_to_order] ##ordered_texts_sample = [text for _, text in sorted(sample_order)] ##tot_page_text = ' '.join(ordered_texts_sample) @@ -5569,7 +5802,9 @@ class Eynollah_ocr: if child_textlines.tag.endswith("Coords"): cropped_lines_region_indexer.append(indexer_text_region) p_h=child_textlines.attrib['points'].split(' ') - textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) + textline_coords = np.array( [ [int(x.split(',')[0]), + int(x.split(',')[1]) ] + for x in p_h] ) x,y,w,h = cv2.boundingRect(textline_coords) @@ -5601,17 +5836,19 @@ class Eynollah_ocr: img_crop[mask_poly==0] = 255 
else: - #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi') + # print(file_name, angle_degrees, w*h, + # mask_poly[:,:,0].sum(), + # mask_poly[:,:,0].sum() /float(w*h) , + # 'didi') if angle_degrees > 3: better_des_slope = get_orientation_moments(textline_coords) - img_crop = rotate_image_with_padding(img_crop, better_des_slope ) - + img_crop = rotate_image_with_padding(img_crop, better_des_slope) if dir_in_bin is not None: - img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope ) + img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope) - mask_poly = rotate_image_with_padding(mask_poly, better_des_slope ) + mask_poly = rotate_image_with_padding(mask_poly, better_des_slope) mask_poly = mask_poly.astype('uint8') #new bounding box @@ -5622,7 +5859,6 @@ class Eynollah_ocr: if not self.do_not_mask_with_textline_contour: img_crop[mask_poly==0] = 255 - if dir_in_bin is not None: img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :] if not self.do_not_mask_with_textline_contour: @@ -5630,11 +5866,14 @@ class Eynollah_ocr: if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90: if dir_in_bin is not None: - img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + img_crop, img_crop_bin = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly, img_crop_bin) else: - img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + img_crop, _ = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly) - else: better_des_slope = 0 if not self.do_not_mask_with_textline_contour: @@ -5647,13 +5886,18 @@ class Eynollah_ocr: else: if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90: if dir_in_bin is not None: - img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + img_crop, img_crop_bin = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly, img_crop_bin) else: - img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + img_crop, _ = \ + break_curved_line_into_small_pieces_and_then_merge( + img_crop, mask_poly) if not self.export_textline_images_and_text: if w_scaled < 750:#1.5*image_width: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop, image_height, image_width) cropped_lines.append(img_fin) if abs(better_des_slope) > 45: cropped_lines_ver_index.append(1) @@ -5662,13 +5906,15 @@ class Eynollah_ocr: cropped_lines_meging_indexing.append(0) if dir_in_bin is not None: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) else: splited_images, splited_images_bin = return_textlines_split_if_needed( img_crop, img_crop_bin if dir_in_bin is not None else None) if splited_images: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images[0], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(1) @@ -5677,7 +5923,8 @@ class Eynollah_ocr: else: cropped_lines_ver_index.append(0) - img_fin = 
preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images[1], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(-1) @@ -5688,13 +5935,16 @@ class Eynollah_ocr: cropped_lines_ver_index.append(0) if dir_in_bin is not None: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images_bin[0], image_height, image_width) cropped_lines_bin.append(img_fin) - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[1], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + splited_images_bin[1], image_height, image_width) cropped_lines_bin.append(img_fin) else: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop, image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) @@ -5704,7 +5954,8 @@ class Eynollah_ocr: cropped_lines_ver_index.append(0) if dir_in_bin is not None: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model( + img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) if self.export_textline_images_and_text: @@ -5716,7 +5967,8 @@ class Eynollah_ocr: if cheild_text.tag.endswith("Unicode"): textline_text = cheild_text.text if textline_text: - base_name = os.path.join(dir_out, file_name + '_line_' + str(indexer_textlines)) + base_name = os.path.join( + dir_out, file_name + '_line_' + str(indexer_textlines)) if self.pref_of_dataset: base_name += '_' + self.pref_of_dataset if not self.do_not_mask_with_textline_contour: @@ -5806,25 +6058,31 @@ class Eynollah_ocr: preds_max_fliped = np.max(preds_flipped, axis=2 ) preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character - masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) + masked_means_flipped = \ + np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) masked_means_flipped[np.isnan(masked_means_flipped)] = 0 preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character - masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means = \ + np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool, axis=1) masked_means[np.isnan(masked_means)] = 0 masked_means_ver = masked_means[indices_ver] #print(masked_means_ver, 'pred_max_not_unk') - indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0] + indices_where_flipped_conf_value_is_higher = \ + np.where(masked_means_flipped > masked_means_ver)[0] #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher') if len(indices_where_flipped_conf_value_is_higher)>0: indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher] - preds[indices_to_be_replaced,:,:] = 
preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] + preds[indices_to_be_replaced,:,:] = \ + preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] if dir_in_bin is not None: preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) @@ -5833,35 +6091,42 @@ class Eynollah_ocr: preds_max_fliped = np.max(preds_flipped, axis=2 ) preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character - masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) + masked_means_flipped = \ + np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) masked_means_flipped[np.isnan(masked_means_flipped)] = 0 preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character - masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means = \ + np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool, axis=1) masked_means[np.isnan(masked_means)] = 0 masked_means_ver = masked_means[indices_ver] #print(masked_means_ver, 'pred_max_not_unk') - indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0] + indices_where_flipped_conf_value_is_higher = \ + np.where(masked_means_flipped > masked_means_ver)[0] #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher') if len(indices_where_flipped_conf_value_is_higher)>0: indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher] - preds_bin[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] + preds_bin[indices_to_be_replaced,:,:] = \ + preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] preds = (preds + preds_bin) / 2. - pred_texts = decode_batch_predictions(preds, self.num_to_char) preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character - masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means = \ + np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \ + np.sum(pred_max_not_unk_mask_bool, axis=1) for ib in range(imgs.shape[0]): pred_texts_ib = pred_texts[ib].replace("[UNK]", "") @@ -5876,31 +6141,40 @@ class Eynollah_ocr: del cropped_lines_bin gc.collect() - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_texts_merged = [extracted_texts[ind] + if cropped_lines_meging_indexing[ind]==0 + else extracted_texts[ind]+" "+extracted_texts[ind+1] + if cropped_lines_meging_indexing[ind]==1 + else None + for ind in range(len(cropped_lines_meging_indexing))] - extracted_conf_value_merged = [extracted_conf_value[ind] if cropped_lines_meging_indexing[ind]==0 else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2. 
if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_conf_value_merged = [extracted_conf_value[ind] + if cropped_lines_meging_indexing[ind]==0 + else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2. + if cropped_lines_meging_indexing[ind]==1 + else None + for ind in range(len(cropped_lines_meging_indexing))] - extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm] for ind_cfm in range(len(extracted_texts_merged)) if extracted_texts_merged[ind_cfm] is not None] + extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm] + for ind_cfm in range(len(extracted_texts_merged)) + if extracted_texts_merged[ind_cfm] is not None] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) - if dir_out_image_text: - #font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists! font = importlib_resources.files(__package__) / "Charis-Regular.ttf" with importlib_resources.as_file(font) as font: font = ImageFont.truetype(font=font, size=40) for indexer_text, bb_ind in enumerate(total_bb_coordinates): - - x_bb = bb_ind[0] y_bb = bb_ind[1] w_bb = bb_ind[2] h_bb = bb_ind[3] - font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) ) + font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], + font.path, w_bb, int(h_bb*0.4) ) ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2) @@ -5917,24 +6191,25 @@ class Eynollah_ocr: text_by_textregion = [] for ind in unique_cropped_lines_region_indexer: - extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] + ind = np.array(cropped_lines_region_indexer)==ind + extracted_texts_merged_un = np.array(extracted_texts_merged)[ind] if len(extracted_texts_merged_un)>1: text_by_textregion_ind = "" next_glue = "" for indt in range(len(extracted_texts_merged_un)): - if extracted_texts_merged_un[indt].endswith('⸗') or extracted_texts_merged_un[indt].endswith('-') or extracted_texts_merged_un[indt].endswith('¬'): - text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt][:-1] + if (extracted_texts_merged_un[indt].endswith('⸗') or + extracted_texts_merged_un[indt].endswith('-') or + extracted_texts_merged_un[indt].endswith('¬')): + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1] next_glue = "" else: - text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt] + text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt] next_glue = " " text_by_textregion.append(text_by_textregion_ind) - else: text_by_textregion.append(" ".join(extracted_texts_merged_un)) #print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion') - - + ###index_tot_regions = [] ###tot_region_ref = [] @@ -5983,7 +6258,8 @@ class Eynollah_ocr: if childtest3.tag.endswith("TextEquiv"): for child_uc in childtest3: if child_uc.tag.endswith("Unicode"): - childtest3.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}") + childtest3.set('conf', + f"{extracted_conf_value_merged[indexer]:.2f}") child_uc.text = extracted_texts_merged[indexer] indexer = indexer + 1 @@ -5999,7 +6275,9 @@ class Eynollah_ocr: unicode_textregion.text = text_by_textregion[indexer_textregion] indexer_textregion = indexer_textregion 
+ 1 - ###sample_order = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order] + ###sample_order = [(id_to_order[tid], text) + ### for tid, text in zip(id_textregions, textregions_by_existing_ids) + ### if tid in id_to_order] ##ordered_texts_sample = [text for _, text in sorted(sample_order)] ##tot_page_text = ' '.join(ordered_texts_sample) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index f8926cf..52bf3ef 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -1012,8 +1012,13 @@ def check_any_text_region_in_model_one_is_main_or_header_light( (regions_model_full[:,:,0]==2)).sum() pixels_main = all_pixels - pixels_header - if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ): - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 + if (( pixels_header / float(pixels_main) >= 0.6 and + length_con[ii] / float(height_con[ii]) >= 1.3 and + length_con[ii] / float(height_con[ii]) <= 3 ) or + ( pixels_header / float(pixels_main) >= 0.3 and + length_con[ii] / float(height_con[ii]) >=3 )): + + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 2 contours_only_text_parent_head.append(contours_only_text_parent[ii]) conf_contours_head.append(None) # why not conf_contours[ii], too? if contours_only_text_parent_d_ordered is not None: @@ -1021,8 +1026,9 @@ def check_any_text_region_in_model_one_is_main_or_header_light( all_box_coord_head.append(all_box_coord[ii]) slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) + else: - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 1 contours_only_text_parent_main.append(contours_only_text_parent[ii]) conf_contours_main.append(conf_contours[ii]) if contours_only_text_parent_d_ordered is not None: @@ -1883,7 +1889,8 @@ def return_boxes_of_images_by_order_of_reading_new( range(x_start_without_mother[dj], x_end_without_mother[dj])) columns_not_covered = list(all_columns - columns_covered_by_mothers) - y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + len(x_start_without_mother), + y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + + len(x_start_without_mother), dtype=int) * splitter_y_new[i]) ##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) @@ -1938,7 +1945,8 @@ def return_boxes_of_images_by_order_of_reading_new( columns_covered_by_with_child_no_mothers.update( range(x_start_with_child_without_mother[dj], x_end_with_child_without_mother[dj])) - columns_not_covered_child_no_mother = list(all_columns - columns_covered_by_with_child_no_mothers) + columns_not_covered_child_no_mother = list( + all_columns - columns_covered_by_with_child_no_mothers) #indexes_to_be_spanned=[] for i_s in range(len(x_end_with_child_without_mother)): columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s]) @@ -1948,7 +1956,8 @@ def return_boxes_of_images_by_order_of_reading_new( x_start_with_child_without_mother = np.array(x_start_with_child_without_mother, int) for i_s_nc in columns_not_covered_child_no_mother: if 
i_s_nc in x_start_with_child_without_mother: - x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0] + x_end_biggest_column = \ + x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0] args_all_biggest_lines = ind_args[(x_starting==i_s_nc) & (x_ending==x_end_biggest_column)] y_column_nc = y_type_2[args_all_biggest_lines] @@ -1996,9 +2005,12 @@ def return_boxes_of_images_by_order_of_reading_new( np.array(list(set(list(range(len(y_all_between_nm_wc)))) - set(list(index_lines_so_close_to_top_separator)))) if len(indexes_remained_after_deleting_closed_lines) > 0: - y_all_between_nm_wc = y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + y_all_between_nm_wc = \ + y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + x_starting_all_between_nm_wc = \ + x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + x_ending_all_between_nm_wc = \ + x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c]) x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 7a8926d..d41dda1 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -67,7 +67,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -78,11 +79,14 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): clusters_to_be_deleted = [] if len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : - arg_diff_cluster[i + 1] + 1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -179,7 +183,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + 
y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -239,7 +244,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): try: neg_peaks_max=np.max(y_padded_smoothed[peaks]) - arg_neg_must_be_deleted= np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42] + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42] diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -316,23 +322,36 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down =y_max_cont-1 + ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) + #point_up + # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down =y_max_cont-1 + ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) + #point_up + # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) point_down_narrow = peaks[jj] + first_nonzero + int( - 1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + 1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./2) else: dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: - point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: - point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) point_down_narrow = peaks[jj] + first_nonzero + int( 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) @@ -341,7 +360,9 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_down_narrow = img_patch.shape[0] - 2 - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + 
first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -468,7 +489,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -543,7 +565,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -613,7 +636,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -689,30 +713,50 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0: - point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = x_max_cont - 1 + ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) + #point_up + # np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: - point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = x_max_cont - 1 + ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) + #point_up + # np.max(y_cont) + #peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) - point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) + ###-int(dis_to_next_down*1./2) else: dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0: - point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + 
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) else: - point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) + ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) + ###-int(dis_to_next_down*1./4.0) - point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) + ###-int(dis_to_next_down*1./2) if point_down_narrow >= img_patch.shape[0]: point_down_narrow = img_patch.shape[0] - 2 - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] @@ -801,7 +845,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -866,7 +911,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down) distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -950,7 +996,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -963,8 +1010,11 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): if len(arg_diff_cluster) > 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[i] + 1: + arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in 
range(len(clusters_to_be_deleted)): @@ -1014,7 +1064,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): try: neg_peaks_max = np.max(y_padded_smoothed[peaks]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -1290,7 +1341,9 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i return None, cont_final -def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False): +def textline_contours_postprocessing(textline_mask, slope, + contour_text_interest, box_ind, + add_boxes_coor_into_textlines=False): textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 textline_mask = textline_mask.astype(np.uint8) kernel = np.ones((5, 5), np.uint8) @@ -1485,7 +1538,8 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) #img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) )) - #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] + #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0], + # int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: @@ -1689,14 +1743,18 @@ def do_work_of_slopes_new_curved( mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) pixel_img = 1 - mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) + mask_biggest2 = resize_image(mask_biggest2, + int(mask_biggest2.shape[0] * scale_par), + int(mask_biggest2.shape[1] * scale_par)) cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) try: textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) except Exception as why: logger.error(why) else: - textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True) + textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, + slope_for_all, contour_par, + box_text, True) return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 5f19387..602ad6e 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -370,7 +370,11 @@ def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind return textline_contour -def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, prediction_model, b_s_ocr, num_to_char, textline_light=False, curved_line=False): +def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, + prediction_model, + b_s_ocr, num_to_char, + textline_light=False, + curved_line=False): max_len = 512 padding_token = 299 image_width = 512#max_len * 4 @@ -426,17 +430,23 @@ def 
return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) if splited_images: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], + image_height, + image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(1) - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], + image_height, + image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(-1) else: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, + image_height, + image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) @@ -469,7 +479,12 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr pred_texts_ib = pred_texts[ib].replace("[UNK]", "") extracted_texts.append(pred_texts_ib) - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_texts_merged = [extracted_texts[ind] + if cropped_lines_meging_indexing[ind]==0 + else extracted_texts[ind]+" "+extracted_texts[ind+1] + if cropped_lines_meging_indexing[ind]==1 + else None + for ind in range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
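The four generate/batch_decode blocks that the TrOCR path re-wraps all flush a full batch the same way. A minimal sketch of that step, assuming the standard transformers TrOCR API (TrOCRProcessor, VisionEncoderDecoderModel) that the surrounding code appears to use; flush_batch is a hypothetical helper name, not a function in the patch:

    from transformers import TrOCRProcessor, VisionEncoderDecoderModel

    def flush_batch(imgs, processor, model, device):
        # imgs: list of line crops, already resized to the TrOCR input size
        pixel_values = processor(imgs, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values.to(device))
        return processor.batch_decode(generated_ids, skip_special_tokens=True)

In the patched code this flush fires whenever indexer_b_s reaches self.b_s, after which the image buffer and counter are reset.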
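The re-wrapped extracted_texts_merged list comprehensions all implement the same 0/1/-1 bookkeeping: cropped_lines_meging_indexing records 0 for a line OCR'd whole, 1 for the first half of a line that return_textlines_split_if_needed cut in two, and -1 for the second half, which is folded back into its predecessor. A plain-Python sketch of that scheme (hypothetical function name, no eynollah imports):

    def merge_split_line_texts(texts, merge_index):
        # merge_index[i]: 0 = whole line, 1 = first half of a split line,
        # -1 = second half (consumed by the preceding 1 entry)
        merged = []
        for i, flag in enumerate(merge_index):
            if flag == 0:
                merged.append(texts[i])
            elif flag == 1:
                merged.append(texts[i] + " " + texts[i + 1])
        return merged

    assert merge_split_line_texts(["ab", "cd", "ef"], [0, 1, -1]) == ["ab", "cd ef"]

The confidence merge follows the same pattern, averaging the two half-line confidences where the flag is 1.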
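Both reflowed text_by_textregion loops share the next_glue rule for joining line texts into region text while resolving end-of-line hyphenation marks ('⸗', '-', '¬'): a marked line is appended without its final character and the following line is glued on directly, otherwise lines are joined with a space. A standalone sketch (hypothetical names):

    HYPHEN_MARKS = ('⸗', '-', '¬')

    def join_region_lines(line_texts):
        text, glue = "", ""
        for line in line_texts:
            if line.endswith(HYPHEN_MARKS):
                text += glue + line[:-1]  # drop the mark, join next line directly
                glue = ""
            else:
                text += glue + line
                glue = " "
        return text

    assert join_region_lines(["Druck-", "sache", "Nr. 7"]) == "Drucksache Nr. 7"

Single-line regions take the " ".join branch in the patched code, which yields the same result.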
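The masked_means expressions that the patch splits across backslash continuations compute one confidence value per text line: the mean top-softmax probability over the time steps whose argmax is not the end/blank character. A numpy sketch under that reading, with end_character as in the surrounding code:

    import numpy as np

    def line_confidences(preds, end_character):
        # preds: (batch, time, classes) softmax output of the line recognizer
        preds_max = np.max(preds, axis=2)                    # top probability per step
        not_end = np.argmax(preds, axis=2) != end_character  # steps emitting a symbol
        with np.errstate(invalid='ignore'):
            conf = np.sum(preds_max * not_end, axis=1) / np.sum(not_end, axis=1)
        return np.nan_to_num(conf)                           # all-blank line -> 0.0

This same score drives the flipped-prediction branch: crops flagged near-vertical (cropped_lines_ver_index) are also recognized upside down, and wherever the flipped confidence is higher, the flipped predictions replace the originals.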
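In utils/__init__.py, the condition re-indented in check_any_text_region_in_model_one_is_main_or_header_light is a two-clause density/aspect rule: a region counts as a header either when it is strongly header-classified and moderately wide, or when it is only moderately header-classified but very wide. Spelled out as a predicate with the same thresholds (hypothetical function name):

    def is_header_region(pixels_header, pixels_main, length, height):
        ratio = pixels_header / float(pixels_main)
        aspect = length / float(height)
        return ((ratio >= 0.6 and 1.3 <= aspect <= 3) or
                (ratio >= 0.3 and aspect >= 3))

Regions passing the predicate are relabeled 2 (header) in regions_model_1, the rest stay 1 (main text).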