diff --git a/sbb_newspapers_org_image/eynollah.py b/sbb_newspapers_org_image/eynollah.py index af4212d..41538b4 100644 --- a/sbb_newspapers_org_image/eynollah.py +++ b/sbb_newspapers_org_image/eynollah.py @@ -28,8 +28,6 @@ tf.get_logger().setLevel("ERROR") warnings.filterwarnings("ignore") from .utils.contour import ( - contours_in_same_horizon, - filter_contours_area_of_image_tables, filter_contours_area_of_image, find_contours_mean_y_diff, find_new_features_of_contoures, @@ -67,15 +65,9 @@ from .utils.resize import resize_image from .utils import ( boosting_headers_by_longshot_region_segmentation, crop_image_inside_box, - find_features_of_lines, find_num_col, - find_num_col_by_vertical_lines, - find_num_col_deskew, - find_num_col_only_image, - isNaN, otsu_copy, otsu_copy_binary, - return_hor_spliter_by_index_for_without_verticals, delete_seperator_around, return_regions_without_seperators, put_drop_out_from_only_drop_model, diff --git a/sbb_newspapers_org_image/unused.py b/sbb_newspapers_org_image/unused.py index f886e04..1981611 100644 --- a/sbb_newspapers_org_image/unused.py +++ b/sbb_newspapers_org_image/unused.py @@ -3087,4 +3087,82 @@ def filter_contours_area_of_image_interiors(image, contours, hirarchy, max_area, jv += 1 return found_polygons_early +def return_hor_spliter_by_index_for_without_verticals(peaks_neg_fin_t, x_min_hor_some, x_max_hor_some): + # print(peaks_neg_fin_t,x_min_hor_some,x_max_hor_some) + arg_min_hor_sort = np.argsort(x_min_hor_some) + x_min_hor_some_sort = np.sort(x_min_hor_some) + x_max_hor_some_sort = x_max_hor_some[arg_min_hor_sort] + arg_minmax = np.array(range(len(peaks_neg_fin_t))) + indexer_lines = [] + indexes_to_delete = [] + indexer_lines_deletions_len = [] + indexr_uniq_ind = [] + for i in range(len(x_min_hor_some_sort)): + min_h = peaks_neg_fin_t - x_min_hor_some_sort[i] + + max_h = peaks_neg_fin_t - x_max_hor_some_sort[i] + + min_h[0] = min_h[0] # +20 + max_h[len(max_h) - 1] = max_h[len(max_h) - 1] - 20 + + min_h_neg = arg_minmax[(min_h < 0)] + min_h_neg_n = min_h[min_h < 0] + + try: + min_h_neg = [min_h_neg[np.argmax(min_h_neg_n)]] + except: + min_h_neg = [] + + max_h_neg = arg_minmax[(max_h > 0)] + max_h_neg_n = max_h[max_h > 0] + + if len(max_h_neg_n) > 0: + max_h_neg = [max_h_neg[np.argmin(max_h_neg_n)]] + else: + max_h_neg = [] + + if len(min_h_neg) > 0 and len(max_h_neg) > 0: + deletions = list(range(min_h_neg[0] + 1, max_h_neg[0])) + unique_delets_int = [] + # print(deletions,len(deletions),'delii') + if len(deletions) > 0: + + for j in range(len(deletions)): + indexes_to_delete.append(deletions[j]) + # print(deletions,indexes_to_delete,'badiii') + unique_delets = np.unique(indexes_to_delete) + # print(min_h_neg[0],unique_delets) + unique_delets_int = unique_delets[unique_delets < min_h_neg[0]] + + indexer_lines_deletions_len.append(len(deletions)) + indexr_uniq_ind.append([deletions]) + + else: + indexer_lines_deletions_len.append(0) + indexr_uniq_ind.append(-999) + + index_line_true = min_h_neg[0] - len(unique_delets_int) + # print(index_line_true) + if index_line_true > 0 and min_h_neg[0] >= 2: + index_line_true = index_line_true + else: + index_line_true = min_h_neg[0] + + indexer_lines.append(index_line_true) + + if len(unique_delets_int) > 0: + for dd in range(len(unique_delets_int)): + indexes_to_delete.append(unique_delets_int[dd]) + else: + indexer_lines.append(-999) + indexer_lines_deletions_len.append(-999) + indexr_uniq_ind.append(-999) + + peaks_true = [] + for m in range(len(peaks_neg_fin_t)): + if m in indexes_to_delete: + pass + else: + peaks_true.append(peaks_neg_fin_t[m]) + return indexer_lines, peaks_true, arg_min_hor_sort, indexer_lines_deletions_len, indexr_uniq_ind diff --git a/sbb_newspapers_org_image/utils/__init__.py b/sbb_newspapers_org_image/utils/__init__.py index 781864d..daf6edd 100644 --- a/sbb_newspapers_org_image/utils/__init__.py +++ b/sbb_newspapers_org_image/utils/__init__.py @@ -376,85 +376,6 @@ def find_num_col_deskew(regions_without_seperators, sigma_, multiplier=3.8): z = gaussian_filter1d(regions_without_seperators_0, sigma_) return np.std(z) -def return_hor_spliter_by_index_for_without_verticals(peaks_neg_fin_t, x_min_hor_some, x_max_hor_some): - # print(peaks_neg_fin_t,x_min_hor_some,x_max_hor_some) - arg_min_hor_sort = np.argsort(x_min_hor_some) - x_min_hor_some_sort = np.sort(x_min_hor_some) - x_max_hor_some_sort = x_max_hor_some[arg_min_hor_sort] - - arg_minmax = np.array(range(len(peaks_neg_fin_t))) - indexer_lines = [] - indexes_to_delete = [] - indexer_lines_deletions_len = [] - indexr_uniq_ind = [] - for i in range(len(x_min_hor_some_sort)): - min_h = peaks_neg_fin_t - x_min_hor_some_sort[i] - - max_h = peaks_neg_fin_t - x_max_hor_some_sort[i] - - min_h[0] = min_h[0] # +20 - max_h[len(max_h) - 1] = max_h[len(max_h) - 1] - 20 - - min_h_neg = arg_minmax[(min_h < 0)] - min_h_neg_n = min_h[min_h < 0] - - try: - min_h_neg = [min_h_neg[np.argmax(min_h_neg_n)]] - except: - min_h_neg = [] - - max_h_neg = arg_minmax[(max_h > 0)] - max_h_neg_n = max_h[max_h > 0] - - if len(max_h_neg_n) > 0: - max_h_neg = [max_h_neg[np.argmin(max_h_neg_n)]] - else: - max_h_neg = [] - - if len(min_h_neg) > 0 and len(max_h_neg) > 0: - deletions = list(range(min_h_neg[0] + 1, max_h_neg[0])) - unique_delets_int = [] - # print(deletions,len(deletions),'delii') - if len(deletions) > 0: - - for j in range(len(deletions)): - indexes_to_delete.append(deletions[j]) - # print(deletions,indexes_to_delete,'badiii') - unique_delets = np.unique(indexes_to_delete) - # print(min_h_neg[0],unique_delets) - unique_delets_int = unique_delets[unique_delets < min_h_neg[0]] - - indexer_lines_deletions_len.append(len(deletions)) - indexr_uniq_ind.append([deletions]) - - else: - indexer_lines_deletions_len.append(0) - indexr_uniq_ind.append(-999) - - index_line_true = min_h_neg[0] - len(unique_delets_int) - # print(index_line_true) - if index_line_true > 0 and min_h_neg[0] >= 2: - index_line_true = index_line_true - else: - index_line_true = min_h_neg[0] - - indexer_lines.append(index_line_true) - - if len(unique_delets_int) > 0: - for dd in range(len(unique_delets_int)): - indexes_to_delete.append(unique_delets_int[dd]) - else: - indexer_lines.append(-999) - indexer_lines_deletions_len.append(-999) - indexr_uniq_ind.append(-999) - - peaks_true = [] - for m in range(len(peaks_neg_fin_t)): - if m in indexes_to_delete: - pass - else: - peaks_true.append(peaks_neg_fin_t[m]) - return indexer_lines, peaks_true, arg_min_hor_sort, indexer_lines_deletions_len, indexr_uniq_ind def find_num_col(regions_without_seperators, multiplier=3.8): regions_without_seperators_0 = regions_without_seperators[:, :].sum(axis=0) diff --git a/sbb_newspapers_org_image/utils/separate_lines.py b/sbb_newspapers_org_image/utils/separate_lines.py index 071116b..a1095b0 100644 --- a/sbb_newspapers_org_image/utils/separate_lines.py +++ b/sbb_newspapers_org_image/utils/separate_lines.py @@ -13,31 +13,8 @@ from .contour import ( ) from .is_nan import isNaN from . import ( - boosting_headers_by_longshot_region_segmentation, - crop_image_inside_box, - find_features_of_lines, - find_num_col, - find_num_col_by_vertical_lines, find_num_col_deskew, - find_num_col_only_image, isNaN, - otsu_copy, - otsu_copy_binary, - return_hor_spliter_by_index_for_without_verticals, - delete_seperator_around, - return_regions_without_seperators, - put_drop_out_from_only_drop_model, - putt_bb_of_drop_capitals_of_model_in_patches_in_layout, - check_any_text_region_in_model_one_is_main_or_header, - small_textlines_to_parent_adherence2, - order_and_id_of_texts, - order_of_regions, - implent_law_head_main_not_parallel, - return_hor_spliter_by_index, - combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new, - return_points_with_boundies, - find_number_of_columns_in_document, - return_boxes_of_images_by_order_of_reading_new, ) def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):