pep8-e265: comment should start with `# `

code-suggestions
cneud 1 week ago
parent 87ae6d11a9
commit b4d168cae3

File diff suppressed because it is too large. Load Diff

@ -180,7 +180,7 @@ class SbbBinarizer:
seg = np.argmax(label_p_pred, axis=3)
#print(seg.shape, len(seg), len(list_i_s))
# print(seg.shape, len(seg), len(list_i_s))
indexer_inside_batch = 0
for i_batch, j_batch in zip(list_i_s, list_j_s):
@ -253,7 +253,7 @@ class SbbBinarizer:
seg = np.argmax(label_p_pred, axis=3)
#print(seg.shape, len(seg), len(list_i_s))
# print(seg.shape, len(seg), len(list_i_s))
indexer_inside_batch = 0
for i_batch, j_batch in zip(list_i_s, list_j_s):

@ -19,7 +19,7 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff):
x_start = []
x_end = []
kind = [] #if covers 2 and more than 2 columns set it to 1 otherwise 0
kind = [] # if covers 2 and more than 2 columns set it to 1 otherwise 0
len_sep = []
y_sep = []
y_diff = []
@ -40,7 +40,7 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
if (max_end - min_start) == (len(peak_points) - 1):
new_main_sep_y.append(indexer)
#print((max_end-min_start),len(peak_points),'(max_end-min_start)')
# print((max_end-min_start),len(peak_points),'(max_end-min_start)')
y_sep.append(cy_hor_some[i])
y_diff.append(cy_hor_diff[i])
x_end.append(max_end)
@ -67,7 +67,7 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
x_s_unified = []
x_e_unified = []
if len(all_args_uniq) > 0:
#print('burda')
# print('burda')
if type(all_args_uniq[0]) is list:
for dd in range(len(all_args_uniq)):
if len(all_args_uniq[dd]) == 2:
@ -75,14 +75,14 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
x_e_same_hor = np.array(x_end_returned)[all_args_uniq[dd]]
y_sep_same_hor = np.array(y_sep_returned)[all_args_uniq[dd]]
y_diff_same_hor = np.array(y_diff_returned)[all_args_uniq[dd]]
#print('burda2')
# print('burda2')
if (x_s_same_hor[0] == x_e_same_hor[1] - 1 or
x_s_same_hor[1] == x_e_same_hor[0] - 1 and
x_s_same_hor[0] != x_s_same_hor[1] and
x_e_same_hor[0] != x_e_same_hor[1]):
#print('burda3')
# print('burda3')
for arg_in in all_args_uniq[dd]:
#print(arg_in,'arg_in')
# print(arg_in,'arg_in')
args_to_be_unified.append(arg_in)
y_selected = np.min(y_sep_same_hor)
y_diff_selected = np.max(y_diff_same_hor)
@ -93,15 +93,15 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
x_e_unified.append(x_e_selected)
y_unified.append(y_selected)
y_diff_unified.append(y_diff_selected)
#print(x_s_same_hor,'x_s_same_hor')
#print(x_e_same_hor[:]-1,'x_e_same_hor')
#print('#############################')
#print(x_s_unified,'y_selected')
#print(x_e_unified,'x_s_selected')
#print(y_unified,'x_e_same_hor')
# print(x_s_same_hor,'x_s_same_hor')
# print(x_e_same_hor[:]-1,'x_e_same_hor')
# print('#############################')
# print(x_s_unified,'y_selected')
# print(x_e_unified,'x_s_selected')
# print(y_unified,'x_e_same_hor')
args_lines_not_unified = list(set(range(len(y_sep_returned))) - set(args_to_be_unified))
#print(args_lines_not_unified,'args_lines_not_unified')
# print(args_lines_not_unified,'args_lines_not_unified')
x_start_returned_not_unified = list(np.array(x_start_returned)[args_lines_not_unified])
x_end_returned_not_unified = list(np.array(x_end_returned)[args_lines_not_unified])
@ -114,22 +114,22 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
x_start_returned_not_unified.append(x_s_unified[dv])
x_end_returned_not_unified.append(x_e_unified[dv])
#print(y_sep_returned,'y_sep_returned')
#print(x_start_returned,'x_start_returned')
#print(x_end_returned,'x_end_returned')
# print(y_sep_returned,'y_sep_returned')
# print(x_start_returned,'x_start_returned')
# print(x_end_returned,'x_end_returned')
x_start_returned = np.array(x_start_returned_not_unified, dtype=int)
x_end_returned = np.array(x_end_returned_not_unified, dtype=int)
y_sep_returned = np.array(y_sep_returned_not_unified, dtype=int)
y_diff_returned = np.array(y_diff_returned_not_unified, dtype=int)
#print(y_sep_returned,'y_sep_returned2')
#print(x_start_returned,'x_start_returned2')
#print(x_end_returned,'x_end_returned2')
#print(new_main_sep_y,'new_main_sep_y')
# print(y_sep_returned,'y_sep_returned2')
# print(x_start_returned,'x_start_returned2')
# print(x_end_returned,'x_end_returned2')
# print(new_main_sep_y,'new_main_sep_y')
#print(x_start,'x_start')
#print(x_end,'x_end')
# print(x_start,'x_start')
# print(x_end,'x_end')
if len(new_main_sep_y) > 0:
min_ys = np.min(y_sep)
@ -151,9 +151,9 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
y_min_new = y_mains_sorted[argm]
y_max_new = y_mains_sorted[argm + 1]
#print(y_min_new,'y_min_new')
#print(y_max_new,'y_max_new')
#print(y_sep[new_main_sep_y[0]],y_sep,'yseps')
# print(y_min_new,'y_min_new')
# print(y_max_new,'y_max_new')
# print(y_sep[new_main_sep_y[0]],y_sep,'yseps')
x_start = np.array(x_start)
x_end = np.array(x_end)
kind = np.array(kind)
@ -166,11 +166,11 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
y_sep = y_sep[(y_sep > y_min_new) & (y_sep < y_max_new)]
elif (y_min_new in y_mains_sep_ohne_grenzen and
y_max_new not in y_mains_sep_ohne_grenzen):
#print('burda')
# print('burda')
x_start = x_start[(y_sep > y_min_new) & (y_sep <= y_max_new)]
#print('burda1')
# print('burda1')
x_end = x_end[(y_sep > y_min_new) & (y_sep <= y_max_new)]
#print('burda2')
# print('burda2')
kind = kind[(y_sep > y_min_new) & (y_sep <= y_max_new)]
y_sep = y_sep[(y_sep > y_min_new) & (y_sep <= y_max_new)]
elif (y_min_new not in y_mains_sep_ohne_grenzen and
@ -184,9 +184,9 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
x_end = x_end[(y_sep >= y_min_new) & (y_sep <= y_max_new)]
kind = kind[(y_sep >= y_min_new) & (y_sep <= y_max_new)]
y_sep = y_sep[(y_sep >= y_min_new) & (y_sep <= y_max_new)]
#print(x_start,'x_start')
#print(x_end,'x_end')
#print(len_sep)
# print(x_start,'x_start')
# print(x_end,'x_end')
# print(len_sep)
deleted = []
for i in range(len(x_start) - 1):
@ -194,11 +194,11 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
for j in range(i + 1, len(x_start)):
if nodes_i == set(range(x_start[j], x_end[j] + 1)):
deleted.append(j)
#print(np.unique(deleted))
# print(np.unique(deleted))
remained_sep_indexes = set(range(len(x_start))) - set(np.unique(deleted))
#print(remained_sep_indexes,'remained_sep_indexes')
mother = [] #if it has mother
# print(remained_sep_indexes,'remained_sep_indexes')
mother = [] # if it has mother
child = []
for index_i in remained_sep_indexes:
have_mother = 0
@ -213,9 +213,9 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
mother.append(have_mother)
child.append(have_child)
#print(mother,'mother')
#print(len(remained_sep_indexes))
#print(len(remained_sep_indexes),len(x_start),len(x_end),len(y_sep),'lens')
# print(mother,'mother')
# print(len(remained_sep_indexes))
# print(len(remained_sep_indexes),len(x_start),len(x_end),len(y_sep),'lens')
y_lines_without_mother = []
x_start_without_mother = []
x_end_without_mother = []
@ -226,23 +226,23 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
mother = np.array(mother)
child = np.array(child)
#print(mother,'mother')
#print(child,'child')
# print(mother,'mother')
# print(child,'child')
remained_sep_indexes = np.array(list(remained_sep_indexes))
x_start = np.array(x_start)
x_end = np.array(x_end)
y_sep = np.array(y_sep)
if len(remained_sep_indexes) > 1:
#print(np.array(remained_sep_indexes),'np.array(remained_sep_indexes)')
#print(np.array(mother),'mother')
##remained_sep_indexes_without_mother = remained_sep_indexes[mother==0]
##remained_sep_indexes_with_child_without_mother = remained_sep_indexes[mother==0 & child==1]
# print(np.array(remained_sep_indexes),'np.array(remained_sep_indexes)')
# print(np.array(mother),'mother')
# remained_sep_indexes_without_mother = remained_sep_indexes[mother==0]
# remained_sep_indexes_with_child_without_mother = remained_sep_indexes[mother==0 & child==1]
remained_sep_indexes_without_mother = np.array(list(remained_sep_indexes))[np.array(mother) == 0]
remained_sep_indexes_with_child_without_mother = np.array(list(remained_sep_indexes))[
(np.array(mother) == 0) & (np.array(child) == 1)]
#print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother')
#print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother')
# print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother')
# print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother')
x_end_with_child_without_mother = x_end[remained_sep_indexes_with_child_without_mother]
x_start_with_child_without_mother = x_start[remained_sep_indexes_with_child_without_mother]
@ -269,19 +269,19 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
reading_orther_type = 1
else:
reading_orther_type = 0
#print(reading_orther_type,'javab')
#print(y_lines_with_child_without_mother,'y_lines_with_child_without_mother')
#print(x_start_with_child_without_mother,'x_start_with_child_without_mother')
#print(x_end_with_child_without_mother,'x_end_with_hild_without_mother')
# print(reading_orther_type,'javab')
# print(y_lines_with_child_without_mother,'y_lines_with_child_without_mother')
# print(x_start_with_child_without_mother,'x_start_with_child_without_mother')
# print(x_end_with_child_without_mother,'x_end_with_hild_without_mother')
len_sep_with_child = len(child[child == 1])
#print(len_sep_with_child,'len_sep_with_child')
# print(len_sep_with_child,'len_sep_with_child')
there_is_sep_with_child = 0
if len_sep_with_child >= 1:
there_is_sep_with_child = 1
#print(all_args_uniq,'all_args_uniq')
#print(args_to_be_unified,'args_to_be_unified')
# print(all_args_uniq,'all_args_uniq')
# print(args_to_be_unified,'args_to_be_unified')
return (reading_orther_type,
x_start_returned,
@ -355,7 +355,7 @@ def boosting_headers_by_longshot_region_segmentation(textregion_pre_p, textregio
textregion_pre_p_org = np.copy(textregion_pre_p)
# 4 is drop capitals
headers_in_longshot = textregion_pre_np[:, :, 0] == 2
#headers_in_longshot = ((textregion_pre_np[:,:,0]==2) |
# headers_in_longshot = ((textregion_pre_np[:,:,0]==2) |
# (textregion_pre_np[:,:,0]==1))
textregion_pre_p[:, :, 0][headers_in_longshot &
(textregion_pre_p[:, :, 0] != 4)] = 2
@ -379,8 +379,8 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
regions_without_separators_0 = regions_without_separators.sum(axis=0)
##plt.plot(regions_without_separators_0)
##plt.show()
# plt.plot(regions_without_separators_0)
# plt.show()
sigma_ = 35 # 70#35
meda_n_updown = regions_without_separators_0[len(regions_without_separators_0):: -1]
first_nonzero = next((i for i, x in enumerate(regions_without_separators_0) if x), 0)
@ -479,7 +479,7 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
num_col = 1
peaks_neg_fin = []
##print(len(peaks_neg_fin))
# print(len(peaks_neg_fin))
diff_peaks = np.abs(np.diff(peaks_neg_fin))
cut_off = 400
@ -514,7 +514,7 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
p_u_quarter = len(y) - p_quarter
##print(num_col,'early')
# print(num_col,'early')
if num_col == 3:
if ((peaks_neg_true[0] > p_g_u and
peaks_neg_true[1] > p_g_u) or
@ -564,15 +564,15 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
# plt.plot(peaks_neg_true,z[peaks_neg_true],'*')
# plt.plot([0,len(y)], [grenze,grenze])
# plt.show()
##print(len(peaks_neg_true))
# print(len(peaks_neg_true))
return len(peaks_neg_true), peaks_neg_true
def find_num_col_only_image(regions_without_separators, multiplier=3.8):
regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0)
##plt.plot(regions_without_separators_0)
##plt.show()
# plt.plot(regions_without_separators_0)
# plt.show()
sigma_ = 15
meda_n_updown = regions_without_separators_0[len(regions_without_separators_0):: -1]
@ -767,8 +767,8 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8):
regions_without_separators_0 = regions_without_separators[:, :, 0].sum(axis=0)
##plt.plot(regions_without_separators_0)
##plt.show()
# plt.plot(regions_without_separators_0)
# plt.show()
sigma_ = 35 # 70#35
z = gaussian_filter1d(regions_without_separators_0, sigma_)
@ -862,7 +862,7 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop
layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = drop_capital_label
layout_in_patch[box0][layout_in_patch[box0] == 0] = drop_capital_label
layout_in_patch[box0][layout_in_patch[box0] == 4] = drop_capital_label # images
#layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = 1#drop_capital_label
# layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = 1#drop_capital_label
return layout_in_patch
@ -920,7 +920,7 @@ def check_any_text_region_in_model_one_is_main_or_header(
slopes_main.append(slopes[ii])
all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
#print(all_pixels,pixels_main,pixels_header)
# print(all_pixels,pixels_main,pixels_header)
return (regions_model_1,
contours_only_text_parent_main,
@ -941,7 +941,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
all_box_coord, all_found_textline_polygons,
slopes,
contours_only_text_parent_d_ordered):
### to make it faster
# to make it faster
h_o = regions_model_1.shape[0]
w_o = regions_model_1.shape[1]
zoom = 3
@ -953,7 +953,6 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
interpolation=cv2.INTER_NEAREST)
contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent]
###
cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \
find_new_features_of_contours(contours_only_text_parent)
@ -1002,16 +1001,15 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
slopes_main.append(slopes[ii])
all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
#print(all_pixels,pixels_main,pixels_header)
# print(all_pixels,pixels_main,pixels_header)
### to make it faster
# to make it faster
regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST)
# regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom,
# regions_model_full.shape[0] // zoom),
# interpolation=cv2.INTER_NEAREST)
contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head]
contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main]
###
return (regions_model_1,
contours_only_text_parent_main,
@ -1042,11 +1040,11 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
textlines_tot.append(np.array(textlines_con[m1][nn], dtype=np.int32))
textlines_tot_org_form.append(textlines_con[m1][nn])
##img_text_all=np.zeros((textline_iamge.shape[0],textline_iamge.shape[1]))
##img_text_all=cv2.fillPoly(img_text_all, pts =textlines_tot , color=(1,1,1))
# img_text_all=np.zeros((textline_iamge.shape[0],textline_iamge.shape[1]))
# img_text_all=cv2.fillPoly(img_text_all, pts =textlines_tot , color=(1,1,1))
##plt.imshow(img_text_all)
##plt.show()
# plt.imshow(img_text_all)
# plt.show()
areas_cnt_text = np.array([cv2.contourArea(textlines_tot[j])
for j in range(len(textlines_tot))])
areas_cnt_text = areas_cnt_text / float(textline_iamge.shape[0] * textline_iamge.shape[1])
@ -1152,8 +1150,8 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
##plt.imshow(textline_mask)
##plt.show()
# plt.imshow(textline_mask)
# plt.show()
"""
print(len(contours_main),'contours_main')
mada_n=textline_mask.sum(axis=1)
@ -1164,8 +1162,8 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
x=np.arange(len(y))
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
##plt.imshow(textline_mask[:,:])
##plt.show()
# plt.imshow(textline_mask[:,:])
# plt.show()
sigma_gaus=8
z= gaussian_filter1d(y_help, sigma_gaus)
@ -1200,8 +1198,8 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
peaks_neg = peaks_neg - 20 - 20
peaks = peaks - 20
##plt.plot(z)
##plt.show()
# plt.plot(z)
# plt.show()
if contours_main is not None:
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
@ -1279,7 +1277,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
final_types.append(int(ind_in_type[j]))
final_index_type.append(int(ind_ind_type[j]))
##matrix_of_orders[:len_main,4]=final_indexers_sorted[:]
# matrix_of_orders[:len_main,4]=final_indexers_sorted[:]
# This fix is applied if the sum of the lengths of contours and contours_h
# does not match final_indexers_sorted. However, this is not the optimal solution.
@ -1297,7 +1295,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
img_p_in_ver, img_in_hor, num_col_classifier):
#img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2)
# img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2)
img_p_in_ver = img_p_in_ver.astype(np.uint8)
img_p_in_ver = np.repeat(img_p_in_ver[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(img_p_in_ver, cv2.COLOR_BGR2GRAY)
@ -1334,7 +1332,7 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
if len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column < 10:
args_hor = np.arange(len(slope_lines_hor))
all_args_uniq = contours_in_same_horizon(cy_main_hor)
#print(all_args_uniq,'all_args_uniq')
# print(all_args_uniq,'all_args_uniq')
if len(all_args_uniq) > 0:
if type(all_args_uniq[0]) is list:
special_separators = []
@ -1346,8 +1344,8 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
some_x_min = x_min_main_hor[all_args_uniq[dd]]
some_x_max = x_max_main_hor[all_args_uniq[dd]]
#img_in=np.zeros(separators_closeup_n[:,:,2].shape)
#print(img_p_in_ver.shape[1],some_x_max-some_x_min,'xdiff')
# img_in=np.zeros(separators_closeup_n[:,:,2].shape)
# print(img_p_in_ver.shape[1],some_x_max-some_x_min,'xdiff')
diff_x_some = some_x_max - some_x_min
for jv in range(len(some_args)):
img_p_in = cv2.fillPoly(img_in_hor, pts=[contours_lines_hor[some_args[jv]]], color=(1, 1, 1))
@ -1535,9 +1533,9 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
matrix_of_lines_ch[:len(cy_main_hor), 0] = args_hor
matrix_of_lines_ch[len(cy_main_hor):, 0] = args_ver
matrix_of_lines_ch[len(cy_main_hor):, 1] = cx_main_ver
matrix_of_lines_ch[:len(cy_main_hor), 2] = x_min_main_hor + 50 #x_min_main_hor+150
matrix_of_lines_ch[:len(cy_main_hor), 2] = x_min_main_hor + 50 # x_min_main_hor+150
matrix_of_lines_ch[len(cy_main_hor):, 2] = x_min_main_ver
matrix_of_lines_ch[:len(cy_main_hor), 3] = x_max_main_hor - 50 #x_max_main_hor-150
matrix_of_lines_ch[:len(cy_main_hor), 3] = x_max_main_hor - 50 # x_max_main_hor-150
matrix_of_lines_ch[len(cy_main_hor):, 3] = x_max_main_ver
matrix_of_lines_ch[:len(cy_main_hor), 4] = dist_x_hor
matrix_of_lines_ch[len(cy_main_hor):, 4] = dist_x_ver
@ -1563,7 +1561,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
matrix_l_n[matrix_of_lines_ch.shape[0]:, 4] = dist_x_head
matrix_l_n[matrix_of_lines_ch.shape[0]:, 5] = y_min_main_head - 3 - 8
matrix_l_n[matrix_of_lines_ch.shape[0]:, 6] = y_min_main_head - 5 - 8
matrix_l_n[matrix_of_lines_ch.shape[0]:, 7] = y_max_main_head #y_min_main_head+1-8
matrix_l_n[matrix_of_lines_ch.shape[0]:, 7] = y_max_main_head # y_min_main_head+1-8
matrix_l_n[matrix_of_lines_ch.shape[0]:, 8] = 4
matrix_of_lines_ch = np.copy(matrix_l_n)
@ -1624,13 +1622,13 @@ def return_boxes_of_images_by_order_of_reading_new(
boxes = []
peaks_neg_tot_tables = []
for i in range(len(splitter_y_new) - 1):
#print(splitter_y_new[i],splitter_y_new[i+1])
# print(splitter_y_new[i],splitter_y_new[i+1])
matrix_new = matrix_of_lines_ch[:, :][(matrix_of_lines_ch[:, 6] > splitter_y_new[i]) &
(matrix_of_lines_ch[:, 7] < splitter_y_new[i + 1])]
#print(len( matrix_new[:,9][matrix_new[:,9]==1] ))
#print(matrix_new[:,8][matrix_new[:,9]==1],'gaddaaa')
# print(len( matrix_new[:,9][matrix_new[:,9]==1] ))
# print(matrix_new[:,8][matrix_new[:,9]==1],'gaddaaa')
# check whether there is any vertical separator to find holes.
#if (len(matrix_new[:,9][matrix_new[:,9]==1]) > 0 and
# if (len(matrix_new[:,9][matrix_new[:,9]==1]) > 0 and
# np.max(matrix_new[:,8][matrix_new[:,9]==1]) >=
# 0.1 * (np.abs(splitter_y_new[i+1]-splitter_y_new[i]))):
if True:
@ -1649,26 +1647,26 @@ def return_boxes_of_images_by_order_of_reading_new(
try:
peaks_neg_fin_org = np.copy(peaks_neg_fin)
if (len(peaks_neg_fin) + 1) < num_col_classifier or num_col_classifier == 6:
#print('burda')
# print('burda')
if len(peaks_neg_fin) == 0:
num_col, peaks_neg_fin = find_num_col(
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i + 1]), :],
num_col_classifier, tables, multiplier=3.)
peaks_neg_fin_early = [0]
#print(peaks_neg_fin,'peaks_neg_fin')
# print(peaks_neg_fin,'peaks_neg_fin')
for p_n in peaks_neg_fin:
peaks_neg_fin_early.append(p_n)
peaks_neg_fin_early.append(regions_without_separators.shape[1] - 1)
#print(peaks_neg_fin_early,'burda2')
# print(peaks_neg_fin_early,'burda2')
peaks_neg_fin_rev = []
for i_n in range(len(peaks_neg_fin_early) - 1):
#print(i_n,'i_n')
#plt.plot(regions_without_separators[int(splitter_y_new[i]):
# print(i_n,'i_n')
# plt.plot(regions_without_separators[int(splitter_y_new[i]):
# int(splitter_y_new[i+1]),
# peaks_neg_fin_early[i_n]:
# peaks_neg_fin_early[i_n+1]].sum(axis=0) )
#plt.show()
# plt.show()
try:
num_col, peaks_neg_fin1 = find_num_col(
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i + 1]),
@ -1692,7 +1690,7 @@ def return_boxes_of_images_by_order_of_reading_new(
if i_n != (len(peaks_neg_fin_early) - 2):
peaks_neg_fin_rev.append(peaks_neg_fin_early[i_n + 1])
#print(peaks_neg_fin,'peaks_neg_fin')
# print(peaks_neg_fin,'peaks_neg_fin')
peaks_neg_fin_rev = peaks_neg_fin_rev + peaks_neg_fin
if len(peaks_neg_fin_rev) >= len(peaks_neg_fin_org):
@ -1702,10 +1700,10 @@ def return_boxes_of_images_by_order_of_reading_new(
peaks_neg_fin = list(np.copy(peaks_neg_fin_org))
num_col = len(peaks_neg_fin)
#print(peaks_neg_fin,'peaks_neg_fin')
# print(peaks_neg_fin,'peaks_neg_fin')
except:
pass
#num_col, peaks_neg_fin = find_num_col(
# num_col, peaks_neg_fin = find_num_col(
# regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
# multiplier=7.0)
x_min_hor_some = matrix_new[:, 2][(matrix_new[:, 9] == 0)]
@ -1738,11 +1736,11 @@ def return_boxes_of_images_by_order_of_reading_new(
(len(y_lines_without_mother) >= 2 or there_is_sep_with_child == 1))):
try:
y_grenze = int(splitter_y_new[i]) + 300
#check if there is a big separator in this y_mains_sep_ohne_grenzen
# check if there is a big separator in this y_mains_sep_ohne_grenzen
args_early_ys = np.arange(len(y_type_2))
#print(args_early_ys,'args_early_ys')
#print(int(splitter_y_new[i]),int(splitter_y_new[i+1]))
# print(args_early_ys,'args_early_ys')
# print(int(splitter_y_new[i]),int(splitter_y_new[i+1]))
x_starting_up = x_starting[(y_type_2 > int(splitter_y_new[i])) &
(y_type_2 <= y_grenze)]
@ -1761,21 +1759,21 @@ def return_boxes_of_images_by_order_of_reading_new(
(x_ending_up == (len(peaks_neg_tot) - 1))]
args_main_to_deleted = args_up[(x_starting_up == 0) &
(x_ending_up == (len(peaks_neg_tot) - 1))]
#print(y_main_separator_up,y_diff_main_separator_up,args_main_to_deleted,'fffffjammmm')
# print(y_main_separator_up,y_diff_main_separator_up,args_main_to_deleted,'fffffjammmm')
if len(y_diff_main_separator_up) > 0:
args_to_be_kept = np.array(list(set(args_early_ys) - set(args_main_to_deleted)))
#print(args_to_be_kept,'args_to_be_kept')
# print(args_to_be_kept,'args_to_be_kept')
boxes.append([0, peaks_neg_tot[len(peaks_neg_tot) - 1],
int(splitter_y_new[i]), int(np.max(y_diff_main_separator_up))])
splitter_y_new[i] = [np.max(y_diff_main_separator_up)][0]
#print(splitter_y_new[i],'splitter_y_new[i]')
# print(splitter_y_new[i],'splitter_y_new[i]')
y_type_2 = y_type_2[args_to_be_kept]
x_starting = x_starting[args_to_be_kept]
x_ending = x_ending[args_to_be_kept]
y_diff_type_2 = y_diff_type_2[args_to_be_kept]
#print('galdiha')
# print('galdiha')
y_grenze = int(splitter_y_new[i]) + 200
args_early_ys2 = np.arange(len(y_type_2))
y_type_2_up = y_type_2[(y_type_2 > int(splitter_y_new[i])) &
@ -1788,20 +1786,20 @@ def return_boxes_of_images_by_order_of_reading_new(
(y_type_2 <= y_grenze)]
args_up2 = args_early_ys2[(y_type_2 > int(splitter_y_new[i])) &
(y_type_2 <= y_grenze)]
#print(y_type_2_up,x_starting_up,x_ending_up,'didid')
# print(y_type_2_up,x_starting_up,x_ending_up,'didid')
nodes_in = []
for ij in range(len(x_starting_up)):
nodes_in = nodes_in + list(range(x_starting_up[ij],
x_ending_up[ij]))
nodes_in = np.unique(nodes_in)
#print(nodes_in,'nodes_in')
# print(nodes_in,'nodes_in')
if set(nodes_in) == set(range(len(peaks_neg_tot) - 1)):
pass
elif set(nodes_in) == set(range(1, len(peaks_neg_tot) - 1)):
pass
else:
#print('burdaydikh')
# print('burdaydikh')
args_to_be_kept2 = np.array(list(set(args_early_ys2) - set(args_up2)))
if len(args_to_be_kept2) > 0:
@ -1811,28 +1809,28 @@ def return_boxes_of_images_by_order_of_reading_new(
y_diff_type_2 = y_diff_type_2[args_to_be_kept2]
else:
pass
#print('burdaydikh2')
# print('burdaydikh2')
elif len(y_diff_main_separator_up) == 0:
nodes_in = []
for ij in range(len(x_starting_up)):
nodes_in = nodes_in + list(range(x_starting_up[ij],
x_ending_up[ij]))
nodes_in = np.unique(nodes_in)
#print(nodes_in,'nodes_in2')
#print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')
# print(nodes_in,'nodes_in2')
# print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')
if set(nodes_in) == set(range(len(peaks_neg_tot) - 1)):
pass
elif set(nodes_in) == set(range(1, len(peaks_neg_tot) - 1)):
pass
else:
#print('burdaydikh')
#print(args_early_ys,'args_early_ys')
#print(args_up,'args_up')
# print('burdaydikh')
# print(args_early_ys,'args_early_ys')
# print(args_up,'args_up')
args_to_be_kept2 = np.array(list(set(args_early_ys) - set(args_up)))
#print(args_to_be_kept2,'args_to_be_kept2')
#print(len(y_type_2),len(x_starting),len(x_ending),len(y_diff_type_2))
# print(args_to_be_kept2,'args_to_be_kept2')
# print(len(y_type_2),len(x_starting),len(x_ending),len(y_diff_type_2))
if len(args_to_be_kept2) > 0:
y_type_2 = y_type_2[args_to_be_kept2]
x_starting = x_starting[args_to_be_kept2]
@ -1840,9 +1838,9 @@ def return_boxes_of_images_by_order_of_reading_new(
y_diff_type_2 = y_diff_type_2[args_to_be_kept2]
else:
pass
#print('burdaydikh2')
# print('burdaydikh2')
#int(splitter_y_new[i])
# int(splitter_y_new[i])
y_lines_by_order = []
x_start_by_order = []
x_end_by_order = []
@ -1853,7 +1851,7 @@ def return_boxes_of_images_by_order_of_reading_new(
x_start_by_order.append(0)
x_end_by_order.append(len(peaks_neg_tot) - 2)
else:
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
# print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
columns_covered_by_mothers = []
for dj in range(len(x_start_without_mother)):
columns_covered_by_mothers = columns_covered_by_mothers + \
@ -1864,40 +1862,40 @@ def return_boxes_of_images_by_order_of_reading_new(
all_columns = np.arange(len(peaks_neg_tot) - 1)
columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (
len(columns_not_covered) + len(x_start_without_mother)))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
len(columns_not_covered) + len(x_start_without_mother)))
# y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
# x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered)
x_starting = np.append(x_starting, x_start_without_mother)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
x_ending = np.append(x_ending, x_end_without_mother)
ind_args = np.arange(len(y_type_2))
#ind_args=np.array(ind_args)
#print(ind_args,'ind_args')
# ind_args=np.array(ind_args)
# print(ind_args,'ind_args')
for column in range(len(peaks_neg_tot) - 1):
#print(column,'column')
# print(column,'column')
ind_args_in_col = ind_args[x_starting == column]
#print('babali2')
#print(ind_args_in_col,'ind_args_in_col')
# print('babali2')
# print(ind_args_in_col,'ind_args_in_col')
ind_args_in_col = np.array(ind_args_in_col)
#print(len(y_type_2))
# print(len(y_type_2))
y_column = y_type_2[ind_args_in_col]
x_start_column = x_starting[ind_args_in_col]
x_end_column = x_ending[ind_args_in_col]
#print('babali3')
# print('babali3')
ind_args_col_sorted = np.argsort(y_column)
y_col_sort = y_column[ind_args_col_sorted]
x_start_column_sort = x_start_column[ind_args_col_sorted]
x_end_column_sort = x_end_column[ind_args_col_sorted]
#print('babali4')
# print('babali4')
for ii in range(len(y_col_sort)):
#print('babali5')
# print('babali5')
y_lines_by_order.append(y_col_sort[ii])
x_start_by_order.append(x_start_column_sort[ii])
x_end_by_order.append(x_end_column_sort[ii] - 1)
else:
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
# print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
columns_covered_by_mothers = []
for dj in range(len(x_start_without_mother)):
columns_covered_by_mothers = columns_covered_by_mothers + \
@ -1908,9 +1906,9 @@ def return_boxes_of_images_by_order_of_reading_new(
all_columns = np.arange(len(peaks_neg_tot) - 1)
columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (
len(columns_not_covered) + len(x_start_without_mother)))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
len(columns_not_covered) + len(x_start_without_mother)))
# y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
# x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered)
x_starting = np.append(x_starting, x_start_without_mother)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
@ -1926,7 +1924,7 @@ def return_boxes_of_images_by_order_of_reading_new(
all_columns = np.arange(len(peaks_neg_tot) - 1)
columns_not_covered_child_no_mother = list(
set(all_columns) - set(columns_covered_by_with_child_no_mothers))
#indexes_to_be_spanned=[]
# indexes_to_be_spanned=[]
for i_s in range(len(x_end_with_child_without_mother)):
columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s])
columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother)
@ -1936,7 +1934,7 @@ def return_boxes_of_images_by_order_of_reading_new(
for i_s_nc in columns_not_covered_child_no_mother:
if i_s_nc in x_start_with_child_without_mother:
x_end_biggest_column = \
x_end_with_child_without_mother[x_start_with_child_without_mother == i_s_nc][0]
x_end_with_child_without_mother[x_start_with_child_without_mother == i_s_nc][0]
args_all_biggest_lines = ind_args[(x_starting == i_s_nc) &
(x_ending == x_end_biggest_column)]
y_column_nc = y_type_2[args_all_biggest_lines]
@ -2020,40 +2018,40 @@ def return_boxes_of_images_by_order_of_reading_new(
ind_args_between = np.arange(len(x_ending_all_between_nm_wc))
for column in range(i_s_nc, x_end_biggest_column):
ind_args_in_col = ind_args_between[x_starting_all_between_nm_wc == column]
#print('babali2')
#print(ind_args_in_col,'ind_args_in_col')
# print('babali2')
# print(ind_args_in_col,'ind_args_in_col')
ind_args_in_col = np.array(ind_args_in_col)
#print(len(y_type_2))
# print(len(y_type_2))
y_column = y_all_between_nm_wc[ind_args_in_col]
x_start_column = x_starting_all_between_nm_wc[ind_args_in_col]
x_end_column = x_ending_all_between_nm_wc[ind_args_in_col]
#print('babali3')
# print('babali3')
ind_args_col_sorted = np.argsort(y_column)
y_col_sort = y_column[ind_args_col_sorted]
x_start_column_sort = x_start_column[ind_args_col_sorted]
x_end_column_sort = x_end_column[ind_args_col_sorted]
#print('babali4')
# print('babali4')
for ii in range(len(y_col_sort)):
#print('babali5')
# print('babali5')
y_lines_by_order.append(y_col_sort[ii])
x_start_by_order.append(x_start_column_sort[ii])
x_end_by_order.append(x_end_column_sort[ii] - 1)
else:
#print(column,'column')
# print(column,'column')
ind_args_in_col = ind_args[x_starting == i_s_nc]
#print('babali2')
#print(ind_args_in_col,'ind_args_in_col')
# print('babali2')
# print(ind_args_in_col,'ind_args_in_col')
ind_args_in_col = np.array(ind_args_in_col)
#print(len(y_type_2))
# print(len(y_type_2))
y_column = y_type_2[ind_args_in_col]
x_start_column = x_starting[ind_args_in_col]
x_end_column = x_ending[ind_args_in_col]
#print('babali3')
# print('babali3')
ind_args_col_sorted = np.argsort(y_column)
y_col_sort = y_column[ind_args_col_sorted]
x_start_column_sort = x_start_column[ind_args_col_sorted]
x_end_column_sort = x_end_column[ind_args_col_sorted]
#print('babali4')
# print('babali4')
for ii in range(len(y_col_sort)):
y_lines_by_order.append(y_col_sort[ii])
x_start_by_order.append(x_start_column_sort[ii])
@ -2064,27 +2062,27 @@ def return_boxes_of_images_by_order_of_reading_new(
x_start_copy = list(x_start_by_order)
x_end_copy = list(x_end_by_order)
#print(y_copy,'y_copy')
# print(y_copy,'y_copy')
y_itself = y_copy.pop(il)
x_start_itself = x_start_copy.pop(il)
x_end_itself = x_end_copy.pop(il)
#print(y_copy,'y_copy2')
# print(y_copy,'y_copy2')
for column in range(x_start_itself, x_end_itself + 1):
#print(column,'cols')
# print(column,'cols')
y_in_cols = []
for yic in range(len(y_copy)):
#print('burda')
# print('burda')
if (y_copy[yic] > y_itself and
x_start_copy[yic] <= column <= x_end_copy[yic]):
y_in_cols.append(y_copy[yic])
#print('burda2')
#print(y_in_cols,'y_in_cols')
# print('burda2')
# print(y_in_cols,'y_in_cols')
if len(y_in_cols) > 0:
y_down = np.min(y_in_cols)
else:
y_down = [int(splitter_y_new[i + 1])][0]
#print(y_itself,'y_itself')
# print(y_itself,'y_itself')
boxes.append([peaks_neg_tot[column],
peaks_neg_tot[column + 1],
y_itself,
@ -2110,8 +2108,8 @@ def return_boxes_of_images_by_order_of_reading_new(
columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
# y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
# x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
if len(new_main_sep_y) > 0:
@ -2124,18 +2122,18 @@ def return_boxes_of_images_by_order_of_reading_new(
all_columns = np.arange(len(peaks_neg_tot) - 1)
columns_not_covered = list(set(all_columns))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
# y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
# x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
ind_args = np.array(range(len(y_type_2)))
#ind_args=np.array(ind_args)
# ind_args=np.array(ind_args)
for column in range(len(peaks_neg_tot) - 1):
#print(column,'column')
# print(column,'column')
ind_args_in_col = ind_args[x_starting == column]
ind_args_in_col = np.array(ind_args_in_col)
#print(len(y_type_2))
# print(len(y_type_2))
y_column = y_type_2[ind_args_in_col]
x_start_column = x_starting[ind_args_in_col]
x_end_column = x_ending[ind_args_in_col]
@ -2144,9 +2142,9 @@ def return_boxes_of_images_by_order_of_reading_new(
y_col_sort = y_column[ind_args_col_sorted]
x_start_column_sort = x_start_column[ind_args_col_sorted]
x_end_column_sort = x_end_column[ind_args_col_sorted]
#print('babali4')
# print('babali4')
for ii in range(len(y_col_sort)):
#print('babali5')
# print('babali5')
y_lines_by_order.append(y_col_sort[ii])
x_start_by_order.append(x_start_column_sort[ii])
x_end_by_order.append(x_end_column_sort[ii] - 1)
@ -2156,33 +2154,33 @@ def return_boxes_of_images_by_order_of_reading_new(
x_start_copy = list(x_start_by_order)
x_end_copy = list(x_end_by_order)
#print(y_copy,'y_copy')
# print(y_copy,'y_copy')
y_itself = y_copy.pop(il)
x_start_itself = x_start_copy.pop(il)
x_end_itself = x_end_copy.pop(il)
#print(y_copy,'y_copy2')
# print(y_copy,'y_copy2')
for column in range(x_start_itself, x_end_itself + 1):
#print(column,'cols')
# print(column,'cols')
y_in_cols = []
for yic in range(len(y_copy)):
#print('burda')
# print('burda')
if (y_copy[yic] > y_itself and
x_start_copy[yic] <= column <= x_end_copy[yic]):
y_in_cols.append(y_copy[yic])
#print('burda2')
#print(y_in_cols,'y_in_cols')
# print('burda2')
# print(y_in_cols,'y_in_cols')
if len(y_in_cols) > 0:
y_down = np.min(y_in_cols)
else:
y_down = [int(splitter_y_new[i + 1])][0]
#print(y_itself,'y_itself')
# print(y_itself,'y_itself')
boxes.append([peaks_neg_tot[column],
peaks_neg_tot[column + 1],
y_itself,
y_down])
#else:
#boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]])
# else:
# boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]])
if right2left_readingorder:
peaks_neg_tot_tables_new = []

@ -65,7 +65,7 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
polygon = geometry.Polygon([point[0] for point in c])
# area = cv2.contourArea(c)
area = polygon.area
##print(np.prod(thresh.shape[:2]))
# print(np.prod(thresh.shape[:2]))
# Check that polygon has area greater than minimal area
# print(hierarchy[0][jv][3],hierarchy )
if (min_area * np.prod(image.shape[:2]) <= area <= max_area * np.prod(image.shape[:2]) and
@ -200,7 +200,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first):
# print(img.shape,'img')
img_copy = rotation_image_new(img_copy, -slope_first)
##print(img_copy.shape,'img_copy')
# print(img_copy.shape,'img_copy')
# plt.imshow(img_copy)
# plt.show()
@ -258,8 +258,8 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map):
if not len(cnts):
return []
img = cv2.resize(img, (int(img.shape[1] / 6), int(img.shape[0] / 6)), interpolation=cv2.INTER_NEAREST)
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
#cnts = cnts/2
# cnts = list( (np.array(cnts)/2).astype(np.int16) )
# cnts = cnts/2
cnts = [(i / 6).astype(np.int) for i in cnts]
results = map(partial(do_back_rotation_and_get_cnt_back,
img=img,

@ -48,7 +48,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
# plt.imshow(img_con[:,:,0])
# plt.show()
##img_con=cv2.dilate(img_con, kernel, iterations=30)
# img_con=cv2.dilate(img_con, kernel, iterations=30)
# plt.imshow(img_con[:,:,0])
# plt.show()
@ -126,13 +126,13 @@ def adhere_drop_capital_region_into_corresponding_textline(
contours_combined = return_contours_of_interested_region(img_textlines, 255, 0)
#plt.imshow(img_textlines)
#plt.show()
# plt.imshow(img_textlines)
# plt.show()
#imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
#ret, thresh = cv2.threshold(imgray, 0, 255, 0)
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
#contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
areas_cnt_text = np.array(
[cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
@ -193,10 +193,10 @@ def adhere_drop_capital_region_into_corresponding_textline(
img_textlines = img_textlines.astype(np.uint8)
contours_combined = return_contours_of_interested_region(img_textlines, 255, 0)
##imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
##ret, thresh = cv2.threshold(imgray, 0, 255, 0)
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
##contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
areas_cnt_text = np.array(
[cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
@ -209,7 +209,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
# contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
# print(np.shape(contours_biggest),'contours_biggest')
# print(np.shape(all_found_textline_polygons[int(region_final)][arg_min]))
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
# contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
if len(contours_combined) == 1:
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
elif len(contours_combined) == 2:
@ -250,10 +250,10 @@ def adhere_drop_capital_region_into_corresponding_textline(
img_textlines = img_textlines.astype(np.uint8)
contours_combined = return_contours_of_interested_region(img_textlines, 255, 0)
#imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
#ret, thresh = cv2.threshold(imgray, 0, 255, 0)
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
#contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(len(contours_combined),'len textlines mixed')
areas_cnt_text = np.array(
@ -266,7 +266,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] # -all_box_coord[int(region_final)][2]
contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] # -all_box_coord[int(region_final)][0]
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
# contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
if len(contours_combined) == 1:
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
elif len(contours_combined) == 2:
@ -281,49 +281,49 @@ def adhere_drop_capital_region_into_corresponding_textline(
else:
pass
##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
###print(all_box_coord[j_cont])
###print(cx_t)
###print(cy_t)
###print(cx_d[i_drop])
###print(cy_d[i_drop])
##y_lines=all_box_coord[int(region_final)][0]+np.array(cy_t)
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
# print(all_box_coord[j_cont])
# print(cx_t)
# print(cy_t)
# print(cx_d[i_drop])
# print(cy_d[i_drop])
# y_lines=all_box_coord[int(region_final)][0]+np.array(cy_t)
##y_lines[y_lines<y_min_d[i_drop]]=0
###print(y_lines)
# y_lines[y_lines<y_min_d[i_drop]]=0
# print(y_lines)
##arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
###print(arg_min)
# arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
# print(arg_min)
##cnt_nearest=np.copy(all_found_textline_polygons[int(region_final)][arg_min])
##cnt_nearest[:,0,0]=all_found_textline_polygons[int(region_final)][arg_min][:,0,0]#+all_box_coord[int(region_final)][2]
##cnt_nearest[:,0,1]=all_found_textline_polygons[int(region_final)][arg_min][:,0,1]#+all_box_coord[int(region_final)][0]
# cnt_nearest=np.copy(all_found_textline_polygons[int(region_final)][arg_min])
# cnt_nearest[:,0,0]=all_found_textline_polygons[int(region_final)][arg_min][:,0,0]#+all_box_coord[int(region_final)][2]
# cnt_nearest[:,0,1]=all_found_textline_polygons[int(region_final)][arg_min][:,0,1]#+all_box_coord[int(region_final)][0]
##img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
##img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
##img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
# img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
# img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
# img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
##img_textlines=img_textlines.astype(np.uint8)
# img_textlines=img_textlines.astype(np.uint8)
##plt.imshow(img_textlines)
##plt.show()
##imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
##ret, thresh = cv2.threshold(imgray, 0, 255, 0)
# plt.imshow(img_textlines)
# plt.show()
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
##contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
# contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
##print(len(contours_combined),'len textlines mixed')
##areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
# print(len(contours_combined),'len textlines mixed')
# areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
##contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
# contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
###print(np.shape(contours_biggest))
###print(contours_biggest[:])
##contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
##contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
# print(np.shape(contours_biggest))
# print(contours_biggest[:])
# contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
# contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
##all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest
# contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
# all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest
else:
if len(region_with_intersected_drop) > 1:
@ -369,10 +369,10 @@ def adhere_drop_capital_region_into_corresponding_textline(
img_textlines = img_textlines.astype(np.uint8)
contours_combined = return_contours_of_interested_region(img_textlines, 255, 0)
#imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
#ret, thresh = cv2.threshold(imgray, 0, 255, 0)
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
#contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(len(contours_combined),'len textlines mixed')
areas_cnt_text = np.array(
@ -437,10 +437,10 @@ def adhere_drop_capital_region_into_corresponding_textline(
img_textlines = img_textlines.astype(np.uint8)
contours_combined = return_contours_of_interested_region(img_textlines, 255, 0)
#imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
#ret, thresh = cv2.threshold(imgray, 0, 255, 0)
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
#contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(len(contours_combined),'len textlines mixed')
areas_cnt_text = np.array(
@ -469,69 +469,69 @@ def adhere_drop_capital_region_into_corresponding_textline(
else:
pass
#####for i_drop in range(len(polygons_of_drop_capitals)):
#####for j_cont in range(len(contours_only_text_parent)):
#####img_con=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
#####img_con=cv2.fillPoly(img_con,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
#####img_con=cv2.fillPoly(img_con,pts=[contours_only_text_parent[j_cont]],color=(255,255,255))
#####img_con=img_con.astype(np.uint8)
######imgray = cv2.cvtColor(img_con, cv2.COLOR_BGR2GRAY)
######ret, thresh = cv2.threshold(imgray, 0, 255, 0)
######contours_new,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
#####contours_new,hir_new=return_contours_of_image(img_con)
#####contours_new_parent=return_parent_contours( contours_new,hir_new)
######plt.imshow(img_con)
######plt.show()
#####try:
#####if len(contours_new_parent)==1:
######print(all_found_textline_polygons[j_cont][0])
#####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont])
######print(all_box_coord[j_cont])
######print(cx_t)
######print(cy_t)
######print(cx_d[i_drop])
######print(cy_d[i_drop])
#####y_lines=all_box_coord[j_cont][0]+np.array(cy_t)
######print(y_lines)
#####arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
######print(arg_min)
#####cnt_nearest=np.copy(all_found_textline_polygons[j_cont][arg_min])
#####cnt_nearest[:,0]=all_found_textline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2]
#####cnt_nearest[:,1]=all_found_textline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0]
#####img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
#####img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
#####img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
#####img_textlines=img_textlines.astype(np.uint8)
#####imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
#####ret, thresh = cv2.threshold(imgray, 0, 255, 0)
#####contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
#####areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
#####contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
######print(np.shape(contours_biggest))
######print(contours_biggest[:])
#####contours_biggest[:,0,0]=contours_biggest[:,0,0]-all_box_coord[j_cont][2]
#####contours_biggest[:,0,1]=contours_biggest[:,0,1]-all_box_coord[j_cont][0]
#####all_found_textline_polygons[j_cont][arg_min]=contours_biggest
######print(contours_biggest)
######plt.imshow(img_textlines[:,:,0])
######plt.show()
#####else:
#####pass
#####except:
#####pass
# for i_drop in range(len(polygons_of_drop_capitals)):
# for j_cont in range(len(contours_only_text_parent)):
# img_con=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
# img_con=cv2.fillPoly(img_con,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
# img_con=cv2.fillPoly(img_con,pts=[contours_only_text_parent[j_cont]],color=(255,255,255))
# img_con=img_con.astype(np.uint8)
# #imgray = cv2.cvtColor(img_con, cv2.COLOR_BGR2GRAY)
# #ret, thresh = cv2.threshold(imgray, 0, 255, 0)
# #contours_new,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
# contours_new,hir_new=return_contours_of_image(img_con)
# contours_new_parent=return_parent_contours( contours_new,hir_new)
# #plt.imshow(img_con)
# #plt.show()
# try:
# if len(contours_new_parent)==1:
# #print(all_found_textline_polygons[j_cont][0])
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont])
# #print(all_box_coord[j_cont])
# #print(cx_t)
# #print(cy_t)
# #print(cx_d[i_drop])
# #print(cy_d[i_drop])
# y_lines=all_box_coord[j_cont][0]+np.array(cy_t)
# #print(y_lines)
# arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
# #print(arg_min)
# cnt_nearest=np.copy(all_found_textline_polygons[j_cont][arg_min])
# cnt_nearest[:,0]=all_found_textline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2]
# cnt_nearest[:,1]=all_found_textline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0]
# img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
# img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
# img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
# img_textlines=img_textlines.astype(np.uint8)
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
# contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
# areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
# contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
# #print(np.shape(contours_biggest))
# #print(contours_biggest[:])
# contours_biggest[:,0,0]=contours_biggest[:,0,0]-all_box_coord[j_cont][2]
# contours_biggest[:,0,1]=contours_biggest[:,0,1]-all_box_coord[j_cont][0]
# all_found_textline_polygons[j_cont][arg_min]=contours_biggest
# #print(contours_biggest)
# #plt.imshow(img_textlines[:,:,0])
# #plt.show()
# else:
# pass
# except:
# pass
return all_found_textline_polygons

@ -12,7 +12,7 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
mask_marginals = mask_marginals.astype(np.uint8)
text_with_lines = text_with_lines.astype(np.uint8)
##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3)
# text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3)
text_with_lines_eroded = cv2.erode(text_with_lines, kernel, iterations=5)
@ -34,7 +34,7 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
thickness_along_y_percent = text_with_lines_y_eroded.max() / (float(text_with_lines.shape[0])) * 100
#print(thickness_along_y_percent,'thickness_along_y_percent')
# print(thickness_along_y_percent,'thickness_along_y_percent')
if thickness_along_y_percent < 30:
min_textline_thickness = 8
@ -191,25 +191,25 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
text_regions = cv2.fillPoly(text_regions, pts=marginlas_should_be_main_text, color=(4, 4))
#text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0
#text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0
# text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0
# text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0
text_regions[:, :int(min_point_of_left_marginal)][
text_regions[:, :int(min_point_of_left_marginal)] == 1] = 0
text_regions[:, int(max_point_of_right_marginal):][
text_regions[:, int(max_point_of_right_marginal):] == 1] = 0
###text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4
# text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4
###text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4
#plt.plot(region_sum_0)
#plt.plot(peaks,region_sum_0[peaks],'*')
#plt.show()
# text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4
# plt.plot(region_sum_0)
# plt.plot(peaks,region_sum_0[peaks],'*')
# plt.show()
#plt.imshow(text_regions)
#plt.show()
# plt.imshow(text_regions)
# plt.show()
#sys.exit()
# sys.exit()
else:
pass
return text_regions

@ -217,14 +217,14 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
textline_con_fil = filter_contours_area_of_image(img_patch,
textline_con, hierarchy,
max_area=1, min_area=0.0008)
y_diff_mean = np.mean(np.diff(peaks_new_tot)) #self.find_contours_mean_y_diff(textline_con_fil)
y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil)
sigma_gaus = int(y_diff_mean * (7. / 40.0))
#print(sigma_gaus,'sigma_gaus')
# print(sigma_gaus,'sigma_gaus')
except:
sigma_gaus = 12
if sigma_gaus < 3:
sigma_gaus = 3
#print(sigma_gaus,'sigma')
# print(sigma_gaus,'sigma')
y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus)
y_padded_up_to_down = -y_padded + np.max(y_padded)
@ -279,21 +279,21 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
peaks_new_tot.append(i1)
peaks_new_tot = np.sort(peaks_new_tot)
##plt.plot(y_padded_up_to_down_padded)
##plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
##plt.show()
# plt.plot(y_padded_up_to_down_padded)
# plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
# plt.show()
##plt.plot(y_padded_up_to_down_padded)
##plt.plot(peaks_neg_new,y_padded_up_to_down_padded[peaks_neg_new],'*')
##plt.show()
# plt.plot(y_padded_up_to_down_padded)
# plt.plot(peaks_neg_new,y_padded_up_to_down_padded[peaks_neg_new],'*')
# plt.show()
##plt.plot(y_padded_smoothed)
##plt.plot(peaks,y_padded_smoothed[peaks],'*')
##plt.show()
# plt.plot(y_padded_smoothed)
# plt.plot(peaks,y_padded_smoothed[peaks],'*')
# plt.show()
##plt.plot(y_padded_smoothed)
##plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*')
##plt.show()
# plt.plot(y_padded_smoothed)
# plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*')
# plt.show()
peaks = peaks_new_tot[:]
peaks_neg = peaks_neg_new[:]
else:
@ -327,29 +327,29 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.:
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = y_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
point_down = y_max_cont - 1 # peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
else:
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = y_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
point_down = y_max_cont - 1 # peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(
1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./2)
else:
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.:
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
1.1 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
else:
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(
1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
1.33 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
1.1 * dis_to_next_down) # #-int(dis_to_next_down*1./2)
if point_down_narrow >= img_patch.shape[0]:
point_down_narrow = img_patch.shape[0] - 2
@ -422,8 +422,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
else:
x_min = np.min(xvinside) # max(x_min_interest,x_min_cont)
x_max = np.max(xvinside) # min(x_max_interest,x_max_cont)
#x_min = x_min_cont
#x_max = x_max_cont
# x_min = x_min_cont
# x_max = x_max_cont
y_min = y_min_cont
y_max = y_max_cont
@ -469,7 +469,7 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
dis_to_next = np.abs(peaks[1] - peaks[0])
for jj in range(len(peaks)):
if jj == 0:
point_up = 0 #peaks[jj] + first_nonzero - int(1. / 1.7 * dis_to_next)
point_up = 0 # peaks[jj] + first_nonzero - int(1. / 1.7 * dis_to_next)
if point_up < 0:
point_up = 1
point_down = peaks_neg[1] + first_nonzero # peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next)
@ -478,7 +478,7 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
if point_down >= img_patch.shape[0]:
point_down = img_patch.shape[0] - 2
try:
point_up = peaks_neg[2] + first_nonzero #peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
point_up = peaks_neg[2] + first_nonzero # peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
except:
point_up = peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
@ -705,29 +705,29 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1 # peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
else:
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
point_down = x_max_cont - 1 # peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(
1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./2)
else:
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
1.1 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
else:
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(
1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
1.33 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
1.1 * dis_to_next_down) # #-int(dis_to_next_down*1./2)
if point_down_narrow >= img_patch.shape[0]:
point_down_narrow = img_patch.shape[0] - 2
@ -1104,8 +1104,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
std_value_of_peaks = np.std(y_padded_smoothed[peaks])
peaks_values = y_padded_smoothed[peaks]
###peaks_neg = peaks_neg - 20 - 20
###peaks = peaks - 20
# #peaks_neg = peaks_neg - 20 - 20
# #peaks = peaks - 20
peaks_neg_true = peaks_neg[:]
peaks_pos_true = peaks[:]
@ -1125,14 +1125,14 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
peaks_pos_true = peaks_pos_true - 20
for i in range(len(peaks_pos_true)):
##img_patch[peaks_pos_true[i]-8:peaks_pos_true[i]+8,:]=1
# img_patch[peaks_pos_true[i]-8:peaks_pos_true[i]+8,:]=1
img_patch[peaks_pos_true[i] - 6: peaks_pos_true[i] + 6, :] = 1
else:
pass
kernel = np.ones((5, 5), np.uint8)
# img_patch = cv2.erode(img_patch,kernel,iterations = 3)
#######################img_patch = cv2.erode(img_patch,kernel,iterations = 2)
# # # # # # # # # # # #img_patch = cv2.erode(img_patch,kernel,iterations = 2)
img_patch = cv2.erode(img_patch, kernel, iterations=1)
return img_patch
@ -1156,8 +1156,8 @@ def separate_lines_new_inside_tiles(img_path, thetha):
mada_n = img_path.sum(axis=1)
##plt.plot(mada_n)
##plt.show()
# plt.plot(mada_n)
# plt.show()
first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None))
@ -1252,7 +1252,7 @@ def separate_lines_new_inside_tiles(img_path, thetha):
#plt.xlabel('Dichte')
#plt.ylabel('Y')
##plt.plot([0,len(y)], [grenze,grenze])
# plt.plot([0,len(y)], [grenze,grenze])
#plt.show()
"""
peaks_neg_true = peaks_neg_true - 20 - 20
@ -1300,7 +1300,7 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
contours_imgs, hierarchy,
max_area=max_area, min_area=min_area)
cont_final = []
###print(add_boxes_coor_into_textlines,'ikki')
# #print(add_boxes_coor_into_textlines,'ikki')
for i in range(len(contours_imgs)):
img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3))
img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255))
@ -1311,16 +1311,16 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
_, threshrot = cv2.threshold(imgrayrot, 0, 255, 0)
contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
##contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[
##0]
##contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]
##if add_boxes_coor_into_textlines:
##print(np.shape(contours_text_rot[0]),'sjppo')
##contours_text_rot[0][:, 0, 0]=contours_text_rot[0][:, 0, 0] + box_ind[0]
##contours_text_rot[0][:, 0, 1]=contours_text_rot[0][:, 0, 1] + box_ind[1]
# contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[
# 0]
# contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]
# if add_boxes_coor_into_textlines:
# print(np.shape(contours_text_rot[0]),'sjppo')
# contours_text_rot[0][:, 0, 0]=contours_text_rot[0][:, 0, 0] + box_ind[0]
# contours_text_rot[0][:, 0, 1]=contours_text_rot[0][:, 0, 1] + box_ind[1]
cont_final.append(contours_text_rot[0])
##print(cont_final,'nadizzzz')
# print(cont_final,'nadizzzz')
return None, cont_final
@ -1549,7 +1549,7 @@ def return_deskew_slop(img_patch_org, sigma_des, n_tot_angles=100,
plotter.save_plot_of_textline_density(img_patch_org)
img_int = np.zeros((img_patch_org.shape[0], img_patch_org.shape[1]))
img_int[:, :] = img_patch_org[:, :] #img_patch_org[:,:,0]
img_int[:, :] = img_patch_org[:, :] # img_patch_org[:,:,0]
max_shape = np.max(img_int.shape)
img_resized = np.zeros((int(max_shape * 1.1), int(max_shape * 1.1)))
@ -1557,25 +1557,25 @@ def return_deskew_slop(img_patch_org, sigma_des, n_tot_angles=100,
onset_x = int((img_resized.shape[1] - img_int.shape[1]) / 2.)
onset_y = int((img_resized.shape[0] - img_int.shape[0]) / 2.)
#img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) ))
#img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:]
# img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) ))
# img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:]
img_resized[onset_y:onset_y + img_int.shape[0], onset_x:onset_x + img_int.shape[1]] = img_int[:, :]
#print(img_resized.shape,'img_resizedshape')
#plt.imshow(img_resized)
#plt.show()
# print(img_resized.shape,'img_resizedshape')
# plt.imshow(img_resized)
# plt.show()
if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
#plt.imshow(img_resized)
#plt.show()
# plt.imshow(img_resized)
# plt.show()
angles = np.array([-45, 0, 45, 90, ])
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
elif main_page:
#plt.imshow(img_resized)
#plt.show()
angles = np.linspace(-12, 12, n_tot_angles) #np.array([0 , 45 , 90 , -45])
# plt.imshow(img_resized)
# plt.show()
angles = np.linspace(-12, 12, n_tot_angles) # np.array([0 , 45 , 90 , -45])
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
early_slope_edge = 11
@ -1672,12 +1672,12 @@ def do_work_of_slopes_new(
all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].copy()
mask_only_con_region = mask_only_con_region[y: y + h, x: x + w]
##plt.imshow(textline_mask_tot_ea)
##plt.show()
##plt.imshow(all_text_region_raw)
##plt.show()
##plt.imshow(mask_only_con_region)
##plt.show()
# plt.imshow(textline_mask_tot_ea)
# plt.show()
# plt.imshow(all_text_region_raw)
# plt.show()
# plt.imshow(mask_only_con_region)
# plt.show()
all_text_region_raw[mask_only_con_region == 0] = 0
cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text)
@ -1746,7 +1746,7 @@ def do_work_of_slopes_new_curved(
logger=logger, plotter=plotter)
# new line added
##print(np.shape(textline_rotated_separated),np.shape(mask_biggest))
# print(np.shape(textline_rotated_separated),np.shape(mask_biggest))
textline_rotated_separated[mask_region_in_patch_region[:, :] != 1] = 0
# till here

@ -174,7 +174,7 @@ class EynollahXmlWriter:
if ocr_all_textlines_textregion:
textline.set_TextEquiv([TextEquivType(Unicode=ocr_all_textlines_textregion[j])])
text_region.add_TextLine(textline)
#region_bboxes = all_box_coord[region_idx]
# region_bboxes = all_box_coord[region_idx]
points_co = ''
for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[j]):
if len(contour_textline) == 2:
@ -342,9 +342,9 @@ class EynollahXmlWriter:
points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm],
page_coord)))
page.add_TextRegion(dropcapital)
###all_box_coord_drop = None
###slopes_drop = None
###self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)
# all_box_coord_drop = None
# slopes_drop = None
# self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)
for mm in range(len(found_polygons_text_region_img)):
page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(

@ -45,7 +45,7 @@ class CapturingTestCase(TestCase):
"""
self.capture_out_err() # XXX snapshot just before executing the CLI
code = 0
sys.argv[1:] = args # XXX necessary because sys.argv reflects pytest args not cli args
sys.argv[1:] = args # XXX necessary because sys.argv reflects pytest args not cli args
try:
cli.main(args=args)
except SystemExit as e:

Loading…
Cancel
Save