mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 11:49:55 +02:00
pep8-e265: comment should start with #
This commit is contained in:
parent
87ae6d11a9
commit
b4d168cae3
9 changed files with 729 additions and 731 deletions
|
@ -291,7 +291,7 @@ class Eynollah:
|
|||
# "/modelens_1_2_4_5_early_lay_1_2_spaltige"
|
||||
# "/model_3_eraly_layout_no_patches_1_2_spaltige"
|
||||
self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"
|
||||
##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
|
||||
# self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
|
||||
# "/modelens_full_lay_1_3_031124"
|
||||
# "/modelens_full_lay_13__3_19_241024"
|
||||
# "/model_full_lay_13_241024"
|
||||
|
@ -343,7 +343,7 @@ class Eynollah:
|
|||
self.model_region = self.our_load_model(self.model_region_dir_p_ens)
|
||||
self.model_region_p2 = self.our_load_model(self.model_region_dir_p2)
|
||||
self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement)
|
||||
###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new)
|
||||
# self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new)
|
||||
self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np)
|
||||
self.model_region_fl = self.our_load_model(self.model_region_dir_fully)
|
||||
if self.reading_order_machine_based:
|
||||
|
@ -1123,7 +1123,7 @@ class Eynollah:
|
|||
img = resize_image(img, img.shape[0], img_width_model)
|
||||
|
||||
self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
|
||||
##margin = int(marginal_of_patch_percent * img_height_model)
|
||||
# margin = int(marginal_of_patch_percent * img_height_model)
|
||||
# width_mid = img_width_model - 2 * margin
|
||||
# height_mid = img_height_model - 2 * margin
|
||||
img = img / 255.0
|
||||
|
@ -1780,15 +1780,15 @@ class Eynollah:
|
|||
text_regions_p_true[text_regions_p_true.shape[0] - 15:text_regions_p_true.shape[0], :] = 0
|
||||
text_regions_p_true[:, text_regions_p_true.shape[1] - 15:text_regions_p_true.shape[1]] = 0
|
||||
|
||||
##polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.0001)
|
||||
# polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.0001)
|
||||
polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.001)
|
||||
image_boundary_of_doc = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1]))
|
||||
|
||||
###image_boundary_of_doc[:6, :] = 1
|
||||
###image_boundary_of_doc[text_regions_p_true.shape[0]-6:text_regions_p_true.shape[0], :] = 1
|
||||
# image_boundary_of_doc[:6, :] = 1
|
||||
# image_boundary_of_doc[text_regions_p_true.shape[0]-6:text_regions_p_true.shape[0], :] = 1
|
||||
|
||||
###image_boundary_of_doc[:, :6] = 1
|
||||
###image_boundary_of_doc[:, text_regions_p_true.shape[1]-6:text_regions_p_true.shape[1]] = 1
|
||||
# image_boundary_of_doc[:, :6] = 1
|
||||
# image_boundary_of_doc[:, text_regions_p_true.shape[1]-6:text_regions_p_true.shape[1]] = 1
|
||||
|
||||
polygons_of_images_fin = []
|
||||
for ploy_img_ind in polygons_of_images:
|
||||
|
@ -1802,7 +1802,7 @@ class Eynollah:
|
|||
test_poly_image_intersected_area = test_poly_image_intersected_area.sum()
|
||||
|
||||
if test_poly_image_intersected_area==0:
|
||||
##polygons_of_images_fin.append(ploy_img_ind)
|
||||
# polygons_of_images_fin.append(ploy_img_ind)
|
||||
|
||||
box = cv2.boundingRect(ploy_img_ind)
|
||||
_, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
|
||||
|
@ -1862,23 +1862,23 @@ class Eynollah:
|
|||
# if (not self.input_binary) or self.full_layout:
|
||||
# if self.input_binary:
|
||||
# img_bin = np.copy(img_resized)
|
||||
###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30):
|
||||
###if not self.dir_in:
|
||||
###self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization)
|
||||
###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5)
|
||||
# if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30):
|
||||
# if not self.dir_in:
|
||||
# self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization)
|
||||
# prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5)
|
||||
|
||||
####print("inside bin ", time.time()-t_bin)
|
||||
###prediction_bin=prediction_bin[:,:,0]
|
||||
###prediction_bin = (prediction_bin[:,:]==0)*1
|
||||
###prediction_bin = prediction_bin*255
|
||||
# print("inside bin ", time.time()-t_bin)
|
||||
# prediction_bin=prediction_bin[:,:,0]
|
||||
# prediction_bin = (prediction_bin[:,:]==0)*1
|
||||
# prediction_bin = prediction_bin*255
|
||||
|
||||
###prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
|
||||
# prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
|
||||
|
||||
###prediction_bin = prediction_bin.astype(np.uint16)
|
||||
####img= np.copy(prediction_bin)
|
||||
###img_bin = np.copy(prediction_bin)
|
||||
###else:
|
||||
###img_bin = np.copy(img_resized)
|
||||
# prediction_bin = prediction_bin.astype(np.uint16)
|
||||
# img= np.copy(prediction_bin)
|
||||
# img_bin = np.copy(prediction_bin)
|
||||
# else:
|
||||
# img_bin = np.copy(img_resized)
|
||||
if self.ocr and not self.input_binary:
|
||||
if not self.dir_in:
|
||||
self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization)
|
||||
|
@ -1892,7 +1892,7 @@ class Eynollah:
|
|||
img_bin = np.copy(img_resized)
|
||||
# print("inside 1 ", time.time()-t_in)
|
||||
|
||||
###textline_mask_tot_ea = self.run_textline(img_bin)
|
||||
# textline_mask_tot_ea = self.run_textline(img_bin)
|
||||
self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape),
|
||||
len(np.unique(img_resized)))
|
||||
textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier)
|
||||
|
@ -1907,7 +1907,7 @@ class Eynollah:
|
|||
# print("inside 2 ", time.time()-t_in)
|
||||
if not self.dir_in:
|
||||
self.model_region_1_2, _ = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np)
|
||||
##self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light)
|
||||
# self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light)
|
||||
|
||||
if num_col_classifier == 1 or num_col_classifier == 2:
|
||||
model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np)
|
||||
|
@ -1934,7 +1934,7 @@ class Eynollah:
|
|||
prediction_regions_org = self.do_prediction_new_concept(
|
||||
True, img_resized, self.model_region_1_2, n_batch_inference=2,
|
||||
thresholding_for_some_classes_in_light_version=True)
|
||||
###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True)
|
||||
# prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True)
|
||||
# print("inside 3 ", time.time()-t_in)
|
||||
# plt.imshow(prediction_regions_org[:,:,0])
|
||||
# plt.show()
|
||||
|
@ -1947,9 +1947,9 @@ class Eynollah:
|
|||
mask_texts_only = (prediction_regions_org[:, :] == 1) * 1
|
||||
mask_texts_only = mask_texts_only.astype('uint8')
|
||||
|
||||
##if num_col_classifier == 1 or num_col_classifier == 2:
|
||||
###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1)
|
||||
##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1)
|
||||
# if num_col_classifier == 1 or num_col_classifier == 2:
|
||||
# mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1)
|
||||
# mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1)
|
||||
|
||||
mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2, 2), np.uint8), iterations=1)
|
||||
mask_images_only = (prediction_regions_org[:, :] == 2) * 1
|
||||
|
@ -1978,7 +1978,7 @@ class Eynollah:
|
|||
# sys.exit()
|
||||
|
||||
polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
|
||||
##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts)
|
||||
# polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts)
|
||||
polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)
|
||||
|
||||
text_regions_p_true = np.zeros(prediction_regions_org.shape)
|
||||
|
@ -2024,7 +2024,7 @@ class Eynollah:
|
|||
prediction_regions_org_y = prediction_regions_org_y[:, :, 0]
|
||||
mask_zeros_y = (prediction_regions_org_y[:, :] == 0) * 1
|
||||
|
||||
##img_only_regions_with_sep = ( (prediction_regions_org_y[:,:] != 3) & (prediction_regions_org_y[:,:] != 0) )*1
|
||||
# img_only_regions_with_sep = ( (prediction_regions_org_y[:,:] != 3) & (prediction_regions_org_y[:,:] != 0) )*1
|
||||
img_only_regions_with_sep = (prediction_regions_org_y == 1).astype(np.uint8)
|
||||
try:
|
||||
img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], KERNEL, iterations=20)
|
||||
|
@ -2285,7 +2285,7 @@ class Eynollah:
|
|||
args_contours = np.array(range(len(arg_text_con)))
|
||||
order_by_con_main = np.zeros(len(arg_text_con))
|
||||
|
||||
############################# head
|
||||
# head
|
||||
|
||||
arg_text_con_h = []
|
||||
for ii in range(len(cx_text_only_h)):
|
||||
|
@ -2880,10 +2880,10 @@ class Eynollah:
|
|||
else:
|
||||
img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], KERNEL, iterations=6)
|
||||
|
||||
##print(img_only_regions.shape,'img_only_regions')
|
||||
##plt.imshow(img_only_regions[:,:])
|
||||
##plt.show()
|
||||
##num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
|
||||
# print(img_only_regions.shape,'img_only_regions')
|
||||
# plt.imshow(img_only_regions[:,:])
|
||||
# plt.show()
|
||||
# num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
|
||||
try:
|
||||
num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
|
||||
num_col = num_col + 1
|
||||
|
@ -3311,7 +3311,7 @@ class Eynollah:
|
|||
img_revised_tab[:, :][img_revised_tab[:, :] == 10] = 0
|
||||
img_revised_tab[:, :][img_revised_tab2_d_rotated[:, :, 0] == 10] = 10
|
||||
|
||||
##img_revised_tab=img_revised_tab2[:,:,0]
|
||||
# img_revised_tab=img_revised_tab2[:,:,0]
|
||||
# img_revised_tab=text_regions_p[:,:]
|
||||
text_regions_p[:, :][text_regions_p[:, :] == 10] = 0
|
||||
text_regions_p[:, :][img_revised_tab[:, :] == 10] = 10
|
||||
|
@ -3348,11 +3348,11 @@ class Eynollah:
|
|||
# In the new full-layout model output, the drop-capital label is 3 and the separator label is 5.
|
||||
|
||||
text_regions_p[:, :][regions_fully[:, :, 0] == 5] = 6
|
||||
###regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4
|
||||
# regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4
|
||||
|
||||
# text_regions_p[:,:][regions_fully[:,:,0]==6]=6
|
||||
##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p)
|
||||
##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4
|
||||
# regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p)
|
||||
# regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4
|
||||
drop_capital_label_in_full_layout_model = 3
|
||||
|
||||
drops = (regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model) * 1
|
||||
|
@ -3365,20 +3365,20 @@ class Eynollah:
|
|||
|
||||
regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(
|
||||
regions_fully, drop_capital_label_in_full_layout_model, text_regions_p)
|
||||
##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier)
|
||||
##if num_col_classifier > 2:
|
||||
##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0
|
||||
##else:
|
||||
##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p)
|
||||
# regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier)
|
||||
# if num_col_classifier > 2:
|
||||
# regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0
|
||||
# else:
|
||||
# regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p)
|
||||
|
||||
###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions)
|
||||
# regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions)
|
||||
# plt.imshow(regions_fully[:,:,0])
|
||||
# plt.show()
|
||||
text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4
|
||||
####text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4
|
||||
# text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4
|
||||
# plt.imshow(text_regions_p)
|
||||
# plt.show()
|
||||
####if not self.tables:
|
||||
# if not self.tables:
|
||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||
_, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(
|
||||
image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew)
|
||||
|
@ -3572,7 +3572,7 @@ class Eynollah:
|
|||
# plt.figure(ind_tot)
|
||||
# plt.imshow(textline_image)
|
||||
# plt.plot([peaks_final, peaks_final], [0, height-1])
|
||||
##plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
|
||||
# plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
|
||||
# plt.savefig('./'+str(ind_tot)+'.png')
|
||||
|
||||
return peaks_final
|
||||
|
@ -3666,14 +3666,14 @@ class Eynollah:
|
|||
|
||||
# print(generated_text1,'generated_text1')
|
||||
# print(generated_text2, 'generated_text2')
|
||||
#print('########################################')
|
||||
# print('##########')
|
||||
else:
|
||||
pixel_values = processor(textline_image, return_tensors="pt").pixel_values
|
||||
generated_ids = model_ocr.generate(pixel_values.to(device))
|
||||
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
||||
|
||||
# print(generated_text,'generated_text')
|
||||
#print('########################################')
|
||||
# print('##########')
|
||||
return generated_text
|
||||
|
||||
def return_ocr_of_textline(
|
||||
|
@ -3706,7 +3706,7 @@ class Eynollah:
|
|||
generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0]
|
||||
# print(generated_text1,'generated_text1')
|
||||
# print(generated_text2, 'generated_text2')
|
||||
#print('########################################')
|
||||
# print('##########')
|
||||
|
||||
match = sq(None, generated_text1, generated_text2).find_longest_match(
|
||||
0, len(generated_text1), 0, len(generated_text2))
|
||||
|
@ -4161,28 +4161,28 @@ class Eynollah:
|
|||
def filter_contours_without_textline_inside(
|
||||
self, contours, text_con_org, contours_textline, contours_only_text_parent_d_ordered):
|
||||
|
||||
###contours_txtline_of_all_textregions = []
|
||||
###for jj in range(len(contours_textline)):
|
||||
###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj]
|
||||
# contours_txtline_of_all_textregions = []
|
||||
# for jj in range(len(contours_textline)):
|
||||
# contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj]
|
||||
|
||||
###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j])
|
||||
### for j in range(len(contours_txtline_of_all_textregions))]
|
||||
###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32))
|
||||
### for j in range(len(M_main_textline))]
|
||||
###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32))
|
||||
### for j in range(len(M_main_textline))]
|
||||
# M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j])
|
||||
# for j in range(len(contours_txtline_of_all_textregions))]
|
||||
# cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32))
|
||||
# for j in range(len(M_main_textline))]
|
||||
# cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32))
|
||||
# for j in range(len(M_main_textline))]
|
||||
|
||||
###M_main = [cv2.moments(contours[j]) for j in range(len(contours))]
|
||||
###cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
|
||||
###cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
|
||||
# M_main = [cv2.moments(contours[j]) for j in range(len(contours))]
|
||||
# cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
|
||||
# cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
|
||||
|
||||
###contours_with_textline = []
|
||||
###for ind_tr, con_tr in enumerate(contours):
|
||||
###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False)
|
||||
### for index_textline_con in range(len(contours_txtline_of_all_textregions)) ]
|
||||
###results = np.array(results)
|
||||
###if np.any(results==1):
|
||||
###contours_with_textline.append(con_tr)
|
||||
# contours_with_textline = []
|
||||
# for ind_tr, con_tr in enumerate(contours):
|
||||
# results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False)
|
||||
# for index_textline_con in range(len(contours_txtline_of_all_textregions)) ]
|
||||
# results = np.array(results)
|
||||
# if np.any(results==1):
|
||||
# contours_with_textline.append(con_tr)
|
||||
|
||||
textregion_index_to_del = []
|
||||
for index_textregion, textlines_textregion in enumerate(contours_textline):
|
||||
|
@ -4399,7 +4399,7 @@ class Eynollah:
|
|||
page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = \
|
||||
self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light)
|
||||
|
||||
##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea)
|
||||
# all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea)
|
||||
|
||||
cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
|
||||
all_found_textline_polygons = filter_contours_area_of_image(
|
||||
|
@ -4533,21 +4533,21 @@ class Eynollah:
|
|||
|
||||
self.logger.info("detection of marginals took %.1fs", time.time() - t1)
|
||||
# print("text region early 2 marginal in %.1fs", time.time() - t0)
|
||||
## birdan sora chock chakir
|
||||
# birdan sora chock chakir  (NOTE(review): presumably "from here on it takes a long time" - confirm with the author)
|
||||
t1 = time.time()
|
||||
if not self.full_layout:
|
||||
polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \
|
||||
boxes, boxes_d, polygons_of_marginals, contours_tables = \
|
||||
self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
|
||||
num_col_classifier, table_prediction, erosion_hurts)
|
||||
###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
|
||||
# polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
|
||||
else:
|
||||
polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \
|
||||
regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
|
||||
self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
|
||||
num_col_classifier, img_only_regions, table_prediction, erosion_hurts,
|
||||
img_bin_light if self.light_version else None)
|
||||
###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
|
||||
# polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
|
||||
if self.light_version:
|
||||
drop_label_in_full_layout = 4
|
||||
textline_mask_tot_ea_org[img_revised_tab == drop_label_in_full_layout] = 0
|
||||
|
@ -4557,7 +4557,7 @@ class Eynollah:
|
|||
text_only_d = (text_regions_p_1_n[:, :] == 1) * 1
|
||||
|
||||
# print("text region early 2 in %.1fs", time.time() - t0)
|
||||
###min_con_area = 0.000005
|
||||
# min_con_area = 0.000005
|
||||
contours_only_text, hir_on_text = return_contours_of_image(text_only)
|
||||
contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text)
|
||||
if len(contours_only_text_parent) > 0:
|
||||
|
@ -4573,13 +4573,13 @@ class Eynollah:
|
|||
contours_only_text_parent = self.return_list_of_contours_with_desired_order(
|
||||
contours_only_text_parent, index_con_parents)
|
||||
|
||||
##try:
|
||||
##contours_only_text_parent = \
|
||||
##list(np.array(contours_only_text_parent,dtype=object)[index_con_parents])
|
||||
##except:
|
||||
##contours_only_text_parent = \
|
||||
##list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents])
|
||||
##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents])
|
||||
# try:
|
||||
# contours_only_text_parent = \
|
||||
# list(np.array(contours_only_text_parent,dtype=object)[index_con_parents])
|
||||
# except:
|
||||
# contours_only_text_parent = \
|
||||
# list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents])
|
||||
# areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents])
|
||||
areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(
|
||||
areas_cnt_text_parent, index_con_parents)
|
||||
|
||||
|
@ -4703,7 +4703,7 @@ class Eynollah:
|
|||
boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
|
||||
boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)
|
||||
# print("text region early 5 in %.1fs", time.time() - t0)
|
||||
## birdan sora chock chakir
|
||||
# birdan sora chock chakir  (NOTE(review): presumably "from here on it takes a long time" - confirm with the author)
|
||||
if not self.curved_line:
|
||||
if self.light_version:
|
||||
if self.textline_light:
|
||||
|
@ -4723,7 +4723,7 @@ class Eynollah:
|
|||
# self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals,
|
||||
# boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
|
||||
# all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
|
||||
#####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
|
||||
# all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
|
||||
all_found_textline_polygons = self.dilate_textregions_contours_textline_version(
|
||||
all_found_textline_polygons)
|
||||
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
|
||||
|
@ -5152,7 +5152,7 @@ class Eynollah_ocr:
|
|||
out_file_ocr = os.path.join(self.dir_out, file_name + '.xml')
|
||||
img = cv2.imread(dir_img)
|
||||
|
||||
##file_name = Path(dir_xmls).stem
|
||||
# file_name = Path(dir_xmls).stem
|
||||
tree1 = ET.parse(dir_xml, parser=ET.XMLParser(encoding="utf-8"))
|
||||
root1 = tree1.getroot()
|
||||
alltags = [elem.tag for elem in root1.iter()]
|
||||
|
|
|
@ -236,8 +236,8 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(
|
|||
if len(remained_sep_indexes) > 1:
|
||||
# print(np.array(remained_sep_indexes),'np.array(remained_sep_indexes)')
|
||||
# print(np.array(mother),'mother')
|
||||
##remained_sep_indexes_without_mother = remained_sep_indexes[mother==0]
|
||||
##remained_sep_indexes_with_child_without_mother = remained_sep_indexes[mother==0 & child==1]
|
||||
# remained_sep_indexes_without_mother = remained_sep_indexes[mother==0]
|
||||
# remained_sep_indexes_with_child_without_mother = remained_sep_indexes[mother==0 & child==1]
|
||||
remained_sep_indexes_without_mother = np.array(list(remained_sep_indexes))[np.array(mother) == 0]
|
||||
remained_sep_indexes_with_child_without_mother = np.array(list(remained_sep_indexes))[
|
||||
(np.array(mother) == 0) & (np.array(child) == 1)]
|
||||
|
@ -379,8 +379,8 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
|
|||
|
||||
def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
|
||||
regions_without_separators_0 = regions_without_separators.sum(axis=0)
|
||||
##plt.plot(regions_without_separators_0)
|
||||
##plt.show()
|
||||
# plt.plot(regions_without_separators_0)
|
||||
# plt.show()
|
||||
sigma_ = 35 # 70#35
|
||||
meda_n_updown = regions_without_separators_0[len(regions_without_separators_0):: -1]
|
||||
first_nonzero = next((i for i, x in enumerate(regions_without_separators_0) if x), 0)
|
||||
|
@ -479,7 +479,7 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
|
|||
num_col = 1
|
||||
peaks_neg_fin = []
|
||||
|
||||
##print(len(peaks_neg_fin))
|
||||
# print(len(peaks_neg_fin))
|
||||
|
||||
diff_peaks = np.abs(np.diff(peaks_neg_fin))
|
||||
cut_off = 400
|
||||
|
@ -514,7 +514,7 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
|
|||
|
||||
p_u_quarter = len(y) - p_quarter
|
||||
|
||||
##print(num_col,'early')
|
||||
# print(num_col,'early')
|
||||
if num_col == 3:
|
||||
if ((peaks_neg_true[0] > p_g_u and
|
||||
peaks_neg_true[1] > p_g_u) or
|
||||
|
@ -564,15 +564,15 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
|
|||
# plt.plot(peaks_neg_true,z[peaks_neg_true],'*')
|
||||
# plt.plot([0,len(y)], [grenze,grenze])
|
||||
# plt.show()
|
||||
##print(len(peaks_neg_true))
|
||||
# print(len(peaks_neg_true))
|
||||
return len(peaks_neg_true), peaks_neg_true
|
||||
|
||||
|
||||
def find_num_col_only_image(regions_without_separators, multiplier=3.8):
|
||||
regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0)
|
||||
|
||||
##plt.plot(regions_without_separators_0)
|
||||
##plt.show()
|
||||
# plt.plot(regions_without_separators_0)
|
||||
# plt.show()
|
||||
sigma_ = 15
|
||||
|
||||
meda_n_updown = regions_without_separators_0[len(regions_without_separators_0):: -1]
|
||||
|
@ -767,8 +767,8 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
|
|||
def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8):
|
||||
regions_without_separators_0 = regions_without_separators[:, :, 0].sum(axis=0)
|
||||
|
||||
##plt.plot(regions_without_separators_0)
|
||||
##plt.show()
|
||||
# plt.plot(regions_without_separators_0)
|
||||
# plt.show()
|
||||
sigma_ = 35 # 70#35
|
||||
|
||||
z = gaussian_filter1d(regions_without_separators_0, sigma_)
|
||||
|
@ -941,7 +941,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
|
|||
all_box_coord, all_found_textline_polygons,
|
||||
slopes,
|
||||
contours_only_text_parent_d_ordered):
|
||||
### to make it faster
|
||||
# To make it faster: remember the original size, then work on inputs downscaled by `zoom`.
|
||||
h_o = regions_model_1.shape[0]
|
||||
w_o = regions_model_1.shape[1]
|
||||
zoom = 3
|
||||
|
@ -953,7 +953,6 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
|
|||
interpolation=cv2.INTER_NEAREST)
|
||||
contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent]
|
||||
|
||||
###
|
||||
cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \
|
||||
find_new_features_of_contours(contours_only_text_parent)
|
||||
|
||||
|
@ -1004,14 +1003,13 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
|
|||
|
||||
# print(all_pixels,pixels_main,pixels_header)
|
||||
|
||||
### to make it faster
|
||||
# Undo the speed-up downscaling: resize back to the original (w_o, h_o) and rescale the contours by `zoom`.
|
||||
regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST)
|
||||
# regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom,
|
||||
# regions_model_full.shape[0] // zoom),
|
||||
# interpolation=cv2.INTER_NEAREST)
|
||||
contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head]
|
||||
contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main]
|
||||
###
|
||||
|
||||
return (regions_model_1,
|
||||
contours_only_text_parent_main,
|
||||
|
@ -1042,11 +1040,11 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
|
|||
textlines_tot.append(np.array(textlines_con[m1][nn], dtype=np.int32))
|
||||
textlines_tot_org_form.append(textlines_con[m1][nn])
|
||||
|
||||
##img_text_all=np.zeros((textline_iamge.shape[0],textline_iamge.shape[1]))
|
||||
##img_text_all=cv2.fillPoly(img_text_all, pts =textlines_tot , color=(1,1,1))
|
||||
# img_text_all=np.zeros((textline_iamge.shape[0],textline_iamge.shape[1]))
|
||||
# img_text_all=cv2.fillPoly(img_text_all, pts =textlines_tot , color=(1,1,1))
|
||||
|
||||
##plt.imshow(img_text_all)
|
||||
##plt.show()
|
||||
# plt.imshow(img_text_all)
|
||||
# plt.show()
|
||||
areas_cnt_text = np.array([cv2.contourArea(textlines_tot[j])
|
||||
for j in range(len(textlines_tot))])
|
||||
areas_cnt_text = areas_cnt_text / float(textline_iamge.shape[0] * textline_iamge.shape[1])
|
||||
|
@ -1152,8 +1150,8 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
|
|||
|
||||
|
||||
def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
|
||||
##plt.imshow(textline_mask)
|
||||
##plt.show()
|
||||
# plt.imshow(textline_mask)
|
||||
# plt.show()
|
||||
"""
|
||||
print(len(contours_main),'contours_main')
|
||||
mada_n=textline_mask.sum(axis=1)
|
||||
|
@ -1164,8 +1162,8 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
|
|||
x=np.arange(len(y))
|
||||
|
||||
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
|
||||
##plt.imshow(textline_mask[:,:])
|
||||
##plt.show()
|
||||
# plt.imshow(textline_mask[:,:])
|
||||
# plt.show()
|
||||
|
||||
sigma_gaus=8
|
||||
z= gaussian_filter1d(y_help, sigma_gaus)
|
||||
|
@ -1200,8 +1198,8 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
|
|||
peaks_neg = peaks_neg - 20 - 20
|
||||
peaks = peaks - 20
|
||||
|
||||
##plt.plot(z)
|
||||
##plt.show()
|
||||
# plt.plot(z)
|
||||
# plt.show()
|
||||
if contours_main is not None:
|
||||
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
|
||||
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
|
||||
|
@ -1279,7 +1277,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
|
|||
final_types.append(int(ind_in_type[j]))
|
||||
final_index_type.append(int(ind_ind_type[j]))
|
||||
|
||||
##matrix_of_orders[:len_main,4]=final_indexers_sorted[:]
|
||||
# matrix_of_orders[:len_main,4]=final_indexers_sorted[:]
|
||||
|
||||
# This fix is applied if the sum of the lengths of contours and contours_h
|
||||
# does not match final_indexers_sorted. However, this is not the optimal solution.
|
||||
|
@ -1865,8 +1863,8 @@ def return_boxes_of_images_by_order_of_reading_new(
|
|||
columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers))
|
||||
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (
|
||||
len(columns_not_covered) + len(x_start_without_mother)))
|
||||
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
||||
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||
# y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
||||
# x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||
x_starting = np.append(x_starting, columns_not_covered)
|
||||
x_starting = np.append(x_starting, x_start_without_mother)
|
||||
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
|
||||
|
@ -1909,8 +1907,8 @@ def return_boxes_of_images_by_order_of_reading_new(
|
|||
columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers))
|
||||
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (
|
||||
len(columns_not_covered) + len(x_start_without_mother)))
|
||||
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
||||
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||
# y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
||||
# x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||
x_starting = np.append(x_starting, columns_not_covered)
|
||||
x_starting = np.append(x_starting, x_start_without_mother)
|
||||
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
|
||||
|
@ -2110,8 +2108,8 @@ def return_boxes_of_images_by_order_of_reading_new(
|
|||
columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col))
|
||||
|
||||
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1))
|
||||
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
||||
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||
# y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
||||
# x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||
x_starting = np.append(x_starting, columns_not_covered)
|
||||
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
|
||||
if len(new_main_sep_y) > 0:
|
||||
|
@ -2124,8 +2122,8 @@ def return_boxes_of_images_by_order_of_reading_new(
|
|||
all_columns = np.arange(len(peaks_neg_tot) - 1)
|
||||
columns_not_covered = list(set(all_columns))
|
||||
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered))
|
||||
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
||||
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||
# y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
||||
# x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||
x_starting = np.append(x_starting, columns_not_covered)
|
||||
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
|
||||
|
||||
|
|
|
@ -65,7 +65,7 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
|
|||
polygon = geometry.Polygon([point[0] for point in c])
|
||||
# area = cv2.contourArea(c)
|
||||
area = polygon.area
|
||||
##print(np.prod(thresh.shape[:2]))
|
||||
# print(np.prod(thresh.shape[:2]))
|
||||
# Check that polygon has area greater than minimal area
|
||||
# print(hierarchy[0][jv][3],hierarchy )
|
||||
if (min_area * np.prod(image.shape[:2]) <= area <= max_area * np.prod(image.shape[:2]) and
|
||||
|
@ -200,7 +200,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
|||
|
||||
# print(img.shape,'img')
|
||||
img_copy = rotation_image_new(img_copy, -slope_first)
|
||||
##print(img_copy.shape,'img_copy')
|
||||
# print(img_copy.shape,'img_copy')
|
||||
# plt.imshow(img_copy)
|
||||
# plt.show()
|
||||
|
||||
|
@ -258,7 +258,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map):
|
|||
if not len(cnts):
|
||||
return []
|
||||
img = cv2.resize(img, (int(img.shape[1] / 6), int(img.shape[0] / 6)), interpolation=cv2.INTER_NEAREST)
|
||||
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
|
||||
# cnts = list( (np.array(cnts)/2).astype(np.int16) )
|
||||
# cnts = cnts/2
|
||||
cnts = [(i / 6).astype(np.int) for i in cnts]
|
||||
results = map(partial(do_back_rotation_and_get_cnt_back,
|
||||
|
|
|
@ -48,7 +48,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
|
||||
# plt.imshow(img_con[:,:,0])
|
||||
# plt.show()
|
||||
##img_con=cv2.dilate(img_con, kernel, iterations=30)
|
||||
# img_con=cv2.dilate(img_con, kernel, iterations=30)
|
||||
|
||||
# plt.imshow(img_con[:,:,0])
|
||||
# plt.show()
|
||||
|
@ -193,10 +193,10 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
img_textlines = img_textlines.astype(np.uint8)
|
||||
|
||||
contours_combined = return_contours_of_interested_region(img_textlines, 255, 0)
|
||||
##imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
|
||||
##ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
|
||||
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
|
||||
##contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
# contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
areas_cnt_text = np.array(
|
||||
[cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
|
||||
|
@ -209,7 +209,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
# contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
|
||||
# print(np.shape(contours_biggest),'contours_biggest')
|
||||
# print(np.shape(all_found_textline_polygons[int(region_final)][arg_min]))
|
||||
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||
# contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||
if len(contours_combined) == 1:
|
||||
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
elif len(contours_combined) == 2:
|
||||
|
@ -266,7 +266,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] # -all_box_coord[int(region_final)][2]
|
||||
contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] # -all_box_coord[int(region_final)][0]
|
||||
|
||||
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||
# contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||
if len(contours_combined) == 1:
|
||||
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
elif len(contours_combined) == 2:
|
||||
|
@ -281,49 +281,49 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
else:
|
||||
pass
|
||||
|
||||
##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
###print(all_box_coord[j_cont])
|
||||
###print(cx_t)
|
||||
###print(cy_t)
|
||||
###print(cx_d[i_drop])
|
||||
###print(cy_d[i_drop])
|
||||
##y_lines=all_box_coord[int(region_final)][0]+np.array(cy_t)
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# print(all_box_coord[j_cont])
|
||||
# print(cx_t)
|
||||
# print(cy_t)
|
||||
# print(cx_d[i_drop])
|
||||
# print(cy_d[i_drop])
|
||||
# y_lines=all_box_coord[int(region_final)][0]+np.array(cy_t)
|
||||
|
||||
##y_lines[y_lines<y_min_d[i_drop]]=0
|
||||
###print(y_lines)
|
||||
# y_lines[y_lines<y_min_d[i_drop]]=0
|
||||
# print(y_lines)
|
||||
|
||||
##arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
|
||||
###print(arg_min)
|
||||
# arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
|
||||
# print(arg_min)
|
||||
|
||||
##cnt_nearest=np.copy(all_found_textline_polygons[int(region_final)][arg_min])
|
||||
##cnt_nearest[:,0,0]=all_found_textline_polygons[int(region_final)][arg_min][:,0,0]#+all_box_coord[int(region_final)][2]
|
||||
##cnt_nearest[:,0,1]=all_found_textline_polygons[int(region_final)][arg_min][:,0,1]#+all_box_coord[int(region_final)][0]
|
||||
# cnt_nearest=np.copy(all_found_textline_polygons[int(region_final)][arg_min])
|
||||
# cnt_nearest[:,0,0]=all_found_textline_polygons[int(region_final)][arg_min][:,0,0]#+all_box_coord[int(region_final)][2]
|
||||
# cnt_nearest[:,0,1]=all_found_textline_polygons[int(region_final)][arg_min][:,0,1]#+all_box_coord[int(region_final)][0]
|
||||
|
||||
##img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
||||
##img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
|
||||
##img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
||||
# img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
||||
# img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
|
||||
# img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
||||
|
||||
##img_textlines=img_textlines.astype(np.uint8)
|
||||
# img_textlines=img_textlines.astype(np.uint8)
|
||||
|
||||
##plt.imshow(img_textlines)
|
||||
##plt.show()
|
||||
##imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
|
||||
##ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
# plt.imshow(img_textlines)
|
||||
# plt.show()
|
||||
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
|
||||
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
|
||||
##contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
||||
# contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
##print(len(contours_combined),'len textlines mixed')
|
||||
##areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
|
||||
# print(len(contours_combined),'len textlines mixed')
|
||||
# areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
|
||||
|
||||
##contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
|
||||
# contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
|
||||
|
||||
###print(np.shape(contours_biggest))
|
||||
###print(contours_biggest[:])
|
||||
##contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
|
||||
##contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
|
||||
# print(np.shape(contours_biggest))
|
||||
# print(contours_biggest[:])
|
||||
# contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
|
||||
# contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
|
||||
|
||||
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||
##all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest
|
||||
# contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||
# all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest
|
||||
|
||||
else:
|
||||
if len(region_with_intersected_drop) > 1:
|
||||
|
@ -469,69 +469,69 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
else:
|
||||
pass
|
||||
|
||||
#####for i_drop in range(len(polygons_of_drop_capitals)):
|
||||
#####for j_cont in range(len(contours_only_text_parent)):
|
||||
#####img_con=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
||||
#####img_con=cv2.fillPoly(img_con,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
||||
#####img_con=cv2.fillPoly(img_con,pts=[contours_only_text_parent[j_cont]],color=(255,255,255))
|
||||
# for i_drop in range(len(polygons_of_drop_capitals)):
|
||||
# for j_cont in range(len(contours_only_text_parent)):
|
||||
# img_con=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
||||
# img_con=cv2.fillPoly(img_con,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
||||
# img_con=cv2.fillPoly(img_con,pts=[contours_only_text_parent[j_cont]],color=(255,255,255))
|
||||
|
||||
#####img_con=img_con.astype(np.uint8)
|
||||
######imgray = cv2.cvtColor(img_con, cv2.COLOR_BGR2GRAY)
|
||||
######ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
# img_con=img_con.astype(np.uint8)
|
||||
# #imgray = cv2.cvtColor(img_con, cv2.COLOR_BGR2GRAY)
|
||||
# #ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
|
||||
######contours_new,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
||||
# #contours_new,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
#####contours_new,hir_new=return_contours_of_image(img_con)
|
||||
#####contours_new_parent=return_parent_contours( contours_new,hir_new)
|
||||
######plt.imshow(img_con)
|
||||
######plt.show()
|
||||
#####try:
|
||||
#####if len(contours_new_parent)==1:
|
||||
######print(all_found_textline_polygons[j_cont][0])
|
||||
#####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont])
|
||||
######print(all_box_coord[j_cont])
|
||||
######print(cx_t)
|
||||
######print(cy_t)
|
||||
######print(cx_d[i_drop])
|
||||
######print(cy_d[i_drop])
|
||||
#####y_lines=all_box_coord[j_cont][0]+np.array(cy_t)
|
||||
# contours_new,hir_new=return_contours_of_image(img_con)
|
||||
# contours_new_parent=return_parent_contours( contours_new,hir_new)
|
||||
# #plt.imshow(img_con)
|
||||
# #plt.show()
|
||||
# try:
|
||||
# if len(contours_new_parent)==1:
|
||||
# #print(all_found_textline_polygons[j_cont][0])
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont])
|
||||
# #print(all_box_coord[j_cont])
|
||||
# #print(cx_t)
|
||||
# #print(cy_t)
|
||||
# #print(cx_d[i_drop])
|
||||
# #print(cy_d[i_drop])
|
||||
# y_lines=all_box_coord[j_cont][0]+np.array(cy_t)
|
||||
|
||||
######print(y_lines)
|
||||
# #print(y_lines)
|
||||
|
||||
#####arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
|
||||
######print(arg_min)
|
||||
# arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
|
||||
# #print(arg_min)
|
||||
|
||||
#####cnt_nearest=np.copy(all_found_textline_polygons[j_cont][arg_min])
|
||||
#####cnt_nearest[:,0]=all_found_textline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2]
|
||||
#####cnt_nearest[:,1]=all_found_textline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0]
|
||||
# cnt_nearest=np.copy(all_found_textline_polygons[j_cont][arg_min])
|
||||
# cnt_nearest[:,0]=all_found_textline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2]
|
||||
# cnt_nearest[:,1]=all_found_textline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0]
|
||||
|
||||
#####img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
||||
#####img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
|
||||
#####img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
||||
# img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
||||
# img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
|
||||
# img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
||||
|
||||
#####img_textlines=img_textlines.astype(np.uint8)
|
||||
#####imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
|
||||
#####ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
# img_textlines=img_textlines.astype(np.uint8)
|
||||
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
|
||||
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
|
||||
#####contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
||||
# contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
#####areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
|
||||
# areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
|
||||
|
||||
#####contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
|
||||
# contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
|
||||
|
||||
######print(np.shape(contours_biggest))
|
||||
######print(contours_biggest[:])
|
||||
#####contours_biggest[:,0,0]=contours_biggest[:,0,0]-all_box_coord[j_cont][2]
|
||||
#####contours_biggest[:,0,1]=contours_biggest[:,0,1]-all_box_coord[j_cont][0]
|
||||
# #print(np.shape(contours_biggest))
|
||||
# #print(contours_biggest[:])
|
||||
# contours_biggest[:,0,0]=contours_biggest[:,0,0]-all_box_coord[j_cont][2]
|
||||
# contours_biggest[:,0,1]=contours_biggest[:,0,1]-all_box_coord[j_cont][0]
|
||||
|
||||
#####all_found_textline_polygons[j_cont][arg_min]=contours_biggest
|
||||
######print(contours_biggest)
|
||||
######plt.imshow(img_textlines[:,:,0])
|
||||
######plt.show()
|
||||
#####else:
|
||||
#####pass
|
||||
#####except:
|
||||
#####pass
|
||||
# all_found_textline_polygons[j_cont][arg_min]=contours_biggest
|
||||
# #print(contours_biggest)
|
||||
# #plt.imshow(img_textlines[:,:,0])
|
||||
# #plt.show()
|
||||
# else:
|
||||
# pass
|
||||
# except:
|
||||
# pass
|
||||
return all_found_textline_polygons
|
||||
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
|
|||
mask_marginals = mask_marginals.astype(np.uint8)
|
||||
|
||||
text_with_lines = text_with_lines.astype(np.uint8)
|
||||
##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3)
|
||||
# text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3)
|
||||
|
||||
text_with_lines_eroded = cv2.erode(text_with_lines, kernel, iterations=5)
|
||||
|
||||
|
@ -199,9 +199,9 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
|
|||
text_regions[:, int(max_point_of_right_marginal):][
|
||||
text_regions[:, int(max_point_of_right_marginal):] == 1] = 0
|
||||
|
||||
###text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4
|
||||
# text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4
|
||||
|
||||
###text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4
|
||||
# text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4
|
||||
# plt.plot(region_sum_0)
|
||||
# plt.plot(peaks,region_sum_0[peaks],'*')
|
||||
# plt.show()
|
||||
|
|
|
@ -279,21 +279,21 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|||
peaks_new_tot.append(i1)
|
||||
peaks_new_tot = np.sort(peaks_new_tot)
|
||||
|
||||
##plt.plot(y_padded_up_to_down_padded)
|
||||
##plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
|
||||
##plt.show()
|
||||
# plt.plot(y_padded_up_to_down_padded)
|
||||
# plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
|
||||
# plt.show()
|
||||
|
||||
##plt.plot(y_padded_up_to_down_padded)
|
||||
##plt.plot(peaks_neg_new,y_padded_up_to_down_padded[peaks_neg_new],'*')
|
||||
##plt.show()
|
||||
# plt.plot(y_padded_up_to_down_padded)
|
||||
# plt.plot(peaks_neg_new,y_padded_up_to_down_padded[peaks_neg_new],'*')
|
||||
# plt.show()
|
||||
|
||||
##plt.plot(y_padded_smoothed)
|
||||
##plt.plot(peaks,y_padded_smoothed[peaks],'*')
|
||||
##plt.show()
|
||||
# plt.plot(y_padded_smoothed)
|
||||
# plt.plot(peaks,y_padded_smoothed[peaks],'*')
|
||||
# plt.show()
|
||||
|
||||
##plt.plot(y_padded_smoothed)
|
||||
##plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*')
|
||||
##plt.show()
|
||||
# plt.plot(y_padded_smoothed)
|
||||
# plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*')
|
||||
# plt.show()
|
||||
peaks = peaks_new_tot[:]
|
||||
peaks_neg = peaks_neg_new[:]
|
||||
else:
|
||||
|
@ -327,29 +327,29 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|||
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
||||
|
||||
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down = y_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||
point_down = y_max_cont - 1 # peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
|
||||
else:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down = y_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||
point_down = y_max_cont - 1 # peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
|
||||
|
||||
point_down_narrow = peaks[jj] + first_nonzero + int(
|
||||
1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
||||
1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./2)
|
||||
else:
|
||||
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
|
||||
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
||||
|
||||
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(
|
||||
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
1.1 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
|
||||
else:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(
|
||||
1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
1.33 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
|
||||
|
||||
point_down_narrow = peaks[jj] + first_nonzero + int(
|
||||
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
||||
1.1 * dis_to_next_down) # #-int(dis_to_next_down*1./2)
|
||||
|
||||
if point_down_narrow >= img_patch.shape[0]:
|
||||
point_down_narrow = img_patch.shape[0] - 2
|
||||
|
@ -705,29 +705,29 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
|||
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
||||
|
||||
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||
point_down = x_max_cont - 1 # peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
|
||||
else:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||
point_down = x_max_cont - 1 # peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
|
||||
|
||||
point_down_narrow = peaks[jj] + first_nonzero + int(
|
||||
1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
||||
1.4 * dis_to_next_down) # #-int(dis_to_next_down*1./2)
|
||||
else:
|
||||
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
|
||||
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
||||
|
||||
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(
|
||||
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
1.1 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
|
||||
else:
|
||||
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
||||
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||
point_down = peaks[jj] + first_nonzero + int(
|
||||
1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||
1.33 * dis_to_next_down) # #-int(dis_to_next_down*1./4.0)
|
||||
|
||||
point_down_narrow = peaks[jj] + first_nonzero + int(
|
||||
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
||||
1.1 * dis_to_next_down) # #-int(dis_to_next_down*1./2)
|
||||
|
||||
if point_down_narrow >= img_patch.shape[0]:
|
||||
point_down_narrow = img_patch.shape[0] - 2
|
||||
|
@ -1104,8 +1104,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
|||
std_value_of_peaks = np.std(y_padded_smoothed[peaks])
|
||||
peaks_values = y_padded_smoothed[peaks]
|
||||
|
||||
###peaks_neg = peaks_neg - 20 - 20
|
||||
###peaks = peaks - 20
|
||||
# #peaks_neg = peaks_neg - 20 - 20
|
||||
# #peaks = peaks - 20
|
||||
peaks_neg_true = peaks_neg[:]
|
||||
peaks_pos_true = peaks[:]
|
||||
|
||||
|
@ -1125,14 +1125,14 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
|||
peaks_pos_true = peaks_pos_true - 20
|
||||
|
||||
for i in range(len(peaks_pos_true)):
|
||||
##img_patch[peaks_pos_true[i]-8:peaks_pos_true[i]+8,:]=1
|
||||
# img_patch[peaks_pos_true[i]-8:peaks_pos_true[i]+8,:]=1
|
||||
img_patch[peaks_pos_true[i] - 6: peaks_pos_true[i] + 6, :] = 1
|
||||
else:
|
||||
pass
|
||||
kernel = np.ones((5, 5), np.uint8)
|
||||
|
||||
# img_patch = cv2.erode(img_patch,kernel,iterations = 3)
|
||||
#######################img_patch = cv2.erode(img_patch,kernel,iterations = 2)
|
||||
# # # # # # # # # # # #img_patch = cv2.erode(img_patch,kernel,iterations = 2)
|
||||
img_patch = cv2.erode(img_patch, kernel, iterations=1)
|
||||
return img_patch
|
||||
|
||||
|
@ -1156,8 +1156,8 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
|||
|
||||
mada_n = img_path.sum(axis=1)
|
||||
|
||||
##plt.plot(mada_n)
|
||||
##plt.show()
|
||||
# plt.plot(mada_n)
|
||||
# plt.show()
|
||||
|
||||
first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None))
|
||||
|
||||
|
@ -1252,7 +1252,7 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
|||
|
||||
#plt.xlabel('Dichte')
|
||||
#plt.ylabel('Y')
|
||||
##plt.plot([0,len(y)], [grenze,grenze])
|
||||
# plt.plot([0,len(y)], [grenze,grenze])
|
||||
#plt.show()
|
||||
"""
|
||||
peaks_neg_true = peaks_neg_true - 20 - 20
|
||||
|
@ -1300,7 +1300,7 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
|
|||
contours_imgs, hierarchy,
|
||||
max_area=max_area, min_area=min_area)
|
||||
cont_final = []
|
||||
###print(add_boxes_coor_into_textlines,'ikki')
|
||||
# #print(add_boxes_coor_into_textlines,'ikki')
|
||||
for i in range(len(contours_imgs)):
|
||||
img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3))
|
||||
img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255))
|
||||
|
@ -1311,16 +1311,16 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
|
|||
_, threshrot = cv2.threshold(imgrayrot, 0, 255, 0)
|
||||
contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
##contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[
|
||||
##0]
|
||||
##contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]
|
||||
##if add_boxes_coor_into_textlines:
|
||||
##print(np.shape(contours_text_rot[0]),'sjppo')
|
||||
##contours_text_rot[0][:, 0, 0]=contours_text_rot[0][:, 0, 0] + box_ind[0]
|
||||
##contours_text_rot[0][:, 0, 1]=contours_text_rot[0][:, 0, 1] + box_ind[1]
|
||||
# contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[
|
||||
# 0]
|
||||
# contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]
|
||||
# if add_boxes_coor_into_textlines:
|
||||
# print(np.shape(contours_text_rot[0]),'sjppo')
|
||||
# contours_text_rot[0][:, 0, 0]=contours_text_rot[0][:, 0, 0] + box_ind[0]
|
||||
# contours_text_rot[0][:, 0, 1]=contours_text_rot[0][:, 0, 1] + box_ind[1]
|
||||
cont_final.append(contours_text_rot[0])
|
||||
|
||||
##print(cont_final,'nadizzzz')
|
||||
# print(cont_final,'nadizzzz')
|
||||
return None, cont_final
|
||||
|
||||
|
||||
|
@ -1672,12 +1672,12 @@ def do_work_of_slopes_new(
|
|||
all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].copy()
|
||||
mask_only_con_region = mask_only_con_region[y: y + h, x: x + w]
|
||||
|
||||
##plt.imshow(textline_mask_tot_ea)
|
||||
##plt.show()
|
||||
##plt.imshow(all_text_region_raw)
|
||||
##plt.show()
|
||||
##plt.imshow(mask_only_con_region)
|
||||
##plt.show()
|
||||
# plt.imshow(textline_mask_tot_ea)
|
||||
# plt.show()
|
||||
# plt.imshow(all_text_region_raw)
|
||||
# plt.show()
|
||||
# plt.imshow(mask_only_con_region)
|
||||
# plt.show()
|
||||
|
||||
all_text_region_raw[mask_only_con_region == 0] = 0
|
||||
cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text)
|
||||
|
@ -1746,7 +1746,7 @@ def do_work_of_slopes_new_curved(
|
|||
logger=logger, plotter=plotter)
|
||||
|
||||
# new line added
|
||||
##print(np.shape(textline_rotated_separated),np.shape(mask_biggest))
|
||||
# print(np.shape(textline_rotated_separated),np.shape(mask_biggest))
|
||||
textline_rotated_separated[mask_region_in_patch_region[:, :] != 1] = 0
|
||||
# till here
|
||||
|
||||
|
|
|
@ -342,9 +342,9 @@ class EynollahXmlWriter:
|
|||
points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm],
|
||||
page_coord)))
|
||||
page.add_TextRegion(dropcapital)
|
||||
###all_box_coord_drop = None
|
||||
###slopes_drop = None
|
||||
###self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)
|
||||
# all_box_coord_drop = None
|
||||
# slopes_drop = None
|
||||
# self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)
|
||||
|
||||
for mm in range(len(found_polygons_text_region_img)):
|
||||
page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue