mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-05-01 03:32:00 +02:00
get_textlines_of_a_textregion_sorted: w-h instead of w/h test
This commit is contained in:
parent
9723dfeb73
commit
be61875d6e
1 changed file with 27 additions and 31 deletions
|
|
@ -939,7 +939,7 @@ class Eynollah:
|
||||||
if len(diff_cy)>0:
|
if len(diff_cy)>0:
|
||||||
mean_y_diff = np.mean(diff_cy)
|
mean_y_diff = np.mean(diff_cy)
|
||||||
mean_x_diff = np.mean(diff_cx)
|
mean_x_diff = np.mean(diff_cx)
|
||||||
count_hor = np.count_nonzero(np.array(w_h_textline) > 1)
|
count_hor = np.count_nonzero(np.diff(w_h_textline) > 0)
|
||||||
count_ver = len(w_h_textline) - count_hor
|
count_ver = len(w_h_textline) - count_hor
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
@ -1000,29 +1000,27 @@ class Eynollah:
|
||||||
def get_slopes_and_deskew_new_light2(self, contours_par, textline_mask_tot, boxes, slope_deskew):
|
def get_slopes_and_deskew_new_light2(self, contours_par, textline_mask_tot, boxes, slope_deskew):
|
||||||
|
|
||||||
polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot, 1, 0.00001)
|
polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot, 1, 0.00001)
|
||||||
cx_main_tot, cy_main_tot = find_center_of_contours(polygons_of_textlines)
|
cx_textlines, cy_textlines = find_center_of_contours(polygons_of_textlines)
|
||||||
w_h_textlines = [cv2.boundingRect(polygon)[2:] for polygon in polygons_of_textlines]
|
w_h_textlines = [cv2.boundingRect(polygon)[2:] for polygon in polygons_of_textlines]
|
||||||
|
|
||||||
args_textlines = np.arange(len(polygons_of_textlines))
|
args_textlines = np.arange(len(polygons_of_textlines))
|
||||||
|
|
||||||
all_found_textline_polygons = []
|
all_found_textline_polygons = []
|
||||||
slopes = []
|
slopes = []
|
||||||
|
for index, contour in enumerate(contours_par):
|
||||||
for index, con_region_ind in enumerate(contours_par):
|
results = [cv2.pointPolygonTest(contour,
|
||||||
results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False)
|
(cx_textlines[ind],
|
||||||
|
cy_textlines[ind]),
|
||||||
|
False)
|
||||||
for ind in args_textlines]
|
for ind in args_textlines]
|
||||||
results = np.array(results)
|
results = np.array(results)
|
||||||
indexes_in = args_textlines[results == 1]
|
indexes_in = args_textlines[results == 1]
|
||||||
textlines_ins = [polygons_of_textlines[ind] for ind in indexes_in]
|
textlines_in = self.get_textlines_of_a_textregion_sorted(
|
||||||
cx_textline_in = [cx_main_tot[ind] for ind in indexes_in]
|
[polygons_of_textlines[ind] for ind in indexes_in],
|
||||||
cy_textline_in = [cy_main_tot[ind] for ind in indexes_in]
|
[cx_textlines[ind] for ind in indexes_in],
|
||||||
w_h_textlines_in = [w_h_textlines[ind][0] / float(w_h_textlines[ind][1]) for ind in indexes_in]
|
[cy_textlines[ind] for ind in indexes_in],
|
||||||
|
[w_h_textlines[ind] for ind in indexes_in])
|
||||||
|
|
||||||
textlines_ins = self.get_textlines_of_a_textregion_sorted(textlines_ins,
|
all_found_textline_polygons.append(textlines_in) #[::-1])
|
||||||
cx_textline_in,
|
|
||||||
cy_textline_in,
|
|
||||||
w_h_textlines_in)
|
|
||||||
|
|
||||||
all_found_textline_polygons.append(textlines_ins)#[::-1])
|
|
||||||
slopes.append(slope_deskew)
|
slopes.append(slope_deskew)
|
||||||
|
|
||||||
return all_found_textline_polygons, slopes
|
return all_found_textline_polygons, slopes
|
||||||
|
|
@ -1631,7 +1629,9 @@ class Eynollah:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
#print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew')
|
#print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew')
|
||||||
slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True,
|
textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2)
|
||||||
|
slope_deskew = return_deskew_slop(textline_mask_tot_ea, 2,
|
||||||
|
n_tot_angles=30, main_page=True,
|
||||||
logger=self.logger, plotter=self.plotter)
|
logger=self.logger, plotter=self.plotter)
|
||||||
self.logger.info("slope_deskew: %.2f°", slope_deskew)
|
self.logger.info("slope_deskew: %.2f°", slope_deskew)
|
||||||
return slope_deskew
|
return slope_deskew
|
||||||
|
|
@ -2235,28 +2235,23 @@ class Eynollah:
|
||||||
all_found_textline_polygons = filter_contours_area_of_image(
|
all_found_textline_polygons = filter_contours_area_of_image(
|
||||||
textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
|
textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
|
||||||
|
|
||||||
cx_main_tot, cy_main_tot = find_center_of_contours(all_found_textline_polygons)
|
cx_textlines, cy_textlines = find_center_of_contours(all_found_textline_polygons)
|
||||||
w_h_textlines = [cv2.boundingRect(polygon)[2:]
|
w_h_textlines = [cv2.boundingRect(polygon)[2:]
|
||||||
for polygon in all_found_textline_polygons]
|
for polygon in all_found_textline_polygons]
|
||||||
w_h_textlines = [w / float(h) for w, h in w_h_textlines]
|
|
||||||
|
|
||||||
all_found_textline_polygons = self.get_textlines_of_a_textregion_sorted(
|
all_found_textline_polygons = self.get_textlines_of_a_textregion_sorted(
|
||||||
#all_found_textline_polygons[::-1]
|
#all_found_textline_polygons[::-1]
|
||||||
all_found_textline_polygons, cx_main_tot, cy_main_tot, w_h_textlines)
|
all_found_textline_polygons, cx_textlines, cy_textlines, w_h_textlines)
|
||||||
all_found_textline_polygons = [all_found_textline_polygons]
|
all_found_textline_polygons = [all_found_textline_polygons]
|
||||||
all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
|
all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
|
||||||
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
|
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
|
||||||
all_found_textline_polygons, None, None, type_contour="textline")
|
all_found_textline_polygons, None, None, type_contour="textline")
|
||||||
|
|
||||||
order_text_new = [0]
|
|
||||||
slopes =[0]
|
|
||||||
conf_contours_textregions =[0]
|
|
||||||
|
|
||||||
pcgts = writer.build_pagexml_no_full_layout(
|
pcgts = writer.build_pagexml_no_full_layout(
|
||||||
num_col=num_col_classifier,
|
num_col=num_col_classifier,
|
||||||
found_polygons_text_region=cont_page,
|
found_polygons_text_region=cont_page,
|
||||||
page_coord=page_coord,
|
page_coord=page_coord,
|
||||||
order_of_texts=order_text_new,
|
page_slope=0,
|
||||||
|
order_of_texts=[0],
|
||||||
all_found_textline_polygons=all_found_textline_polygons,
|
all_found_textline_polygons=all_found_textline_polygons,
|
||||||
found_polygons_images=[],
|
found_polygons_images=[],
|
||||||
found_polygons_tables=[],
|
found_polygons_tables=[],
|
||||||
|
|
@ -2264,11 +2259,12 @@ class Eynollah:
|
||||||
found_polygons_marginals_right=[],
|
found_polygons_marginals_right=[],
|
||||||
all_found_textline_polygons_marginals_left=[],
|
all_found_textline_polygons_marginals_left=[],
|
||||||
all_found_textline_polygons_marginals_right=[],
|
all_found_textline_polygons_marginals_right=[],
|
||||||
slopes=slopes,
|
slopes=[0],
|
||||||
slopes_marginals_left=[],
|
slopes_marginals_left=[],
|
||||||
slopes_marginals_right=[],
|
slopes_marginals_right=[],
|
||||||
cont_page=cont_page,
|
cont_page=cont_page,
|
||||||
polygons_seplines=[],
|
polygons_seplines=[],
|
||||||
|
conf_textregions=[0],
|
||||||
skip_layout_reading_order=True
|
skip_layout_reading_order=True
|
||||||
)
|
)
|
||||||
self.logger.info("Basic processing complete")
|
self.logger.info("Basic processing complete")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue