Mirror of https://github.com/qurator-spk/eynollah.git (last synced 2025-06-08 19:59:56 +02:00)
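strong erosion, more modification: pass an `erosion_hurts` flag through `run_boxes_no_full_layout` into `return_boxes_of_images_by_order_of_reading_new`, which calls `find_num_col` with `multiplier=3.` instead of `7.` when the flag is set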
This commit is contained in:
parent 78d29f34c1
commit 44dad6a072
2 changed files with 12 additions and 9 deletions
|
@@ -1567,7 +1567,7 @@ class Eynollah:
|
||||||
self.plotter.save_plot_of_layout_main(text_regions_p, image_page)
|
self.plotter.save_plot_of_layout_main(text_regions_p, image_page)
|
||||||
return textline_mask_tot, text_regions_p, image_page_rotated
|
return textline_mask_tot, text_regions_p, image_page_rotated
|
||||||
|
|
||||||
def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier):
|
def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, erosion_hurts):
|
||||||
self.logger.debug('enter run_boxes_no_full_layout')
|
self.logger.debug('enter run_boxes_no_full_layout')
|
||||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||||
_, textline_mask_tot_d, text_regions_p_1_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, slope_deskew)
|
_, textline_mask_tot_d, text_regions_p_1_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, slope_deskew)
|
||||||
|
@@ -1598,11 +1598,11 @@ class Eynollah:
|
||||||
regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
|
regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
boxes = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier)
|
boxes = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts)
|
||||||
boxes_d = None
|
boxes_d = None
|
||||||
self.logger.debug("len(boxes): %s", len(boxes))
|
self.logger.debug("len(boxes): %s", len(boxes))
|
||||||
else:
|
else:
|
||||||
boxes_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier)
|
boxes_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts)
|
||||||
boxes = None
|
boxes = None
|
||||||
self.logger.debug("len(boxes): %s", len(boxes_d))
|
self.logger.debug("len(boxes): %s", len(boxes_d))
|
||||||
|
|
||||||
|
@@ -1723,7 +1723,7 @@ class Eynollah:
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
|
|
||||||
if not self.full_layout:
|
if not self.full_layout:
|
||||||
polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier)
|
polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, erosion_hurts)
|
||||||
|
|
||||||
pixel_img = 4
|
pixel_img = 4
|
||||||
min_area_mar = 0.00001
|
min_area_mar = 0.00001
|
||||||
|
@@ -1893,9 +1893,9 @@ class Eynollah:
|
||||||
regions_without_separators_d[(random_pixels_for_image[:, :] == 1) & (text_regions_p_1_n[:, :] == 5)] = 1
|
regions_without_separators_d[(random_pixels_for_image[:, :] == 1) & (text_regions_p_1_n[:, :] == 5)] = 1
|
||||||
|
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
boxes = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier)
|
boxes = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts)
|
||||||
else:
|
else:
|
||||||
boxes_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier)
|
boxes_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts)
|
||||||
|
|
||||||
if self.plotter:
|
if self.plotter:
|
||||||
self.plotter.write_images_into_directory(polygons_of_images, image_page)
|
self.plotter.write_images_into_directory(polygons_of_images, image_page)
|
||||||
|
|
|
@@ -1579,7 +1579,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, pixel_l
|
||||||
return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n
|
return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n
|
||||||
|
|
||||||
|
|
||||||
def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier):
|
def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts):
|
||||||
boxes=[]
|
boxes=[]
|
||||||
|
|
||||||
|
|
||||||
|
@@ -1594,11 +1594,14 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
|
||||||
if 1>0:#len( matrix_new[:,9][matrix_new[:,9]==1] )>0 and np.max(matrix_new[:,8][matrix_new[:,9]==1])>=0.1*(np.abs(splitter_y_new[i+1]-splitter_y_new[i] )):
|
if 1>0:#len( matrix_new[:,9][matrix_new[:,9]==1] )>0 and np.max(matrix_new[:,8][matrix_new[:,9]==1])>=0.1*(np.abs(splitter_y_new[i+1]-splitter_y_new[i] )):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=7.)
|
if erosion_hurts:
|
||||||
|
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=3.)
|
||||||
|
else:
|
||||||
|
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=7.)
|
||||||
except:
|
except:
|
||||||
peaks_neg_fin=[]
|
peaks_neg_fin=[]
|
||||||
|
|
||||||
#print(peaks_neg_fin,'peaks_neg_fin0')
|
print(peaks_neg_fin,'peaks_neg_fin0')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
peaks_neg_fin_org=np.copy(peaks_neg_fin)
|
peaks_neg_fin_org=np.copy(peaks_neg_fin)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue