diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 7232bb5..3a00c7d 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -98,6 +98,8 @@ from .utils.resize import resize_image from .utils import ( boosting_headers_by_longshot_region_segmentation, crop_image_inside_box, + box2rect, + box2slice, find_num_col, otsu_copy_binary, put_drop_out_from_only_drop_model, @@ -1519,7 +1521,7 @@ class Eynollah: self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 - def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) M_main_tot = [cv2.moments(polygons_of_textlines[j]) @@ -1542,18 +1544,17 @@ class Eynollah: all_found_textline_polygons.append(textlines_ins[::-1]) slopes.append(slope_deskew) - _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) + crop_coor = box2rect(boxes[index]) all_box_coord.append(crop_coor) return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes - def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_light") results = self.executor.map(partial(do_work_of_slopes_new_light, textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, slope_deskew=slope_deskew,textline_light=self.textline_light, logger=self.logger,), boxes, contours, contours_par, range(len(contours_par))) @@ -1561,13 +1562,12 @@ class Eynollah: self.logger.debug("exit get_slopes_and_deskew_new_light") return tuple(zip(*results)) - def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new") results = self.executor.map(partial(do_work_of_slopes_new, textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, slope_deskew=slope_deskew, MAX_SLOPE=MAX_SLOPE, KERNEL=KERNEL, @@ -1578,13 +1578,12 @@ class Eynollah: self.logger.debug("exit get_slopes_and_deskew_new") return tuple(zip(*results)) - def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew): + def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes, mask_texts_only, num_col, scale_par, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") results = self.executor.map(partial(do_work_of_slopes_new_curved, textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, mask_texts_only=mask_texts_only, num_col=num_col, scale_par=scale_par, @@ -1754,7 +1753,7 @@ class Eynollah: ##polygons_of_images_fin.append(ploy_img_ind) box = cv2.boundingRect(ploy_img_ind) - _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) + page_coord_img = box2rect(box) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], @@ -1768,7 +1767,7 @@ class Eynollah: if h < 150 or w < 150: pass else: - _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) + page_coord_img = box2rect(box) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], @@ -2933,10 +2932,10 @@ class Eynollah: return slope_deskew def run_marginals( - self, image_page, textline_mask_tot_ea, mask_images, mask_lines, + self, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): - image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] + textline_mask_tot = textline_mask_tot_ea[:, :] textline_mask_tot[mask_images[:, :] == 1] = 0 text_regions_p_1[mask_lines[:, :] == 1] = 3 @@ -2955,10 +2954,7 @@ class Eynollah: except Exception as e: self.logger.error("exception %s", e) - if self.plotter: - self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) - self.plotter.save_plot_of_layout_main(text_regions_p, image_page) - return textline_mask_tot, text_regions_p, image_page_rotated + return textline_mask_tot, text_regions_p def run_boxes_no_full_layout( self, image_page, textline_mask_tot, text_regions_p, @@ -3110,7 +3106,7 @@ class Eynollah: text_regions_p[:,:][table_prediction[:,:]==1] = 10 img_revised_tab = text_regions_p[:,:] if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) @@ -3130,7 +3126,7 @@ class Eynollah: else: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) @@ -4006,9 +4002,12 @@ class Eynollah: text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - textline_mask_tot, text_regions_p, image_page_rotated = \ - self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, + textline_mask_tot, text_regions_p = \ + self.run_marginals(textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + if self.plotter: + self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) + self.plotter.save_plot_of_layout_main(text_regions_p, image_page) if self.light_version and num_col_classifier in (1,2): image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) @@ -4017,7 +4016,6 @@ class Eynollah: textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) self.logger.info("detection of marginals took %.1fs", time.time() - t1) #print("text region early 2 marginal in %.1fs", time.time() - t0) @@ -4193,11 +4191,11 @@ class Eynollah: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2( txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, - image_page_rotated, boxes_text, slope_deskew) + boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, - image_page_rotated, boxes_marginals, slope_deskew) + boxes_marginals, slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, @@ -4217,11 +4215,11 @@ class Eynollah: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \ index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light( txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - image_page_rotated, boxes_text, slope_deskew) + boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - image_page_rotated, boxes_marginals, slope_deskew) + boxes_marginals, slope_deskew) #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( # all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: @@ -4229,25 +4227,25 @@ class Eynollah: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new( txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - image_page_rotated, boxes_text, slope_deskew) + boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - image_page_rotated, boxes_marginals, slope_deskew) + boxes_marginals, slope_deskew) else: scale_param = 1 textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved( txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, - image_page_rotated, boxes_text, text_only, + boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2( all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, - image_page_rotated, boxes_marginals, text_only, + boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 44086f0..f76c3e1 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -1,3 +1,4 @@ +from typing import Tuple import time import math @@ -298,9 +299,17 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order( x_end_with_child_without_mother, new_main_sep_y) +def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]: + return (box[1], box[1] + box[3], + box[0], box[0] + box[2]) + +def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]: + return (slice(box[1], box[1] + box[3]), + slice(box[0], box[0] + box[2])) + def crop_image_inside_box(box, img_org_copy): - image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]] - return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] + image_box = img_org_copy[box2slice(box)] + return image_box, box2rect(box) def otsu_copy_binary(img): img_r = np.zeros((img.shape[0], img.shape[1], 3)) diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index a9a7172..ee2faa7 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -141,12 +141,12 @@ def return_parent_contours(contours, hierarchy): if hierarchy[0][i][3] == -1] return contours_parent -def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): +def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -264,12 +264,12 @@ def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix, map confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask)) return cnts, confs -def return_contours_of_interested_textline(region_pre_p, pixel): +def return_contours_of_interested_textline(region_pre_p, label): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -292,12 +292,12 @@ def return_contours_of_image(image): contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) return contours, hierarchy -def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003): +def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -310,12 +310,12 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si return contours_imgs -def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area): +def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index ffbfff7..dcddc65 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -18,6 +18,8 @@ from .contour import ( from . import ( find_num_col_deskew, crop_image_inside_box, + box2rect, + box2slice, ) def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): @@ -1530,7 +1532,7 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map def do_work_of_slopes_new( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, slope_deskew, + textline_mask_tot_ea, slope_deskew, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): if KERNEL is None: @@ -1540,7 +1542,7 @@ def do_work_of_slopes_new( logger.debug('enter do_work_of_slopes_new') x, y, w, h = box_text - _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + crop_coor = box2rect(box_text) mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) all_text_region_raw = textline_mask_tot_ea * mask_textline @@ -1588,7 +1590,7 @@ def do_work_of_slopes_new( def do_work_of_slopes_new_curved( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, + textline_mask_tot_ea, mask_texts_only, num_col, scale_par, slope_deskew, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): if KERNEL is None: @@ -1631,7 +1633,7 @@ def do_work_of_slopes_new_curved( slope_for_all = slope_deskew slope = slope_for_all - _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + crop_coor = box2rect(box_text) if abs(slope_for_all) < 45: textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) @@ -1677,7 +1679,7 @@ def do_work_of_slopes_new_curved( def do_work_of_slopes_new_light( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light, + textline_mask_tot_ea, slope_deskew, textline_light, logger=None ): if logger is None: @@ -1685,7 +1687,7 @@ def do_work_of_slopes_new_light( logger.debug('enter do_work_of_slopes_new_light') x, y, w, h = box_text - _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + crop_coor = box2rect(box_text) mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) all_text_region_raw = textline_mask_tot_ea * mask_textline diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 92e353f..b9e906a 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -268,7 +268,7 @@ class EynollahXmlWriter(): self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) for mm in range(len(found_polygons_text_region_h)): - textregion = TextRegionType(id=counter.next_region_id, type_='header', + textregion = TextRegionType(id=counter.next_region_id, type_='heading', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) page.add_TextRegion(textregion) @@ -296,7 +296,7 @@ class EynollahXmlWriter(): page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) for mm in range(len(polygons_lines_to_be_written_in_xml)): - page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) + page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) for mm in range(len(found_polygons_tables)): page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))