From 97bc57be35a21f2080542c9bae8b963da8fabdb3 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 23 Nov 2020 11:37:32 +0100 Subject: [PATCH] Use boolean values for eynollah's flags --- sbb_newspapers_org_image/cli.py | 10 ++-- sbb_newspapers_org_image/eynollah.py | 68 ++++++++++++++-------------- 2 files changed, 41 insertions(+), 37 deletions(-) diff --git a/sbb_newspapers_org_image/cli.py b/sbb_newspapers_org_image/cli.py index 291388f..03d0167 100644 --- a/sbb_newspapers_org_image/cli.py +++ b/sbb_newspapers_org_image/cli.py @@ -45,26 +45,31 @@ from sbb_newspapers_org_image.eynollah import eynollah @click.option( "--allow_enhancement", "-ae", + is_flag=True, help="if this parameter set to true, this tool would check that input image need resizing and enhancement or not. If so output of resized and enhanced image and corresponding layout data will be written in out directory", ) @click.option( "--curved_line", "-cl", + is_flag=True, help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectabgle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.", ) @click.option( "--full_layout", "-fl", + is_flag=True, help="if this parameter set to true, this tool will try to return all elements of layout.", ) @click.option( "--allow_scaling", "-as", + is_flag=True, help="if this parameter set to true, this tool would check the scale and if needed it will scale it to perform better layout detection", ) @click.option( "--headers_off", "-ho", + is_flag=True, help="if this parameter set to true, this tool would ignore headers role in reading order", ) def main( @@ -81,7 +86,7 @@ def main( allow_scaling, headers_off, ): - x = eynollah( + eynollah( image, None, out, @@ -95,8 +100,7 @@ def main( full_layout, allow_scaling, headers_off, - ) - x.run() + ).run() if __name__ == "__main__": diff --git a/sbb_newspapers_org_image/eynollah.py b/sbb_newspapers_org_image/eynollah.py index eecd33c..5bbaeb3 100644 --- a/sbb_newspapers_org_image/eynollah.py +++ b/sbb_newspapers_org_image/eynollah.py @@ -49,11 +49,11 @@ class eynollah: dir_of_layout=None, dir_of_deskewed=None, dir_of_all=None, - allow_enhancement="false", - curved_line="false", - full_layout="false", - allow_scaling="false", - headers_off="false" + allow_enhancement=False, + curved_line=False, + full_layout=False, + allow_scaling=False, + headers_off=False ): self.image_dir = image_dir # XXX This does not seem to be a directory as the name suggests, but a file self.dir_out = dir_out @@ -4875,7 +4875,7 @@ class eynollah: for l in range(len(all_found_texline_polygons[mm][j])): # point = ET.SubElement(coord, 'Point') - if curved_line is None or curved_line == "false" or curved_line == "False" or curved_line == "FALSE": + if not self.curved_line: # point.set('x',str(found_polygons[j][l][0])) # point.set('y',str(found_polygons[j][l][1])) if len(all_found_texline_polygons[mm][j][l]) == 2: @@ -4887,7 +4887,7 @@ class eynollah: points_co = points_co + "," points_co = points_co + str(int((all_found_texline_polygons[mm][j][l][0][1] + all_box_coord[mm][0] + page_coord[0]) / self.scale_y)) - if (curved_line == "true" or curved_line == "True" or curved_line == "TRUE") and np.abs(slopes[mm]) <= 45: + if (self.curved_line) and np.abs(slopes[mm]) <= 45: if len(all_found_texline_polygons[mm][j][l]) == 2: points_co = points_co + str(int((all_found_texline_polygons[mm][j][l][0] + page_coord[2]) / self.scale_x)) points_co = points_co + "," @@ -4896,7 +4896,7 @@ class eynollah: points_co = points_co + str(int((all_found_texline_polygons[mm][j][l][0][0] + page_coord[2]) / self.scale_x)) points_co = points_co + "," points_co = points_co + str(int((all_found_texline_polygons[mm][j][l][0][1] + page_coord[0]) / self.scale_y)) - elif (curved_line == "true" or curved_line == "True" or curved_line == "TRUE") and np.abs(slopes[mm]) > 45: + elif (self.curved_line) and np.abs(slopes[mm]) > 45: if len(all_found_texline_polygons[mm][j][l]) == 2: points_co = points_co + str(int((all_found_texline_polygons[mm][j][l][0] + all_box_coord[mm][2] + page_coord[2]) / self.scale_x)) points_co = points_co + "," @@ -4974,7 +4974,7 @@ class eynollah: for l in range(len(all_found_texline_polygons_h[mm][j])): # point = ET.SubElement(coord, 'Point') - if curved_line is None or curved_line == "false" or curved_line == "False" or curved_line == "FALSE": + if not self.curved_line: # point.set('x',str(found_polygons[j][l][0])) # point.set('y',str(found_polygons[j][l][1])) if len(all_found_texline_polygons_h[mm][j][l]) == 2: @@ -4986,7 +4986,7 @@ class eynollah: points_co = points_co + "," points_co = points_co + str(int((all_found_texline_polygons_h[mm][j][l][0][1] + all_box_coord_h[mm][0] + page_coord[0]) / self.scale_y)) - if curved_line == "true" or curved_line == "True" or curved_line == "TRUE": + if self.curved_line: if len(all_found_texline_polygons_h[mm][j][l]) == 2: points_co = points_co + str(int((all_found_texline_polygons_h[mm][j][l][0] + page_coord[2]) / self.scale_x)) points_co = points_co + "," @@ -5061,7 +5061,7 @@ class eynollah: ##for l in range(len(all_found_texline_polygons_h[mm][j])): ###point = ET.SubElement(coord, 'Point') - ##if curved_line is None or curved_line=='false' or curved_line=='False' or curved_line=='FALSE' : + ##if not curved_line: ###point.set('x',str(found_polygons[j][l][0])) ###point.set('y',str(found_polygons[j][l][1])) ##if len(all_found_texline_polygons_h[mm][j][l])==2: @@ -5077,7 +5077,7 @@ class eynollah: ##points_co=points_co+str( int( ( all_found_texline_polygons_h[mm][j][l][0][1] ##+all_box_coord_h[mm][0]+page_coord[0])/self.scale_y) ) - ##if curved_line=='true' or curved_line=='True' or curved_line=='TRUE' : + ##if curved_line: ##if len(all_found_texline_polygons_h[mm][j][l])==2: ##points_co=points_co+str( int( (all_found_texline_polygons_h[mm][j][l][0] ##+page_coord[2])/self.scale_x) ) @@ -5158,7 +5158,7 @@ class eynollah: for l in range(len(all_found_texline_polygons_marginals[mm][j])): # point = ET.SubElement(coord, 'Point') - if curved_line is None or curved_line == "false" or curved_line == "False" or curved_line == "FALSE": + if not self.curved_line: # point.set('x',str(found_polygons[j][l][0])) # point.set('y',str(found_polygons[j][l][1])) if len(all_found_texline_polygons_marginals[mm][j][l]) == 2: @@ -5170,7 +5170,7 @@ class eynollah: points_co = points_co + "," points_co = points_co + str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + all_box_coord_marginals[mm][0] + page_coord[0]) / self.scale_y)) - if curved_line == "true" or curved_line == "True" or curved_line == "TRUE": + if self.curved_line: if len(all_found_texline_polygons_marginals[mm][j][l]) == 2: points_co = points_co + str(int((all_found_texline_polygons_marginals[mm][j][l][0] + page_coord[2]) / self.scale_x)) points_co = points_co + "," @@ -5384,7 +5384,7 @@ class eynollah: for l in range(len(all_found_texline_polygons[mm][j])): # point = ET.SubElement(coord, 'Point') - if curved_line is None or curved_line == "false" or curved_line == "False" or curved_line == "FALSE": + if not self.curved_line: # point.set('x',str(found_polygons[j][l][0])) # point.set('y',str(found_polygons[j][l][1])) if len(all_found_texline_polygons[mm][j][l]) == 2: @@ -5396,7 +5396,7 @@ class eynollah: points_co = points_co + "," points_co = points_co + str(int((all_found_texline_polygons[mm][j][l][0][1] + all_box_coord[mm][0] + page_coord[0]) / self.scale_y)) - if (curved_line == "true" or curved_line == "True" or curved_line == "TRUE") and abs(slopes[mm]) <= 45: + if (self.curved_line) and abs(slopes[mm]) <= 45: if len(all_found_texline_polygons[mm][j][l]) == 2: points_co = points_co + str(int((all_found_texline_polygons[mm][j][l][0] + page_coord[2]) / self.scale_x)) points_co = points_co + "," @@ -5406,7 +5406,7 @@ class eynollah: points_co = points_co + "," points_co = points_co + str(int((all_found_texline_polygons[mm][j][l][0][1] + page_coord[0]) / self.scale_y)) - elif (curved_line == "true" or curved_line == "True" or curved_line == "TRUE") and abs(slopes[mm]) > 45: + elif (self.curved_line) and abs(slopes[mm]) > 45: if len(all_found_texline_polygons[mm][j][l]) == 2: points_co = points_co + str(int((all_found_texline_polygons[mm][j][l][0] + all_box_coord[mm][2] + page_coord[2]) / self.scale_x)) points_co = points_co + "," @@ -5567,7 +5567,7 @@ class eynollah: for l in range(len(all_found_texline_polygons_marginals[mm][j])): # point = ET.SubElement(coord, 'Point') - if curved_line is None or curved_line == "false" or curved_line == "False" or curved_line == "FALSE": + if not self.curved_line: # point.set('x',str(found_polygons[j][l][0])) # point.set('y',str(found_polygons[j][l][1])) if len(all_found_texline_polygons_marginals[mm][j][l]) == 2: @@ -5579,7 +5579,7 @@ class eynollah: points_co = points_co + "," points_co = points_co + str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + all_box_coord_marginals[mm][0] + page_coord[0]) / self.scale_y)) - if curved_line == "true" or curved_line == "True" or curved_line == "TRUE": + if self.curved_line: if len(all_found_texline_polygons_marginals[mm][j][l]) == 2: points_co = points_co + str(int((all_found_texline_polygons_marginals[mm][j][l][0] + page_coord[2]) / self.scale_x)) points_co = points_co + "," @@ -10161,7 +10161,7 @@ class eynollah: def do_order_of_regions(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): - if self.full_layout == "true" or self.full_layout == "True" or self.full_layout == "TRUE": + if self.full_layout: cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = self.find_new_features_of_contoures(contours_only_text_parent) cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = self.find_new_features_of_contoures(contours_only_text_parent_h) @@ -10501,7 +10501,7 @@ class eynollah: # region_with_intersected_drop=region_with_intersected_drop/3 region_with_intersected_drop = region_with_intersected_drop.astype(np.uint8) # print(np.unique(img_con_all_copy[:,:,0])) - if self.curved_line == "True" or self.curved_line == "true": + if self.curved_line: if len(region_with_intersected_drop) > 1: sum_pixels_of_intersection = [] @@ -10981,21 +10981,21 @@ class eynollah: print(is_image_enhanced, "is_image_enhanced") K.clear_session() scale = 1 - if (self.allow_enhancement == "true" or self.allow_enhancement == "True" or self.allow_enhancement == "TRUE") and is_image_enhanced: + if (self.allow_enhancement) and is_image_enhanced: cv2.imwrite(os.path.join(self.dir_out, self.f_name) + ".tif", img_res) img_res = img_res.astype(np.uint8) self.get_image_and_scales(img_org, img_res, scale) - if (self.allow_enhancement == "False" or self.allow_enhancement == "false" or self.allow_enhancement == "FALSE" or self.allow_enhancement == None) and is_image_enhanced: + if (not self.allow_enhancement) and is_image_enhanced: self.get_image_and_scales_after_enhancing(img_org, img_res) - if (self.allow_enhancement == "true" or self.allow_enhancement == "True" or self.allow_enhancement == "TRUE") and not is_image_enhanced: + if (self.allow_enhancement) and not is_image_enhanced: self.get_image_and_scales(img_org, img_res, scale) - if (self.allow_enhancement == "False" or self.allow_enhancement == "false" or self.allow_enhancement == "FALSE" or self.allow_enhancement == None) and not is_image_enhanced: + if (not self.allow_enhancement) and not is_image_enhanced: self.get_image_and_scales(img_org, img_res, scale) - if (self.allow_scaling == "true" or self.allow_scaling == "True" or self.allow_scaling == "TRUE") and not is_image_enhanced: + if (self.allow_scaling) and not is_image_enhanced: img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced) self.get_image_and_scales_after_enhancing(img_org, img_res) @@ -11160,7 +11160,7 @@ class eynollah: print("marginals: " + str(time.time() - t1)) - if self.full_layout is None or self.full_layout == "False" or self.full_layout == "false" or self.full_layout == "FALSE": + if not self.full_layout: if np.abs(slope_deskew) >= 0.13: image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n = self.rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, slope_deskew) @@ -11228,7 +11228,7 @@ class eynollah: min_area_mar = 0.00001 polygons_of_marginals = self.return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - if self.full_layout == "true" or self.full_layout == "True" or self.full_layout == "TRUE": + if self.full_layout: # set first model with second model text_regions_p[:, :][text_regions_p[:, :] == 2] = 5 text_regions_p[:, :][text_regions_p[:, :] == 3] = 6 @@ -11445,12 +11445,12 @@ class eynollah: boxes_marginals, _ = self.get_text_region_boxes_by_given_contours(polygons_of_marginals) ####boxes_text_h,_=self.get_text_region_boxes_by_given_contours(text_only_h,contours_only_text_parent_h,image_page) - if self.curved_line is None or self.curved_line == "false" or self.curved_line == "False" or self.curved_line == "FALSE": + if not self.curved_line: slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, index_by_text_par_con_marginal = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - if self.curved_line == "true" or self.curved_line == "True" or self.curved_line == "TRUE": + if self.curved_line: scale_param = 1 all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=self.kernel, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) @@ -11474,7 +11474,7 @@ class eynollah: # contours_only_text_parent_d_ordered=list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con]) ###print(index_by_text_par_con,'index_by_text_par_con') - if self.full_layout == "true" or self.full_layout == "True" or self.full_layout == "TRUE": + if self.full_layout: ##for iii in range(len(contours_only_text_parent)): ##img1=np.zeros((text_only.shape[0],text_only.shape[1],3)) ##img1=cv2.fillPoly(img1,pts=[contours_only_text_parent[iii]] ,color=(1,1,1)) @@ -11523,12 +11523,12 @@ class eynollah: # print(len(contours_only_text_parent_h),len(contours_only_text_parent_h_d_ordered),'contours_only_text_parent_h') pixel_lines = 6 - if self.headers_off is None or self.headers_off == "false" or self.headers_off == "False" or self.headers_off == "FALSE": + if not self.headers_off: if np.abs(slope_deskew) < 0.13: num_col, peaks_neg_fin, matrix_of_lines_ch, spliter_y_new, seperators_closeup_n = self.find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines, contours_only_text_parent_h) else: num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, spliter_y_new_d, seperators_closeup_n_d = self.find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off == "true" or self.headers_off == "True" or self.headers_off == "TRUE": + elif self.headers_off: if np.abs(slope_deskew) < 0.13: num_col, peaks_neg_fin, matrix_of_lines_ch, spliter_y_new, seperators_closeup_n = self.find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines) else: @@ -11574,7 +11574,7 @@ class eynollah: if self.dir_of_cropped_images is not None: self.write_images_into_directory(polygons_of_images, self.dir_of_cropped_images, image_page) - if self.full_layout == "true" or self.full_layout == "True" or self.full_layout == "TRUE": + if self.full_layout: if np.abs(slope_deskew) < 0.13: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: