From 04e79002b3daa3f4e69921e6b94b3d0a6ee48639 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 24 Aug 2024 12:54:19 +0200 Subject: [PATCH] making light version faster for 1 and 2 columns images --- qurator/eynollah/eynollah.py | 88 ++++++++++++++++++------ qurator/eynollah/utils/separate_lines.py | 16 ++--- 2 files changed, 75 insertions(+), 29 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 2bf57a4..640db16 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -28,6 +28,7 @@ from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +#os.environ['CUDA_VISIBLE_DEVICES'] = '-1' stderr = sys.stderr sys.stderr = open(os.devnull, "w") import tensorflow as tf @@ -299,17 +300,25 @@ class Eynollah: def _cache_images(self, image_filename=None, image_pil=None): ret = {} + t_c0 = time.time() if image_filename: ret['img'] = cv2.imread(image_filename) - self.dpi = check_dpi(image_filename) + if self.light_version: + self.dpi = 100 + else: + self.dpi = check_dpi(image_filename) else: ret['img'] = pil2cv(image_pil) - self.dpi = check_dpi(image_pil) + if self.light_version: + self.dpi = 100 + else: + self.dpi = check_dpi(image_pil) ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY) for prefix in ('', '_grayscale'): ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) return ret def reset_file_name_dir(self, image_filename): + t_c = time.time() self._imgs = self._cache_images(image_filename=image_filename) self.image_filename = image_filename @@ -491,6 +500,27 @@ class Eynollah: num_column_is_classified = True return img_new, num_column_is_classified + + def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): + self.logger.debug("enter calculate_width_height_by_columns") + if num_col == 1: + img_w_new = 1300 + img_h_new = int(img.shape[0] / float(img.shape[1]) * 1300) + else: + img_w_new = 1500 + img_h_new = int(img.shape[0] / float(img.shape[1]) * 1500) + + if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: + img_new = np.copy(img) + num_column_is_classified = False + elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + img_new = np.copy(img) + num_column_is_classified = False + else: + img_new = resize_image(img, img_h_new, img_w_new) + num_column_is_classified = True + + return img_new, num_column_is_classified def resize_image_with_column_classifier(self, is_image_enhanced, img_bin): self.logger.debug("enter resize_image_with_column_classifier") @@ -600,16 +630,24 @@ class Eynollah: self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) if dpi < DPI_THRESHOLD: - img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + else: + img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) if light_version: image_res = np.copy(img_new) else: image_res = self.predict_enhancement(img_new) is_image_enhanced = True else: - num_column_is_classified = True - image_res = np.copy(img) - is_image_enhanced = False + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + image_res = np.copy(img_new) + is_image_enhanced = True + else: + num_column_is_classified = True + image_res = np.copy(img) + is_image_enhanced = False self.logger.debug("exit resize_and_enhance_image_with_column_classifier") return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin @@ -1175,7 +1213,7 @@ class Eynollah: marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=4) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") @@ -1280,7 +1318,10 @@ class Eynollah: def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new") - num_cores = cpu_count() + if len(contours)>15: + num_cores = cpu_count() + else: + num_cores = 1 queue_of_all_params = Queue() processes = [] @@ -1554,8 +1595,6 @@ class Eynollah: mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) - # plt.imshow(mask_only_con_region) - # plt.show() if self.textline_light: all_text_region_raw = np.copy(textline_mask_tot_ea) @@ -1660,11 +1699,11 @@ class Eynollah: img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - #print(img.shape,'bin shape') + #print(img.shape,'bin shape textline') if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=4) + prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=3) else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=4) + prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=3) prediction_textline = resize_image(prediction_textline, img_h, img_w) if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) @@ -1747,11 +1786,14 @@ class Eynollah: img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) + t_bin = time.time() if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=10) else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=10) + + #print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2710,10 +2752,10 @@ class Eynollah: return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction def run_enhancement(self,light_version): + t_in = time.time() self.logger.info("Resizing and enhancing image...") is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier(light_version) self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') - scale = 1 if is_image_enhanced: if self.allow_enhancement: @@ -2731,6 +2773,7 @@ class Eynollah: if self.allow_scaling: img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin) self.get_image_and_scales_after_enhancing(img_org, img_res) + #print("enhancement in ", time.time()-t_in) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified def run_textline(self, image_page): @@ -2748,7 +2791,8 @@ class Eynollah: #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') sigma = 2 main_page_deskew = True - slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, main_page_deskew, plotter=self.plotter) + n_total_angles = 30 + slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, n_total_angles, main_page_deskew, plotter=self.plotter) slope_first = 0 if self.plotter: @@ -2871,7 +2915,7 @@ class Eynollah: def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light): self.logger.debug('enter run_boxes_full_layout') - + t_full0 = time.time() if self.tables: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) @@ -2963,12 +3007,12 @@ class Eynollah: text_regions_p[:, :][text_regions_p[:, :] == 4] = 8 image_page = image_page.astype(np.uint8) - + #print("full inside 1", time.time()- t_full0) if self.light_version: regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, True, cols=num_col_classifier) else: regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) - + #print("full inside 2", time.time()- t_full0) # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model # in the new full layout drop capital is 3 and separators are 5 @@ -3012,6 +3056,7 @@ class Eynollah: img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) self.logger.debug('exit run_boxes_full_layout') + #print("full inside 3", time.time()- t_full0) return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables def our_load_model(self, model_file): @@ -3534,6 +3579,7 @@ class Eynollah: t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) + #print("text region early -11 in %.1fs", time.time() - t0) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) @@ -3922,7 +3968,7 @@ class Eynollah: if self.dir_in: self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) - #print("Job done in %.1fs", time.time() - t0) + print("Job done in %.1fs", time.time() - t0) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/utils/separate_lines.py b/qurator/eynollah/utils/separate_lines.py index acdc2e9..1004a92 100644 --- a/qurator/eynollah/utils/separate_lines.py +++ b/qurator/eynollah/utils/separate_lines.py @@ -1569,7 +1569,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): # plt.show() return img_patch_ineterst_revised -def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): +def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) @@ -1626,7 +1626,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): ang_int=0 - angels=np.linspace(ang_int-22.5,ang_int+22.5,100) + angels=np.linspace(ang_int-22.5,ang_int+22.5,n_tot_angles) var_res=[] for rot in angels: @@ -1649,7 +1649,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): #plt.imshow(img_resized) #plt.show() - angels=np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) + angels=np.linspace(-12,12,n_tot_angles)#np.array([0 , 45 , 90 , -45]) var_res=[] @@ -1680,7 +1680,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): early_slope_edge=11 if abs(ang_int)>early_slope_edge and ang_int<0: - angels=np.linspace(-90,-12,100) + angels=np.linspace(-90,-12,n_tot_angles) var_res=[] for rot in angels: img_rot=rotate_image(img_resized,rot) @@ -1700,7 +1700,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): elif abs(ang_int)>early_slope_edge and ang_int>0: - angels=np.linspace(90,12,100) + angels=np.linspace(90,12,n_tot_angles) var_res=[] for rot in angels: img_rot=rotate_image(img_resized,rot) @@ -1719,7 +1719,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): except: ang_int=0 else: - angels=np.linspace(-25,25,60) + angels=np.linspace(-25,25,int(n_tot_angles/2.)+10) var_res=[] indexer=0 for rot in angels: @@ -1749,7 +1749,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): early_slope_edge=22 if abs(ang_int)>early_slope_edge and ang_int<0: - angels=np.linspace(-90,-25,60) + angels=np.linspace(-90,-25,int(n_tot_angles/2.)+10) var_res=[] @@ -1772,7 +1772,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): elif abs(ang_int)>early_slope_edge and ang_int>0: - angels=np.linspace(90,25,60) + angels=np.linspace(90,25,int(n_tot_angles/2.)+10) var_res=[]