diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 7cbd82f..a895b0d 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -73,6 +73,12 @@ from qurator.eynollah.eynollah import Eynollah is_flag=True, help="if this parameter set to true, this tool will try to return all elements of layout.", ) +@click.option( + "--input_binary/--input-RGB", + "-ib/-irgb", + is_flag=True, + help="in general, eynollah uses RGB as input but if the input document is strongly dark, bright or for any other reason you can turn binarized input on. This option does not mean that you have to provide a binary image, otherwise this means that the tool itself will binarized the RGB input document.", +) @click.option( "--allow_scaling/--no-allow-scaling", "-as/-noas", @@ -103,6 +109,7 @@ def main( allow_enhancement, curved_line, full_layout, + input_binary, allow_scaling, headers_off, log_level @@ -128,6 +135,7 @@ def main( allow_enhancement=allow_enhancement, curved_line=curved_line, full_layout=full_layout, + input_binary=input_binary, allow_scaling=allow_scaling, headers_off=headers_off, ) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index e37f84b..2bfd91b 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -27,6 +27,7 @@ import tensorflow as tf tf.get_logger().setLevel("ERROR") warnings.filterwarnings("ignore") + from .utils.contour import ( filter_contours_area_of_image, find_contours_mean_y_diff, @@ -91,6 +92,7 @@ class Eynollah: allow_enhancement=False, curved_line=False, full_layout=False, + input_binary=False, allow_scaling=False, headers_off=False, override_dpi=None, @@ -108,6 +110,7 @@ class Eynollah: self.allow_enhancement = allow_enhancement self.curved_line = curved_line self.full_layout = full_layout + self.input_binary = input_binary self.allow_scaling = allow_scaling self.headers_off = headers_off self.plotter = None if not enable_plotting else EynollahPlotter( @@ -125,6 +128,7 @@ class Eynollah: self.dir_models = dir_models self.model_dir_of_enhancement = dir_models + "/model_enhancement.h5" + self.model_dir_of_binarization = dir_models + "/model_bin_sbb_ens.h5" self.model_dir_of_col_classifier = dir_models + "/model_scale_classifier.h5" self.model_region_dir_p = dir_models + "/model_main_covid19_lr5-5_scale_1_1_great.h5" self.model_region_dir_p2 = dir_models + "/model_main_home_corona3_rot.h5" @@ -133,7 +137,7 @@ class Eynollah: self.model_page_dir = dir_models + "/model_page_mixed_best.h5" self.model_region_dir_p_ens = dir_models + "/model_ensemble_s.h5" self.model_textline_dir = dir_models + "/model_textline_newspapers.h5" - + def _cache_images(self, image_filename=None, image_pil=None): ret = {} if image_filename: @@ -309,27 +313,36 @@ class Eynollah: return img_new, num_column_is_classified - def resize_image_with_column_classifier(self, is_image_enhanced): + def resize_image_with_column_classifier(self, is_image_enhanced, img_bin): self.logger.debug("enter resize_image_with_column_classifier") - img = self.imread() + if self.input_binary: + img = np.copy(img_bin) + else: + img = self.imread() - _, page_coord = self.early_page_for_num_of_column_classification() + _, page_coord = self.early_page_for_num_of_column_classification(img) model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) + if self.input_binary: + img_in = np.copy(img) + img_in = img_in / 255.0 + width_early = img_in.shape[1] + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = self.imread(grayscale=True, uint8=False) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - img_1ch = self.imread(grayscale=True, uint8=False) - width_early = img_1ch.shape[1] - img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - - # plt.imshow(img_1ch) - # plt.show() - img_1ch = img_1ch / 255.0 + # plt.imshow(img_1ch) + # plt.show() + img_1ch = img_1ch / 255.0 - img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) - img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) - img_in[0, :, :, 0] = img_1ch[:, :] - img_in[0, :, :, 1] = img_1ch[:, :] - img_in[0, :, :, 2] = img_1ch[:, :] + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] label_p_pred = model_num_classifier.predict(img_in) num_col = np.argmax(label_p_pred[0]) + 1 @@ -358,24 +371,51 @@ class Eynollah: self.logger.debug("enter resize_and_enhance_image_with_column_classifier") dpi = self.dpi self.logger.info("Detected %s DPI", dpi) - img = self.imread() + if self.input_binary: + img = self.imread() + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img, model_bin) + + prediction_bin=prediction_bin[:,:,0] + prediction_bin = (prediction_bin[:,:]==0)*1 + prediction_bin = prediction_bin*255 + + prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + session_bin.close() + del model_bin + del session_bin + gc.collect() + + prediction_bin = prediction_bin.astype(np.uint8) + img= np.copy(prediction_bin) + img_bin = np.copy(prediction_bin) + else: + img = self.imread() + img_bin = None - _, page_coord = self.early_page_for_num_of_column_classification() + _, page_coord = self.early_page_for_num_of_column_classification(img_bin) model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) - img_1ch = self.imread(grayscale=True) - width_early = img_1ch.shape[1] - img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - # plt.imshow(img_1ch) - # plt.show() - img_1ch = img_1ch / 255.0 - img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) - img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) - img_in[0, :, :, 0] = img_1ch[:, :] - img_in[0, :, :, 1] = img_1ch[:, :] - img_in[0, :, :, 2] = img_1ch[:, :] - - # plt.imshow(img_in[0,:,:,:]) - # plt.show() + + if self.input_binary: + img_in = np.copy(img) + width_early = img_in.shape[1] + img_in = img_in / 255.0 + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = self.imread(grayscale=True) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_1ch = img_1ch / 255.0 + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] + + label_p_pred = model_num_classifier.predict(img_in) num_col = np.argmax(label_p_pred[0]) + 1 @@ -396,7 +436,7 @@ class Eynollah: self.logger.debug("exit resize_and_enhance_image_with_column_classifier") - return is_image_enhanced, img, image_res, num_col, num_column_is_classified + return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin # pylint: disable=attribute-defined-outside-init def get_image_and_scales(self, img_org, img_res, scale): @@ -587,9 +627,13 @@ class Eynollah: gc.collect() return prediction_true - def early_page_for_num_of_column_classification(self): + def early_page_for_num_of_column_classification(self,img_bin): self.logger.debug("enter early_page_for_num_of_column_classification") - img = self.imread() + if self.input_binary: + img =np.copy(img_bin) + img = img.astype(np.uint8) + else: + img = self.imread() model_page, session_page = self.start_new_session_and_model(self.model_page_dir) img = cv2.GaussianBlur(img, (5, 5), 0) @@ -1149,6 +1193,8 @@ class Eynollah: self.logger.info("ratio_of_two_models: %s", rate_two_models) if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD): prediction_regions_org = np.copy(prediction_regions_org_copy) + + prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3 mask_lines_only=(prediction_regions_org[:,:]==3)*1 @@ -1158,6 +1204,47 @@ class Eynollah: #plt.show() prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2) + + + if rate_two_models<=40: + if self.input_binary: + prediction_bin = np.copy(img_org) + else: + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_org, model_bin) + prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) + + prediction_bin=prediction_bin[:,:,0] + prediction_bin = (prediction_bin[:,:]==0)*1 + prediction_bin = prediction_bin*255 + + prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + session_bin.close() + del model_bin + del session_bin + gc.collect() + + + + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + ratio_y=1 + ratio_x=1 + + + img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) + + prediction_regions_org = self.do_prediction(True, img, model_region) + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) + prediction_regions_org=prediction_regions_org[:,:,0] + + mask_lines_only=(prediction_regions_org[:,:]==3)*1 + session_region.close() + del model_region + del session_region + gc.collect() + + mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1 @@ -1170,26 +1257,71 @@ class Eynollah: text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) - + K.clear_session() return text_regions_p_true, erosion_hurts except: - img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) + if self.input_binary: + prediction_bin = np.copy(img_org) + else: + session_region.close() + del model_region + del session_region + gc.collect() + + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_org, model_bin) + prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) + prediction_bin=prediction_bin[:,:,0] + + prediction_bin = (prediction_bin[:,:]==0)*1 + + prediction_bin = prediction_bin*255 + + prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + + + session_bin.close() + del model_bin + del session_bin + gc.collect() - prediction_regions_org = self.do_prediction(True, img, model_region) - prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) - prediction_regions_org = prediction_regions_org[:,:,0] + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + ratio_y=1 + ratio_x=1 + + + img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) + + prediction_regions_org = self.do_prediction(True, img, model_region) + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) + prediction_regions_org=prediction_regions_org[:,:,0] - prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 + #mask_lines_only=(prediction_regions_org[:,:]==3)*1 session_region.close() del model_region del session_region gc.collect() + #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) + + #prediction_regions_org = self.do_prediction(True, img, model_region) + + #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) + + #prediction_regions_org = prediction_regions_org[:,:,0] + + #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 + #session_region.close() + #del model_region + #del session_region + #gc.collect() + @@ -1506,7 +1638,7 @@ class Eynollah: def run_enhancement(self): self.logger.info("resize and enhance image") - is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified = self.resize_and_enhance_image_with_column_classifier() + is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier() self.logger.info("Image is %senhanced", '' if is_image_enhanced else 'not ') K.clear_session() scale = 1 @@ -1522,7 +1654,7 @@ class Eynollah: else: self.get_image_and_scales(img_org, img_res, scale) if self.allow_scaling: - img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced) + img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin) self.get_image_and_scales_after_enhancing(img_org, img_res) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified @@ -1688,13 +1820,10 @@ class Eynollah: t0 = time.time() img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement() - - self.logger.info("Enhancing took %ss ", str(time.time() - t0)) t1 = time.time() text_regions_p_1 ,erosion_hurts = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %ss ", str(time.time() - t1)) t1 = time.time() diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index fa2e4ae..74e985e 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -1595,13 +1595,12 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho try: if erosion_hurts: - num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=3.) + num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=6.) else: num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=7.) except: peaks_neg_fin=[] - - print(peaks_neg_fin,'peaks_neg_fin0') + try: peaks_neg_fin_org=np.copy(peaks_neg_fin) diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index d9a9239..11ec7f9 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -64,13 +64,14 @@ class EynollahXmlWriter(): for l in range(len(all_found_texline_polygons_marginals[marginal_idx][j])): if not self.curved_line: if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2: - points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) + textline_x_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) + textline_y_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) else: - points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0])/self.scale_y)) + textline_x_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) + textline_y_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) + points_co += str(textline_x_coord) + points_co += ',' + points_co += str(textline_y_coord) if self.curved_line and np.abs(slopes_marginals[marginal_idx]) <= 45: if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2: points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x))