From f765e2603b14186574ec86ff70a1767adfec867d Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 15:57:13 +0000 Subject: [PATCH 01/36] move Torch to optional dependencies (to avoid clash with TF over CuDNN) --- pyproject.toml | 4 ++++ requirements.txt | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b056cb7..61d488a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,10 @@ classifiers = [ "Topic :: Scientific/Engineering :: Image Processing", ] +[project.optional-dependencies] +OCR = ["torch <= 2.0.1", "transformers <= 4.30.2"] +plotting = ["matplotlib"] + [project.scripts] eynollah = "eynollah.cli:main" ocrd-eynollah-segment = "eynollah.ocrd_cli:main" diff --git a/requirements.txt b/requirements.txt index 02450aa..d72df29 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,8 +4,4 @@ numpy <1.24.0 scikit-learn >= 0.23.2 tensorflow < 2.13 imutils >= 0.5.3 -matplotlib -setuptools >= 50 -transformers <= 4.30.2 -torch <= 2.0.1 numba <= 0.58.1 From 7ae64f3717ac84f7aebc12c5933015d8bc4b8056 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 16:18:35 +0000 Subject: [PATCH 02/36] RO model: do not reload when in dir_in mode --- src/eynollah/eynollah.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index e802e29..2dd5505 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -255,7 +255,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_lay_1__4_3_091124"#"/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" @@ -289,7 +289,7 @@ class Eynollah: ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) - self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) + self.model_reading_order = self.our_load_model(self.model_reading_order_dir) if self.ocr: self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -331,7 +331,7 @@ class Eynollah: self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) - self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) + self.model_reading_order = self.our_load_model(self.model_reading_order_dir) if self.tables: self.model_table = self.our_load_model(self.model_table_dir) @@ -3804,7 +3804,7 @@ class Eynollah: model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model - def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + def do_order_of_regions_with_model(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] @@ -3818,7 +3818,8 @@ class Eynollah: img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + if not self.dir_in: + self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir) height1 =672#448 width1 = 448#224 @@ -3896,7 +3897,7 @@ class Eynollah: batch_counter = batch_counter+1 if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): - y_pr=model_ro_machine.predict(input_1 , verbose=0) + y_pr = self.model_reading_order.predict(input_1 , verbose=0) if batch_counter==inference_bs: iteration_batches = inference_bs @@ -3952,7 +3953,7 @@ class Eynollah: else: early_list_bigger_than_one = -20 return list_inp, early_list_bigger_than_one - def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + def do_order_of_regions_with_model_optimized_algorithm(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] @@ -3969,7 +3970,7 @@ class Eynollah: if self.dir_in: pass else: - self.model_reading_order_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir) height1 =672#448 width1 = 448#224 @@ -4055,7 +4056,7 @@ class Eynollah: batch_counter = batch_counter+1 if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): - y_pr=self.model_reading_order_machine.predict(input_1 , verbose=0) + y_pr = self.model_reading_order.predict(input_1 , verbose=0) if batch_counter==inference_bs: iteration_batches = inference_bs @@ -5362,7 +5363,7 @@ class Eynollah: if self.full_layout: if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) @@ -5384,7 +5385,7 @@ class Eynollah: else: contours_only_text_parent_h = None if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) From 3b9a29bc5c187fe6ae4c41450a0095c3271ec703 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 18:19:54 +0000 Subject: [PATCH 03/36] simplify dir_in conditionals --- src/eynollah/eynollah.py | 78 +++++++++++++--------------------------- 1 file changed, 24 insertions(+), 54 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 2dd5505..c1e0f4d 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -274,7 +274,8 @@ class Eynollah: self.models = {} - if dir_in and light_version: + if dir_in: + # as in start_new_session: config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True session = tf.compat.v1.Session(config=config) @@ -283,62 +284,31 @@ class Eynollah: self.model_page = self.our_load_model(self.model_page_dir) self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) self.model_bin = self.our_load_model(self.model_dir_of_binarization) - self.model_textline = self.our_load_model(self.model_textline_dir) - self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) - self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np) - ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) - self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) - self.model_region_fl = self.our_load_model(self.model_region_dir_fully) - self.model_reading_order = self.our_load_model(self.model_reading_order_dir) - if self.ocr: - self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")#("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") - if self.tables: - self.model_table = self.our_load_model(self.model_table_dir) - - - self.ls_imgs = os.listdir(self.dir_in) - - if dir_in and self.extract_only_images: - config = tf.compat.v1.ConfigProto() - config.gpu_options.allow_growth = True - session = tf.compat.v1.Session(config=config) - set_session(session) - - self.model_page = self.our_load_model(self.model_page_dir) - self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) - self.model_bin = self.our_load_model(self.model_dir_of_binarization) - #self.model_textline = self.our_load_model(self.model_textline_dir) - self.model_region = self.our_load_model(self.model_region_dir_p_ens_light_only_images_extraction) - #self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) - #self.model_region_fl = self.our_load_model(self.model_region_dir_fully) - - self.ls_imgs = os.listdir(self.dir_in) - - if dir_in and not (light_version or self.extract_only_images): - config = tf.compat.v1.ConfigProto() - config.gpu_options.allow_growth = True - session = tf.compat.v1.Session(config=config) - set_session(session) + if self.extract_only_images: + self.model_region = self.our_load_model(self.model_region_dir_p_ens_light_only_images_extraction) + else: + self.model_textline = self.our_load_model(self.model_textline_dir) + if self.light_version: + self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) + self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np) + else: + self.model_region = self.our_load_model(self.model_region_dir_p_ens) + self.model_region_p2 = self.our_load_model(self.model_region_dir_p2) + self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) + ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) + self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) + self.model_region_fl = self.our_load_model(self.model_region_dir_fully) + if self.reading_order_machine_based: + self.model_reading_order = self.our_load_model(self.model_reading_order_dir) + if self.ocr: + self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")#("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") + if self.tables: + self.model_table = self.our_load_model(self.model_table_dir) - self.model_page = self.our_load_model(self.model_page_dir) - self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) - self.model_bin = self.our_load_model(self.model_dir_of_binarization) - self.model_textline = self.our_load_model(self.model_textline_dir) - self.model_region = self.our_load_model(self.model_region_dir_p_ens) - self.model_region_p2 = self.our_load_model(self.model_region_dir_p2) - self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) - self.model_region_fl = self.our_load_model(self.model_region_dir_fully) - self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) - self.model_reading_order = self.our_load_model(self.model_reading_order_dir) - if self.tables: - self.model_table = self.our_load_model(self.model_table_dir) - self.ls_imgs = os.listdir(self.dir_in) - - def _cache_images(self, image_filename=None, image_pil=None): ret = {} From 329fac23f67b2a46fe3c00b0340bd3a56143bed2 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 18:29:49 +0000 Subject: [PATCH 04/36] do not reload enhancement model in dir_in mode, simplify --- src/eynollah/eynollah.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index c1e0f4d..145f722 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -363,10 +363,11 @@ class Eynollah: def predict_enhancement(self, img): self.logger.debug("enter predict_enhancement") - model_enhancement, session_enhancement = self.start_new_session_and_model(self.model_dir_of_enhancement) + if not self.dir_in: + self.model_enhancement, _ = self.start_new_session_and_model(self.model_dir_of_enhancement) - img_height_model = model_enhancement.layers[len(model_enhancement.layers) - 1].output_shape[1] - img_width_model = model_enhancement.layers[len(model_enhancement.layers) - 1].output_shape[2] + img_height_model = self.model_enhancement.layers[len(self.model_enhancement.layers) - 1].output_shape[1] + img_width_model = self.model_enhancement.layers[len(self.model_enhancement.layers) - 1].output_shape[2] if img.shape[0] < img_height_model: img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST) @@ -409,9 +410,8 @@ class Eynollah: index_y_u = img_h index_y_d = img_h - img_height_model - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model_enhancement.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - verbose=0) + img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :] + label_p_pred = self.model_enhancement.predict(img_patch, verbose=0) seg = label_p_pred[0, :, :, :] seg = seg * 255 From 14beb46224b3f0660f44dafbf4bbe094b68d7274 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 21:07:26 +0000 Subject: [PATCH 05/36] simplify loading models w/o dir_in mode --- src/eynollah/eynollah.py | 203 +++++++++++++++------------------------ 1 file changed, 75 insertions(+), 128 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 145f722..d11531a 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -567,7 +567,8 @@ class Eynollah: _, page_coord = self.early_page_for_num_of_column_classification(img) if not self.dir_in: - model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) + self.model_classifier, _ = self.start_new_session_and_model(self.model_dir_of_col_classifier) + if self.input_binary: img_in = np.copy(img) img_in = img_in / 255.0 @@ -590,10 +591,7 @@ class Eynollah: img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] - if not self.dir_in: - label_p_pred = model_num_classifier.predict(img_in, verbose=0) - else: - label_p_pred = self.model_classifier.predict(img_in, verbose=0) + label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 @@ -613,12 +611,10 @@ class Eynollah: self.logger.info("Detected %s DPI", dpi) if self.input_binary: img = self.imread() - if self.dir_in: - prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) - else: - - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img, model_bin, n_batch_inference=5) + if not self.dir_in: + self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) + + prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 @@ -641,7 +637,7 @@ class Eynollah: self.page_coord = page_coord if not self.dir_in: - model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) + self.model_classifier, _ = self.start_new_session_and_model(self.model_dir_of_col_classifier) if self.num_col_upper and not self.num_col_lower: num_col = self.num_col_upper @@ -669,10 +665,7 @@ class Eynollah: img_in[0, :, :, 2] = img_1ch[:, :] - if self.dir_in: - label_p_pred = self.model_classifier.predict(img_in, verbose=0) - else: - label_p_pred = model_num_classifier.predict(img_in, verbose=0) + label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): if self.input_binary: @@ -693,10 +686,7 @@ class Eynollah: img_in[0, :, :, 2] = img_1ch[:, :] - if self.dir_in: - label_p_pred = self.model_classifier.predict(img_in, verbose=0) - else: - label_p_pred = model_num_classifier.predict(img_in, verbose=0) + label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 if num_col > self.num_col_upper: @@ -1381,12 +1371,9 @@ class Eynollah: img = cv2.GaussianBlur(self.image, (5, 5), 0) if not self.dir_in: - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + self.model_page, _ = self.start_new_session_and_model(self.model_page_dir) - if not self.dir_in: - img_page_prediction = self.do_prediction(False, img, model_page) - else: - img_page_prediction = self.do_prediction(False, img, self.model_page) + img_page_prediction = self.do_prediction(False, img, self.model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) thresh = cv2.dilate(thresh, KERNEL, iterations=3) @@ -1429,13 +1416,10 @@ class Eynollah: else: img = self.imread() if not self.dir_in: - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + self.model_page, _ = self.start_new_session_and_model(self.model_page_dir) img = cv2.GaussianBlur(img, (5, 5), 0) - if self.dir_in: - img_page_prediction = self.do_prediction(False, img, self.model_page) - else: - img_page_prediction = self.do_prediction(False, img, model_page) + img_page_prediction = self.do_prediction(False, img, self.model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) @@ -1462,9 +1446,12 @@ class Eynollah: img_height_h = img.shape[0] img_width_h = img.shape[1] if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np) - else: - model_region = self.model_region_fl if patches else self.model_region_fl_np + if patches: + self.model_region_fl, _ = self.start_new_session_and_model(self.model_region_dir_fully) + else: + self.model_region_fl_np, _ = self.start_new_session_and_model(self.model_region_dir_fully_np) + + model_region = self.model_region_fl if patches else self.model_region_fl_np if not patches: if self.light_version: @@ -1546,9 +1533,12 @@ class Eynollah: img_height_h = img.shape[0] img_width_h = img.shape[1] if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np) - else: - model_region = self.model_region_fl if patches else self.model_region_fl_np + if patches: + self.model_region_fl, _ = self.start_new_session_and_model(self.model_region_dir_fully) + else: + self.model_region_fl_np, _ = self.start_new_session_and_model(self.model_region_dir_fully_np) + + model_region = self.model_region_fl if patches else self.model_region_fl_np if not patches: img = otsu_copy_binary(img) @@ -2049,26 +2039,18 @@ class Eynollah: else: thresholding_for_artificial_class_in_light_version = False if not self.dir_in: - model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir) + self.model_textline, _ = self.start_new_session_and_model(self.model_textline_dir) #img = img.astype(np.uint8) img_org = np.copy(img) img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) - - #if not thresholding_for_artificial_class_in_light_version: - #if num_col_classifier==1: - #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) - #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 - else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) - #if not thresholding_for_artificial_class_in_light_version: - #if num_col_classifier==1: - #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) - #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 + prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + #if not thresholding_for_artificial_class_in_light_version: + #if num_col_classifier==1: + #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline) + #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 prediction_textline = resize_image(prediction_textline, img_h, img_w) textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1 @@ -2092,10 +2074,7 @@ class Eynollah: if not thresholding_for_artificial_class_in_light_version: prediction_textline[:,:][old_art[:,:]==1]=2 - if not self.dir_in: - prediction_textline_longshot = self.do_prediction(False, img, model_textline) - else: - prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) + prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) return ((prediction_textline[:, :, 0]==1)*1).astype('uint8'), ((prediction_textline_longshot_true_size[:, :, 0]==1)*1).astype('uint8') @@ -2161,10 +2140,8 @@ class Eynollah: if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light_only_images_extraction) - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region) - else: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region) + self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light_only_images_extraction) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region) prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) @@ -2256,7 +2233,7 @@ class Eynollah: img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - #model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + #model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) #print(num_col_classifier,'num_col_classifier') @@ -2290,10 +2267,8 @@ class Eynollah: #img_bin = np.copy(img_resized) ###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): ###if not self.dir_in: - ###model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - ###prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) - ###else: - ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + ###self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) + ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) ####print("inside bin ", time.time()-t_bin) ###prediction_bin=prediction_bin[:,:,0] @@ -2309,10 +2284,8 @@ class Eynollah: ###img_bin = np.copy(img_resized) if self.ocr and not self.input_binary: if not self.dir_in: - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2341,30 +2314,27 @@ class Eynollah: if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) if not self.dir_in: - if num_col_classifier == 1 or num_col_classifier == 2: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) - else: - prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page + self.model_region_1_2, _ = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + ##self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + + if num_col_classifier == 1 or num_col_classifier == 2: + if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + prediction_regions_org = self.do_prediction_new_concept( + True, img_resized, self.model_region_1_2, n_batch_inference=1, + thresholding_for_some_classes_in_light_version=True) else: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) - ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + prediction_regions_page = self.do_prediction_new_concept( + False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, + thresholding_for_artificial_class_in_light_version=True) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - if num_col_classifier == 1 or num_col_classifier == 2: - if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_some_classes_in_light_version=True) - else: - prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page - else: - prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), self.model_region_1_2, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) - ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) + new_h = (900+ (num_col_classifier-3)*100) + img_resized = resize_image(img_bin, int(new_h * img_bin.shape[0] /img_bin.shape[1]), new_h) + prediction_regions_org = self.do_prediction_new_concept( + True, img_resized, self.model_region_1_2, n_batch_inference=2, + thresholding_for_some_classes_in_light_version=True) + ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) @@ -2466,16 +2436,13 @@ class Eynollah: img_width_h = img_org.shape[1] if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) ratio_y=1.3 ratio_x=1 img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - if not self.dir_in: - prediction_regions_org_y = self.do_prediction(True, img, model_region) - else: - prediction_regions_org_y = self.do_prediction(True, img, self.model_region) + prediction_regions_org_y = self.do_prediction(True, img, self.model_region) prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h ) #plt.imshow(prediction_regions_org_y[:,:,0]) @@ -2494,10 +2461,7 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1))) - if self.dir_in: - prediction_regions_org = self.do_prediction(True, img, self.model_region) - else: - prediction_regions_org = self.do_prediction(True, img, model_region) + prediction_regions_org = self.do_prediction(True, img, self.model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] @@ -2505,14 +2469,11 @@ class Eynollah: if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2) + self.model_region_p2, _ = self.start_new_session_and_model(self.model_region_dir_p2) img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) - if self.dir_in: - prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) - else: - prediction_regions_org2 = self.do_prediction(True, img, model_region, marginal_of_patch_percent=0.2) + prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) @@ -2544,10 +2505,8 @@ class Eynollah: prediction_bin = np.copy(img_org) else: if not self.dir_in: - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) + self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] @@ -2557,17 +2516,14 @@ class Eynollah: prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) ratio_y=1 ratio_x=1 img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - if not self.dir_in: - prediction_regions_org = self.do_prediction(True, img, model_region) - else: - prediction_regions_org = self.do_prediction(True, img, self.model_region) + prediction_regions_org = self.do_prediction(True, img, self.model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] @@ -2597,10 +2553,8 @@ class Eynollah: prediction_bin = np.copy(img_org) if not self.dir_in: - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) + self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] @@ -2612,7 +2566,7 @@ class Eynollah: if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) else: prediction_bin = np.copy(img_org) @@ -2621,17 +2575,14 @@ class Eynollah: img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - if not self.dir_in: - prediction_regions_org = self.do_prediction(True, img, model_region) - else: - prediction_regions_org = self.do_prediction(True, img, self.model_region) + prediction_regions_org = self.do_prediction(True, img, self.model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] #mask_lines_only=(prediction_regions_org[:,:]==3)*1 #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) - #prediction_regions_org = self.do_prediction(True, img, model_region) + #prediction_regions_org = self.do_prediction(True, img, self.model_region) #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) @@ -3173,9 +3124,7 @@ class Eynollah: - if self.dir_in: - pass - else: + if not self.dir_in: self.model_table, _ = self.start_new_session_and_model(self.model_table_dir) patches = False @@ -3937,9 +3886,7 @@ class Eynollah: img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - if self.dir_in: - pass - else: + if not self.dir_in: self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir) height1 =672#448 From 9f12fa241dfb09eff9119e940285e1271dfc700b Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 22:09:15 +0000 Subject: [PATCH 06/36] log-level: only set 'eynollah' logger level --- src/eynollah/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index bed0c03..5f4b5a4 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -1,6 +1,6 @@ import sys import click -from ocrd_utils import initLogging, setOverrideLogLevel +from ocrd_utils import initLogging, getLevelName, getLogger from eynollah.eynollah import Eynollah from eynollah.sbb_binarize import SbbBinarizer @@ -254,9 +254,9 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) ) def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): - if log_level: - setOverrideLogLevel(log_level) initLogging() + if log_level: + getLogger('eynollah').setLevel(getLevelName(log_level)) if not enable_plotting and (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement): print("Error: You used one of -sl, -sd, -sa, -sp, -si or -ae but did not enable plotting with -ep") sys.exit(1) From 5b82320707e92162220b819734705c40e77bce74 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 22:09:32 +0000 Subject: [PATCH 07/36] avoid indentation --- src/eynollah/eynollah.py | 899 ++++++++++++++++++++------------------- 1 file changed, 450 insertions(+), 449 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d11531a..4f1d8e3 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4926,496 +4926,497 @@ class Eynollah: if self.dir_in: self.writer.write_pagexml(pcgts) + continue else: return pcgts - else: - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - self.logger.info("Enhancing took %.1fs ", time.time() - t0) - #print("text region early -1 in %.1fs", time.time() - t0) - t1 = time.time() - if not self.skip_layout_and_reading_order: - if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) - - if num_col_classifier == 1 or num_col_classifier ==2: - if num_col_classifier == 1: - img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: - img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) - else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) - #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) - textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - - t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - #self.logger.info('cont_page %s', cont_page) - - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t1) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts - #print("text region early in %.1fs", time.time() - t0) - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) - t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - t1 = time.time() - #plt.imshow(table_prediction) - #plt.show() - if self.light_version and num_col_classifier in (1,2): - org_h_l_m = textline_mask_tot_ea.shape[0] - org_w_l_m = textline_mask_tot_ea.shape[1] + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + self.logger.info("Enhancing took %.1fs ", time.time() - t0) + #print("text region early -1 in %.1fs", time.time() - t0) + t1 = time.time() + if not self.skip_layout_and_reading_order: + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) + + if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: - img_w_new = 2000 + img_w_new = 1000 img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - + elif num_col_classifier == 2: - img_w_new = 2400 + img_w_new = 1300 img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - image_page = resize_image(image_page,img_h_new, img_w_new ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - mask_images = resize_image(mask_images,img_h_new, img_w_new ) - mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) - text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) - table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - - if self.light_version and num_col_classifier in (1,2): - image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) - text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) - textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) - text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) - table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) - - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) - ## birdan sora chock chakir + + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) + else: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.full_layout: - if not self.light_version: - img_bin_light = None - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - - if self.light_version: - drop_label_in_full_layout = 4 - textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 - - - text_only = ((img_revised_tab[:, :] == 1)) * 1 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - - #print("text region early 2 in %.1fs", time.time() - t0) - ###min_con_area = 0.000005 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t1) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts + #print("text region early in %.1fs", time.time() - t0) + t1 = time.time() + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) + t1 = time.time() + #plt.imshow(table_prediction) + #plt.show() + if self.light_version and num_col_classifier in (1,2): + org_h_l_m = textline_mask_tot_ea.shape[0] + org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1: + img_w_new = 2000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 2400 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + image_page = resize_image(image_page,img_h_new, img_w_new ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + mask_images = resize_image(mask_images,img_h_new, img_w_new ) + mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) + text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) + table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + if self.light_version and num_col_classifier in (1,2): + image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) + text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) + textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) + text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) + table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) + + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) + ## birdan sora chock chakir + t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + if self.full_layout: + if not self.light_version: + img_bin_light = None + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + if self.light_version: + drop_label_in_full_layout = 4 + textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 - areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) - - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big - - contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] - + + text_only = ((img_revised_tab[:, :] == 1)) * 1 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + #print("text region early 2 in %.1fs", time.time() - t0) + ###min_con_area = 0.000005 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + (h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] + else: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] + else: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + #try: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #except: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) + # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) + # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) + else: + pass - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + #print("text region early 3 in %.1fs", time.time() - t0) + if self.light_version: + contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + #print("text region early 3.5 in %.1fs", time.time() - t0) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + #txt_con_org = self.dilate_textregions_contours(txt_con_org) + #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + else: + txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) + boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) + boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) + #print("text region early 5 in %.1fs", time.time() - t0) + ## birdan sora chock chakir + if not self.curved_line: + if self.light_version: + if self.textline_light: + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) + + #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) + #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) + + contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons) - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - #try: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - #except: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) - # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) - # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) else: - pass - - #print("text region early 3 in %.1fs", time.time() - t0) - if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) - #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) - #txt_con_org = self.dilate_textregions_contours(txt_con_org) - #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + + #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) - ## birdan sora chock chakir - if not self.curved_line: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + + else: + + scale_param = 1 + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + #print("text region early 6 in %.1fs", time.time() - t0) + if self.full_layout: + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) if self.light_version: - if self.textline_light: - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) - #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) - - contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons) - - else: - textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - - #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - - scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - #print("text region early 6 in %.1fs", time.time() - t0) - if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + #takes long timee + contours_only_text_parent_d_ordered = None + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - #takes long timee - contours_only_text_parent_d_ordered = None - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - if self.plotter: - self.plotter.save_plot_of_layout(text_regions_p, image_page) - self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) - pixel_lines = 6 - - if not self.reading_order_machine_based: - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + if self.plotter: + self.plotter.save_plot_of_layout(text_regions_p, image_page) + self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - if num_col_classifier >= 3: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + pixel_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) + pixel_lines = 6 - else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - - if not self.reading_order_machine_based: + if not self.reading_order_machine_based: + if not self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if self.plotter: - self.plotter.write_images_into_directory(polygons_of_images, image_page) - t_order = time.time() - - if self.full_layout: - - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - else: + if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - - if self.ocr: - ocr_all_textlines = [] + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + + if not self.reading_order_machine_based: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - ocr_all_textlines = None - - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - - + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + + if self.plotter: + self.plotter.write_images_into_directory(polygons_of_images, image_page) + t_order = time.time() + + if self.full_layout: + + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_h = None - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) - else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + + if self.ocr: + ocr_all_textlines = [] + else: + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts - if self.ocr: - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) - - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 - - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) - - ocr_textline_in_textregion.append(text_ocr) - - - ind_tot = ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) - - else: - ocr_all_textlines = None - #print(ocr_all_textlines) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) else: - _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) - - page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) - - - ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) - - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) - all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - - all_found_textline_polygons=[ all_found_textline_polygons ] - - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") - - - order_text_new = [0] - slopes =[0] - id_of_texts_tot =['region_0001'] - - polygons_of_images = [] - slopes_marginals = [] - polygons_of_marginals = [] - all_found_textline_polygons_marginals = [] - all_box_coord_marginals = [] - polygons_lines_xml = [] - contours_tables = [] - ocr_all_textlines = None - - pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + contours_only_text_parent_h = None + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + else: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + + + if self.ocr: + + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) + + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + + ocr_textline_in_textregion.append(text_ocr) + + + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) + + else: + ocr_all_textlines = None + #print(ocr_all_textlines) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts - - if self.dir_in: - self.writer.write_pagexml(pcgts) - #self.logger.info("Job done in %.1fs", time.time() - t0) - print("Job done in %.1fs", time.time() - t0) + #print("text region early 7 in %.1fs", time.time() - t0) + else: + _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) + + page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + + + ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) + + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) + all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + all_found_textline_polygons=[ all_found_textline_polygons ] + + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") + + + order_text_new = [0] + slopes =[0] + id_of_texts_tot =['region_0001'] + + polygons_of_images = [] + slopes_marginals = [] + polygons_of_marginals = [] + all_found_textline_polygons_marginals = [] + all_box_coord_marginals = [] + polygons_lines_xml = [] + contours_tables = [] + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + if not self.dir_in: + return pcgts + + if self.dir_in: + self.writer.write_pagexml(pcgts) + #self.logger.info("Job done in %.1fs", time.time() - t0) + print("Job done in %.1fs" % time.time() - t0) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) From cd4e426977193e34452f76abf5af8b7af222d8b0 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 22:11:34 +0000 Subject: [PATCH 08/36] avoid indentation (skip_layout_and_reading_order) --- src/eynollah/eynollah.py | 824 ++++++++++++++++++++------------------- 1 file changed, 413 insertions(+), 411 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 4f1d8e3..2772bd4 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4934,489 +4934,491 @@ class Eynollah: self.logger.info("Enhancing took %.1fs ", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() - if not self.skip_layout_and_reading_order: - if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) + if self.skip_layout_and_reading_order: + _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) - if num_col_classifier == 1 or num_col_classifier ==2: - if num_col_classifier == 1: - img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) - elif num_col_classifier == 2: - img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) - else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) - #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) - textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) + all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - #self.logger.info('cont_page %s', cont_page) + all_found_textline_polygons=[ all_found_textline_polygons ] - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t1) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts - #print("text region early in %.1fs", time.time() - t0) - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") - t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - t1 = time.time() - #plt.imshow(table_prediction) - #plt.show() - if self.light_version and num_col_classifier in (1,2): - org_h_l_m = textline_mask_tot_ea.shape[0] - org_w_l_m = textline_mask_tot_ea.shape[1] - if num_col_classifier == 1: - img_w_new = 2000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - elif num_col_classifier == 2: - img_w_new = 2400 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + order_text_new = [0] + slopes =[0] + id_of_texts_tot =['region_0001'] - image_page = resize_image(image_page,img_h_new, img_w_new ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - mask_images = resize_image(mask_images,img_h_new, img_w_new ) - mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) - text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) - table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - - if self.light_version and num_col_classifier in (1,2): - image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) - text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) - textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) - text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) - table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) - - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) - ## birdan sora chock chakir - t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.full_layout: - if not self.light_version: - img_bin_light = None - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + polygons_of_images = [] + slopes_marginals = [] + polygons_of_marginals = [] + all_found_textline_polygons_marginals = [] + all_box_coord_marginals = [] + polygons_lines_xml = [] + contours_tables = [] + ocr_all_textlines = None - if self.light_version: - drop_label_in_full_layout = 4 - textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 + pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + if self.dir_in: + continue + else: + return pcgts + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) - text_only = ((img_revised_tab[:, :] == 1)) * 1 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + if num_col_classifier == 1 or num_col_classifier ==2: + if num_col_classifier == 1: + img_w_new = 1000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - #print("text region early 2 in %.1fs", time.time() - t0) - ###min_con_area = 0.000005 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) - - areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) - - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big - - contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] + elif num_col_classifier == 2: + img_w_new = 1300 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) else: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) + else: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + t1 = time.time() + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t1) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts + #print("text region early in %.1fs", time.time() - t0) + t1 = time.time() + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) - index_con_parents = np.argsort(areas_cnt_text_parent) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) + t1 = time.time() + #plt.imshow(table_prediction) + #plt.show() + if self.light_version and num_col_classifier in (1,2): + org_h_l_m = textline_mask_tot_ea.shape[0] + org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1: + img_w_new = 2000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 2400 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + image_page = resize_image(image_page,img_h_new, img_w_new ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + mask_images = resize_image(mask_images,img_h_new, img_w_new ) + mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) + text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) + table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + if self.light_version and num_col_classifier in (1,2): + image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) + text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) + textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) + text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) + table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) + + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) + ## birdan sora chock chakir + t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + if self.full_layout: + if not self.light_version: + img_bin_light = None + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + if self.light_version: + drop_label_in_full_layout = 4 + textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 + + + text_only = ((img_revised_tab[:, :] == 1)) * 1 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + #print("text region early 2 in %.1fs", time.time() - t0) + ###min_con_area = 0.000005 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) #try: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) #except: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) - # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) - # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + (h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() else: - pass + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] - #print("text region early 3 in %.1fs", time.time() - t0) - if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) - #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) - #txt_con_org = self.dilate_textregions_contours(txt_con_org) - #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) - ## birdan sora chock chakir - if not self.curved_line: - if self.light_version: - if self.textline_light: - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] + else: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + #try: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #except: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) + # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) + # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) + else: + pass - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + #print("text region early 3 in %.1fs", time.time() - t0) + if self.light_version: + contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + #print("text region early 3.5 in %.1fs", time.time() - t0) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + #txt_con_org = self.dilate_textregions_contours(txt_con_org) + #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + else: + txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) + boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) + boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) + #print("text region early 5 in %.1fs", time.time() - t0) + ## birdan sora chock chakir + if not self.curved_line: + if self.light_version: + if self.textline_light: + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) - #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons) + #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) + #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) - else: - textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons) - #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - #print("text region early 6 in %.1fs", time.time() - t0) - if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - #takes long timee - contours_only_text_parent_d_ordered = None - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - - if self.plotter: - self.plotter.save_plot_of_layout(text_regions_p, image_page) - self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) - pixel_lines = 6 + else: - if not self.reading_order_machine_based: - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + scale_param = 1 + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + #print("text region early 6 in %.1fs", time.time() - t0) + if self.full_layout: + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + #takes long timee + contours_only_text_parent_d_ordered = None + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - if num_col_classifier >= 3: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + if self.plotter: + self.plotter.save_plot_of_layout(text_regions_p, image_page) + self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + pixel_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) + pixel_lines = 6 - if not self.reading_order_machine_based: + if not self.reading_order_machine_based: + if not self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - - if self.plotter: - self.plotter.write_images_into_directory(polygons_of_images, image_page) - t_order = time.time() - - if self.full_layout: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - else: + if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - if self.ocr: - ocr_all_textlines = [] + if not self.reading_order_machine_based: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - ocr_all_textlines = None + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts + if self.plotter: + self.plotter.write_images_into_directory(polygons_of_images, image_page) + t_order = time.time() + if self.full_layout: + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_h = None - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) - else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - - - if self.ocr: - - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) - - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) - - ocr_textline_in_textregion.append(text_ocr) + if self.ocr: + ocr_all_textlines = [] + else: + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts - ind_tot = ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) - else: - ocr_all_textlines = None - #print(ocr_all_textlines) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) else: - _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) + contours_only_text_parent_h = None + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + else: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + if self.ocr: - ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) - all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - all_found_textline_polygons=[ all_found_textline_polygons ] + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + ocr_textline_in_textregion.append(text_ocr) - order_text_new = [0] - slopes =[0] - id_of_texts_tot =['region_0001'] - polygons_of_images = [] - slopes_marginals = [] - polygons_of_marginals = [] - all_found_textline_polygons_marginals = [] - all_box_coord_marginals = [] - polygons_lines_xml = [] - contours_tables = [] - ocr_all_textlines = None + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) - pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + else: + ocr_all_textlines = None + #print(ocr_all_textlines) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) if self.dir_in: self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) - print("Job done in %.1fs" % time.time() - t0) + print("Job done in %.1fs" % (time.time() - t0)) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) From a520bd1f771b9263ed865e879248b38692cdef86 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 22:49:34 +0000 Subject: [PATCH 09/36] wrap extremely long lines --- src/eynollah/eynollah.py | 95 ++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 38 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 2772bd4..769093d 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4935,7 +4935,8 @@ class Eynollah: #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if self.skip_layout_and_reading_order: - _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) + _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, + skip_layout_and_reading_order=self.skip_layout_and_reading_order) page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) @@ -4964,7 +4965,10 @@ class Eynollah: contours_tables = [] ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) if self.dir_in: continue else: @@ -5005,6 +5009,8 @@ class Eynollah: self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) self.logger.info("Graphics detection took %.1fs ", time.time() - t1) #self.logger.info('cont_page %s', cont_page) + #plt.imshow(table_prediction) + #plt.show() if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") @@ -5016,19 +5022,16 @@ class Eynollah: continue else: return pcgts + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) self.logger.info("textline detection took %.1fs", time.time() - t1) - t1 = time.time() slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) self.logger.info("deskewing took %.1fs", time.time() - t1) - t1 = time.time() - #plt.imshow(table_prediction) - #plt.show() - if self.light_version and num_col_classifier in (1,2): + elif num_col_classifier in (1,2): org_h_l_m = textline_mask_tot_ea.shape[0] org_w_l_m = textline_mask_tot_ea.shape[1] if num_col_classifier == 1: @@ -5062,14 +5065,13 @@ class Eynollah: ## birdan sora chock chakir t1 = time.time() if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ + self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.full_layout: - if not self.light_version: - img_bin_light = None - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + else: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ + self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.light_version: drop_label_in_full_layout = 4 textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 @@ -5219,14 +5221,19 @@ class Eynollah: if not self.curved_line: if self.light_version: if self.textline_light: - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ + # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) + #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ + # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) @@ -5237,22 +5244,28 @@ class Eynollah: else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: - scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ + self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ + self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: @@ -5261,17 +5274,17 @@ class Eynollah: #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) #except: #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: #takes long timee contours_only_text_parent_d_ordered = None - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + if self.light_version: + fun = check_any_text_region_in_model_one_is_main_or_header_light + else: + fun = check_any_text_region_in_model_one_is_main_or_header + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ + all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \ + fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) if self.plotter: self.plotter.save_plot_of_layout(text_regions_p, image_page) @@ -5279,7 +5292,9 @@ class Eynollah: pixel_img = 4 polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, + all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, + kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) pixel_lines = 6 if not self.reading_order_machine_based: @@ -5303,7 +5318,6 @@ class Eynollah: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - if not self.reading_order_machine_based: if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: @@ -5329,7 +5343,10 @@ class Eynollah: else: ocr_all_textlines = None - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, + cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts @@ -5409,7 +5426,9 @@ class Eynollah: ocr_all_textlines = None #print(ocr_all_textlines) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts From 3d88b207fc15c73b44675eb9e454840531095825 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 5 Dec 2024 09:39:55 +0000 Subject: [PATCH 10/36] run: log instead of print --- src/eynollah/eynollah.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 769093d..6333a7f 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4906,7 +4906,7 @@ class Eynollah: self.ls_imgs = [1] for img_name in self.ls_imgs: - print(img_name) + self.logger.info(img_name) t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) @@ -5436,8 +5436,8 @@ class Eynollah: if self.dir_in: self.writer.write_pagexml(pcgts) - #self.logger.info("Job done in %.1fs", time.time() - t0) - print("Job done in %.1fs" % (time.time() - t0)) + self.logger.info("Job done in %.1fs", time.time() - t0) + #print("Job done in %.1fs" % (time.time() - t0)) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) From aaea2ef4637b19a2731119f763085ee1eda12249 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 5 Dec 2024 09:40:02 +0000 Subject: [PATCH 11/36] simplify --- src/eynollah/eynollah.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 6333a7f..a3e6f9e 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -832,8 +832,7 @@ class Eynollah: img = img / float(255.0) img = resize_image(img, img_height_model, img_width_model) - label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), - verbose=0) + label_p_pred = model.predict(img[np.newaxis], verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] @@ -1082,6 +1081,7 @@ class Eynollah: #del model #gc.collect() return prediction_true + def do_padding_with_scale(self,img, scale): h_n = int(img.shape[0]*scale) w_n = int(img.shape[1]*scale) @@ -2032,22 +2032,20 @@ class Eynollah: all_box_coord_per_process.append(crop_coor) queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) - def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier=None): + def textline_contours(self, img, use_patches, scaler_h, scaler_w, num_col_classifier=None): self.logger.debug('enter textline_contours') - if self.textline_light: - thresholding_for_artificial_class_in_light_version = True#False - else: - thresholding_for_artificial_class_in_light_version = False if not self.dir_in: self.model_textline, _ = self.start_new_session_and_model(self.model_textline_dir) + #img = img.astype(np.uint8) img_org = np.copy(img) img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) - #if not thresholding_for_artificial_class_in_light_version: + prediction_textline = self.do_prediction(use_patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, + thresholding_for_artificial_class_in_light_version=self.textline_light) + #if not self.textline_light: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline) #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 @@ -2057,7 +2055,7 @@ class Eynollah: old_art = np.copy(textline_mask_tot_ea_art) - if not thresholding_for_artificial_class_in_light_version: + if not self.textline_light: textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') #textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) @@ -2066,12 +2064,12 @@ class Eynollah: textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1 textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8') - if not thresholding_for_artificial_class_in_light_version: + if not self.textline_light: textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1 - if not thresholding_for_artificial_class_in_light_version: + if not self.textline_light: prediction_textline[:,:][old_art[:,:]==1]=2 prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) @@ -3366,8 +3364,7 @@ class Eynollah: scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) - patches = True - textline_mask_tot_ea, _ = self.textline_contours(image_page, patches, scaler_h_textline, scaler_w_textline, num_col_classifier) + textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) From 055463d23a3ef6b3bbdf2581740c5d0dab3d501a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 5 Dec 2024 09:43:30 +0000 Subject: [PATCH 12/36] avoid indentation --- src/eynollah/eynollah.py | 453 +++++++++++++++++++-------------------- 1 file changed, 226 insertions(+), 227 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index a3e6f9e..4cf9e81 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -846,238 +846,237 @@ class Eynollah: seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) + return prediction_true + if img.shape[0] < img_height_model: + img = resize_image(img, img_height_model, img.shape[1]) - else: - if img.shape[0] < img_height_model: - img = resize_image(img, img_height_model, img.shape[1]) + if img.shape[1] < img_width_model: + img = resize_image(img, img.shape[0], img_width_model) - if img.shape[1] < img_width_model: - img = resize_image(img, img.shape[0], img_width_model) + self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) + margin = int(marginal_of_patch_percent * img_height_model) + width_mid = img_width_model - 2 * margin + height_mid = img_height_model - 2 * margin + img = img / float(255.0) + #img = img.astype(np.float16) + img_h = img.shape[0] + img_w = img.shape[1] + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) + nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) - margin = int(marginal_of_patch_percent * img_height_model) - width_mid = img_width_model - 2 * margin - height_mid = img_height_model - 2 * margin - img = img / float(255.0) - #img = img.astype(np.float16) - img_h = img.shape[0] - img_w = img.shape[1] - prediction_true = np.zeros((img_h, img_w, 3)) - mask_true = np.zeros((img_h, img_w)) - nxf = img_w / float(width_mid) - nyf = img_h / float(height_mid) - nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) - nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - for i in range(nxf): - for j in range(nyf): - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - else: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - if j == 0: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - else: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - if index_x_u > img_w: - index_x_u = img_w - index_x_d = img_w - img_width_model - if index_y_u > img_h: - index_y_u = img_h - index_y_d = img_h - img_height_model - - list_i_s.append(i) - list_j_s.append(j) - list_x_u.append(index_x_u) - list_x_d.append(index_x_d) - list_y_d.append(index_y_d) - list_y_u.append(index_y_u) - + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] - img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - - batch_indexer = batch_indexer + 1 - - if batch_indexer == n_batch_inference: - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - - if thresholding_for_some_classes_in_light_version: - seg_not_base = label_p_pred[:,:,:,4] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg_background = label_p_pred[:,:,:,0] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 - - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 - seg[(seg_line==1) & (seg==0)]=3 - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - - elif i==(nxf-1) and j==(nyf-1): - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - if thresholding_for_some_classes_in_light_version: - seg_not_base = label_p_pred[:,:,:,4] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg_background = label_p_pred[:,:,:,0] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 - - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 - seg[(seg_line==1) & (seg==0)]=3 - - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - - prediction_true = prediction_true.astype(np.uint8) + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + for i in range(nxf): + for j in range(nyf): + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + else: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + else: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - img_width_model + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - img_height_model + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + + + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + if batch_indexer == n_batch_inference: + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + elif i==(nxf-1) and j==(nyf-1): + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + prediction_true = prediction_true.astype(np.uint8) #del model #gc.collect() return prediction_true From c3163caefdb9a0843cc3e3f1408ff874fc4ce46e Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 5 Dec 2024 14:28:17 +0000 Subject: [PATCH 13/36] avoid indentation --- src/eynollah/eynollah.py | 427 +++++++++++++++++++-------------------- 1 file changed, 213 insertions(+), 214 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 4cf9e81..d483cac 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1140,227 +1140,226 @@ class Eynollah: seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) + return prediction_true + if img.shape[0] < img_height_model: + img = resize_image(img, img_height_model, img.shape[1]) - else: - if img.shape[0] < img_height_model: - img = resize_image(img, img_height_model, img.shape[1]) + if img.shape[1] < img_width_model: + img = resize_image(img, img.shape[0], img_width_model) - if img.shape[1] < img_width_model: - img = resize_image(img, img.shape[0], img_width_model) + self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) + margin = int(marginal_of_patch_percent * img_height_model) + width_mid = img_width_model - 2 * margin + height_mid = img_height_model - 2 * margin + img = img / float(255.0) + img = img.astype(np.float16) + img_h = img.shape[0] + img_w = img.shape[1] + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) + nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) - margin = int(marginal_of_patch_percent * img_height_model) - width_mid = img_width_model - 2 * margin - height_mid = img_height_model - 2 * margin - img = img / float(255.0) - img = img.astype(np.float16) - img_h = img.shape[0] - img_w = img.shape[1] - prediction_true = np.zeros((img_h, img_w, 3)) - mask_true = np.zeros((img_h, img_w)) - nxf = img_w / float(width_mid) - nyf = img_h / float(height_mid) - nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) - nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - - for i in range(nxf): - for j in range(nyf): - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - else: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - if j == 0: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - else: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - if index_x_u > img_w: - index_x_u = img_w - index_x_d = img_w - img_width_model - if index_y_u > img_h: - index_y_u = img_h - index_y_d = img_h - img_height_model - - - list_i_s.append(i) - list_j_s.append(j) - list_x_u.append(index_x_u) - list_x_d.append(index_x_d) - list_y_d.append(index_y_d) - list_y_u.append(index_y_u) - + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] - img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - - batch_indexer = batch_indexer + 1 + batch_indexer = 0 + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - if batch_indexer == n_batch_inference: - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - - if thresholding_for_some_classes_in_light_version: - seg_art = label_p_pred[:,:,:,4] - seg_art[seg_art<0.2] =0 - seg_art[seg_art>0] =1 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg[seg_art==1]=4 - seg[(seg_line==1) & (seg==0)]=3 - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - - elif i==(nxf-1) and j==(nyf-1): - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - if thresholding_for_some_classes_in_light_version: - seg_art = label_p_pred[:,:,:,4] - seg_art[seg_art<0.2] =0 - seg_art[seg_art>0] =1 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg[seg_art==1]=4 - seg[(seg_line==1) & (seg==0)]=3 - - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + for i in range(nxf): + for j in range(nyf): + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + else: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + else: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - img_width_model + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - img_height_model - prediction_true = prediction_true.astype(np.uint8) + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + + + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + if batch_indexer == n_batch_inference: + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + if thresholding_for_some_classes_in_light_version: + seg_art = label_p_pred[:,:,:,4] + seg_art[seg_art<0.2] =0 + seg_art[seg_art>0] =1 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg[seg_art==1]=4 + seg[(seg_line==1) & (seg==0)]=3 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + elif i==(nxf-1) and j==(nyf-1): + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_art = label_p_pred[:,:,:,4] + seg_art[seg_art<0.2] =0 + seg_art[seg_art>0] =1 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg[seg_art==1]=4 + seg[(seg_line==1) & (seg==0)]=3 + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + prediction_true = prediction_true.astype(np.uint8) return prediction_true def extract_page(self): From ad748d003978b643dab6ec482542b03fdb3dc1e4 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 9 Dec 2024 10:55:41 +0000 Subject: [PATCH 14/36] do_prediction: avoid code duplication --- src/eynollah/eynollah.py | 169 +++------------------------------------ 1 file changed, 9 insertions(+), 160 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d483cac..50f0f34 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -912,7 +912,10 @@ class Eynollah: batch_indexer = batch_indexer + 1 - if batch_indexer == n_batch_inference: + if (batch_indexer == n_batch_inference or + # last batch + i == nxf - 1 and j == nyf - 1): + self.logger.debug("predicting patches on %s", str(img_patch.shape)) label_p_pred = model.predict(img_patch,verbose=0) seg = np.argmax(label_p_pred, axis=3) @@ -994,88 +997,6 @@ class Eynollah: img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - elif i==(nxf-1) and j==(nyf-1): - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - if thresholding_for_some_classes_in_light_version: - seg_not_base = label_p_pred[:,:,:,4] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg_background = label_p_pred[:,:,:,0] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 - - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 - seg[(seg_line==1) & (seg==0)]=3 - - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - prediction_true = prediction_true.astype(np.uint8) #del model #gc.collect() @@ -1111,7 +1032,7 @@ class Eynollah: return img_scaled_padded#, label_scaled_padded def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): - self.logger.debug("enter do_prediction") + self.logger.debug("enter do_prediction_new_concept") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] img_width_model = model.layers[len(model.layers) - 1].output_shape[2] @@ -1207,7 +1128,10 @@ class Eynollah: batch_indexer = batch_indexer + 1 - if batch_indexer == n_batch_inference: + if (batch_indexer == n_batch_inference or + # last batch + i == nxf - 1 and j == nyf - 1): + self.logger.debug("predicting patches on %s", str(img_patch.shape)) label_p_pred = model.predict(img_patch,verbose=0) seg = np.argmax(label_p_pred, axis=3) @@ -1284,81 +1208,6 @@ class Eynollah: img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - elif i==(nxf-1) and j==(nyf-1): - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - if thresholding_for_some_classes_in_light_version: - seg_art = label_p_pred[:,:,:,4] - seg_art[seg_art<0.2] =0 - seg_art[seg_art>0] =1 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg[seg_art==1]=4 - seg[(seg_line==1) & (seg==0)]=3 - - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - prediction_true = prediction_true.astype(np.uint8) return prediction_true From d68017037ce0f42dc036b30e1de36d8c49b73429 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 9 Dec 2024 11:27:11 +0000 Subject: [PATCH 15/36] do_prediction: trigger GC to avoid CUDA OOM --- src/eynollah/eynollah.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 50f0f34..90824c8 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -999,7 +999,7 @@ class Eynollah: prediction_true = prediction_true.astype(np.uint8) #del model - #gc.collect() + gc.collect() return prediction_true def do_padding_with_scale(self,img, scale): @@ -1209,6 +1209,7 @@ class Eynollah: img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) prediction_true = prediction_true.astype(np.uint8) + gc.collect() return prediction_true def extract_page(self): From 6fe02df97394edcc741243a9f3d635e4b2b472e2 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 9 Dec 2024 16:35:31 +0000 Subject: [PATCH 16/36] do_image_rotation: fix f93fa12 (do return results) --- src/eynollah/utils/separate_lines.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index f8df33f..a57acbb 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1573,13 +1573,14 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): def do_image_rotation(queue_of_all_params,angels_per_process, img_resized, sigma_des): angels_per_each_subprocess = [] for mv in range(len(angels_per_process)): + print(f"rotating image by {angels_per_process[mv]}") img_rot=rotate_image(img_resized,angels_per_process[mv]) img_rot[img_rot!=0]=1 try: var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) except: var_spectrum=0 - angels_per_each_subprocess.append(var_spectrum) + angels_per_each_subprocess.append(var_spectrum) queue_of_all_params.put([angels_per_each_subprocess]) From 54cb15056b352fa8f04212071851f4e66b5931bb Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 9 Dec 2024 16:37:34 +0000 Subject: [PATCH 17/36] do_image_rotation / return_deskew_slop: avoid code duplication, simplify via mp.Pool --- src/eynollah/utils/separate_lines.py | 418 +++------------------------ 1 file changed, 45 insertions(+), 373 deletions(-) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index a57acbb..36a1b01 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1,3 +1,4 @@ +from functools import partial import numpy as np import cv2 from scipy.signal import find_peaks @@ -1570,19 +1571,15 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): # plt.show() return img_patch_ineterst_revised -def do_image_rotation(queue_of_all_params,angels_per_process, img_resized, sigma_des): - angels_per_each_subprocess = [] - for mv in range(len(angels_per_process)): - print(f"rotating image by {angels_per_process[mv]}") - img_rot=rotate_image(img_resized,angels_per_process[mv]) - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - angels_per_each_subprocess.append(var_spectrum) - - queue_of_all_params.put([angels_per_each_subprocess]) +def do_image_rotation(angle, img, sigma_des): + print(f"rotating image by {angle}") + img_rot = rotate_image(img, angle) + img_rot[img_rot!=0] = 1 + try: + var = find_num_col_deskew(img_rot, sigma_des, 20.3) + except: + var = 0 + return var def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): num_cores = cpu_count() @@ -1613,376 +1610,51 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals #plt.imshow(img_resized) #plt.show() - if main_page and img_patch_org.shape[1]>img_patch_org.shape[0]: - + if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: #plt.imshow(img_resized) #plt.show() - angels=np.array([-45, 0 , 45 , 90 , ])#np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - ###for rot in angels: - ###img_rot=rotate_image(img_resized,rot) - ####plt.imshow(img_rot) - ####plt.show() - ###img_rot[img_rot!=0]=1 - ####neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - ####print(var_spectrum,'var_spectrum') - ###try: - ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - #####print(rot,var_spectrum,'var_spectrum') - ###except: - ###var_spectrum=0 - ###var_res.append(var_spectrum) - - - - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 + angles = np.array([-45, 0, 45, 90,]) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) - - angels=np.linspace(ang_int-22.5,ang_int+22.5,n_tot_angles) - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) + angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - ##var_res=[] - ##for rot in angels: - ##img_rot=rotate_image(img_resized,rot) - ####plt.imshow(img_rot) - ####plt.show() - ##img_rot[img_rot!=0]=1 - ##try: - ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ##except: - ##var_spectrum=0 - ##var_res.append(var_spectrum) - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - elif main_page and img_patch_org.shape[1]<=img_patch_org.shape[0]: - + elif main_page: #plt.imshow(img_resized) #plt.show() - angels=np.linspace(-12,12,n_tot_angles)#np.array([0 , 45 , 90 , -45]) - - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - - ##var_res=[] - - ##for rot in angels: - ##img_rot=rotate_image(img_resized,rot) - ###plt.imshow(img_rot) - ###plt.show() - ##img_rot[img_rot!=0]=1 - ###neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - ###print(var_spectrum,'var_spectrum') - ##try: - ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - - ##except: - ##var_spectrum=0 - - ##var_res.append(var_spectrum) - - - if plotter: - plotter.save_plot_of_rotation_angle(angels, var_res) - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 + angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) early_slope_edge=11 - if abs(ang_int)>early_slope_edge and ang_int<0: - angels=np.linspace(-90,-12,n_tot_angles) - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - ##var_res=[] - ##for rot in angels: - ##img_rot=rotate_image(img_resized,rot) - ####plt.imshow(img_rot) - ####plt.show() - ##img_rot[img_rot!=0]=1 - ##try: - ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ##except: - ##var_spectrum=0 - ##var_res.append(var_spectrum) - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - elif abs(ang_int)>early_slope_edge and ang_int>0: - - angels=np.linspace(90,12,n_tot_angles) - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - - ###var_res=[] - ###for rot in angels: - ###img_rot=rotate_image(img_resized,rot) - #####plt.imshow(img_rot) - #####plt.show() - ###img_rot[img_rot!=0]=1 - ###try: - ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ####print(indexer,'indexer') - ###except: - ###var_spectrum=0 - ###var_res.append(var_spectrum) - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - else: - angels=np.linspace(-25,25,int(n_tot_angles/2.)+10) - indexer=0 - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - ####var_res=[] - - ####for rot in angels: - ####img_rot=rotate_image(img_resized,rot) - #####plt.imshow(img_rot) - #####plt.show() - ####img_rot[img_rot!=0]=1 - #####neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #####print(var_spectrum,'var_spectrum') - ####try: - ####var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ####except: - ####var_spectrum=0 - ####var_res.append(var_spectrum) - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 + if abs(angle) > early_slope_edge: + if angle < 0: + angles = np.linspace(-90, -12, n_tot_angles) + else: + angles = np.linspace(90, 12, n_tot_angles) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) - #plt.plot(var_res) - #plt.show() - ##plt.plot(mom3_res) - ##plt.show() - #print(ang_int,'ang_int111') + else: + angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) early_slope_edge=22 - if abs(ang_int)>early_slope_edge and ang_int<0: - - angels=np.linspace(-90,-25,int(n_tot_angles/2.)+10) - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - ###var_res=[] - - ###for rot in angels: - ###img_rot=rotate_image(img_resized,rot) - #####plt.imshow(img_rot) - #####plt.show() - ###img_rot[img_rot!=0]=1 - ###try: - ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ###except: - ###var_spectrum=0 - ###var_res.append(var_spectrum) - - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - elif abs(ang_int)>early_slope_edge and ang_int>0: - - angels=np.linspace(90,25,int(n_tot_angles/2.)+10) - indexer=0 - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - ###var_res=[] + if abs(angle) > early_slope_edge: + if angle < 0: + angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) + else: + angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) - - ###for rot in angels: - ###img_rot=rotate_image(img_resized,rot) - #####plt.imshow(img_rot) - #####plt.show() - ###img_rot[img_rot!=0]=1 - ###try: - ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ####print(indexer,'indexer') - ###except: - ###var_spectrum=0 - - ###var_res.append(var_spectrum) - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - return ang_int + return angle +def get_smallest_skew(img, sigma_des, angles, num_cores=1, plotter=None): + with Pool(processes=num_cores) as pool: + results = pool.map(partial(do_image_rotation, img=img, sigma_des=sigma_des), angles) + if plotter: + plotter.save_plot_of_rotation_angle(angles, results) + try: + var_res = np.array(results) + angle = angles[np.argmax(var_res)] + except: + angle = 0 + return angle From 5e0c1da7111690bd4898d928bfcde0c1de39c3b9 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 00:18:58 +0000 Subject: [PATCH 18/36] simplify --- src/eynollah/eynollah.py | 87 ++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 58 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 90824c8..3b43f7b 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -823,8 +823,8 @@ class Eynollah: def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction") - img_height_model = model.layers[len(model.layers) - 1].output_shape[1] - img_width_model = model.layers[len(model.layers) - 1].output_shape[2] + img_height_model = model.layers[-1].output_shape[1] + img_width_model = model.layers[-1].output_shape[2] if not patches: img_h_page = img.shape[0] @@ -1034,8 +1034,8 @@ class Eynollah: def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction_new_concept") - img_height_model = model.layers[len(model.layers) - 1].output_shape[1] - img_width_model = model.layers[len(model.layers) - 1].output_shape[2] + img_height_model = model.layers[-1].output_shape[1] + img_width_model = model.layers[-1].output_shape[2] if not patches: img_h_page = img.shape[0] @@ -1043,7 +1043,7 @@ class Eynollah: img = img / 255.0 img = resize_image(img, img_height_model, img_width_model) - label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) + label_p_pred = model.predict(img[np.newaxis], verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] if thresholding_for_artificial_class_in_light_version: @@ -4928,31 +4928,31 @@ class Eynollah: #print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 0.000005 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) @@ -5018,35 +5018,6 @@ class Eynollah: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] - else: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - #try: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - #except: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) - # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) - # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) - else: - pass #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: From 21efea87116aeb7c89bea31a0227f614f19c9c6b Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 18:36:57 +0000 Subject: [PATCH 19/36] no del on function argument --- src/eynollah/utils/contour.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 8a92ace..c5d56b8 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -39,7 +39,6 @@ def get_text_region_boxes_by_given_contours(contours): boxes.append([x, y, w, h]) contours_new.append(contours[jj]) - del contours return boxes, contours_new def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): From 25e967397d753a0fdfd1c4c9181cfc93f94414b7 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 11:24:56 +0000 Subject: [PATCH 20/36] exit early if no text regions found (to avoid segfault) --- src/eynollah/eynollah.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 3b43f7b..d6ba8a9 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5019,6 +5019,20 @@ class Eynollah: contours_only_text_parent_d = [] contours_only_text_parent = [] + if not len(contours_only_text_parent): + # stop early + empty_marginals = [[]] * len(polygons_of_marginals) + if self.full_layout: + pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) + else: + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) + self.logger.info("Job done in %.1fs", time.time() - t0) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) @@ -5164,10 +5178,12 @@ class Eynollah: all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: return pcgts - else: contours_only_text_parent_h = None if self.reading_order_machine_based: From 68456ea0022b47f50fb7e2614358879b6484d0b0 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 11:30:38 +0000 Subject: [PATCH 21/36] do_work_of_slopes_new*, do_back_rotation_and_get_cnt_back, do_work_of_contours_in_image: use mp.Pool, simplify --- src/eynollah/eynollah.py | 454 +++++---------------------- src/eynollah/utils/contour.py | 176 +++-------- src/eynollah/utils/separate_lines.py | 207 +++++++++++- 3 files changed, 324 insertions(+), 513 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d6ba8a9..ae292c6 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -11,8 +11,9 @@ import os import sys import time import warnings +from functools import partial from pathlib import Path -from multiprocessing import Process, Queue, cpu_count +from multiprocessing import Pool, cpu_count import gc from ocrd_utils import getLogger import cv2 @@ -60,14 +61,20 @@ from .utils.contour import ( from .utils.rotate import ( rotate_image, rotation_not_90_func, - rotation_not_90_func_full_layout) + rotation_not_90_func_full_layout +) from .utils.separate_lines import ( textline_contours_postprocessing, separate_lines_new2, - return_deskew_slop) + return_deskew_slop, + do_work_of_slopes_new, + do_work_of_slopes_new_curved, + do_work_of_slopes_new_light, +) from .utils.drop_capitals import ( adhere_drop_capital_region_into_corresponding_textline, - filter_small_drop_capitals_from_no_patch_layout) + filter_small_drop_capitals_from_no_patch_layout +) from .utils.marginals import get_marginals from .utils.resize import resize_image from .utils import ( @@ -82,7 +89,8 @@ from .utils import ( small_textlines_to_parent_adherence2, order_of_regions, find_number_of_columns_in_document, - return_boxes_of_images_by_order_of_reading_new) + return_boxes_of_images_by_order_of_reading_new +) from .utils.pil_cv2 import check_dpi, pil2cv from .utils.xml import order_and_id_of_texts from .plot import EynollahPlotter @@ -1504,381 +1512,73 @@ class Eynollah: all_box_coord.append(crop_coor) - return slopes, all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))) + return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): - self.logger.debug("enter get_slopes_and_deskew_new") + if not len(contours): + return [], [], [], [], [], [], [] + self.logger.debug("enter get_slopes_and_deskew_new_light") if len(contours)>15: num_cores = cpu_count() else: num_cores = 1 - queue_of_all_params = Queue() - - processes = [] - nh = np.linspace(0, len(boxes), num_cores + 1) - indexes_by_text_con = np.array(range(len(contours_par))) - for i in range(num_cores): - boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] - contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] - contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - - processes.append(Process(target=self.do_work_of_slopes_new_light, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, indexes_text_con_per_process, image_page_rotated, slope_deskew))) - for i in range(num_cores): - processes[i].start() - - slopes = [] - all_found_textline_polygons = [] - all_found_text_regions = [] - all_found_text_regions_par = [] - boxes = [] - all_box_coord = [] - all_index_text_con = [] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - slopes_for_sub_process = list_all_par[0] - polys_for_sub_process = list_all_par[1] - boxes_for_sub_process = list_all_par[2] - contours_for_subprocess = list_all_par[3] - contours_par_for_subprocess = list_all_par[4] - boxes_coord_for_subprocess = list_all_par[5] - indexes_for_subprocess = list_all_par[6] - for j in range(len(slopes_for_sub_process)): - slopes.append(slopes_for_sub_process[j]) - all_found_textline_polygons.append(polys_for_sub_process[j]) - boxes.append(boxes_for_sub_process[j]) - all_found_text_regions.append(contours_for_subprocess[j]) - all_found_text_regions_par.append(contours_par_for_subprocess[j]) - all_box_coord.append(boxes_coord_for_subprocess[j]) - all_index_text_con.append(indexes_for_subprocess[j]) - for i in range(num_cores): - processes[i].join() - self.logger.debug('slopes %s', slopes) - self.logger.debug("exit get_slopes_and_deskew_new") - return slopes, all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con + with Pool(processes=num_cores) as pool: + results = pool.starmap( + partial(do_work_of_slopes_new_light, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + slope_deskew=slope_deskew, + logger=self.logger, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + plotter=self.plotter,), + zip(boxes, contours, contours_par, range(len(contours_par)))) + #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + self.logger.debug("exit get_slopes_and_deskew_new_light") + return tuple(zip(*results)) def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + if not len(contours): + return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new") num_cores = cpu_count() - queue_of_all_params = Queue() - - processes = [] - nh = np.linspace(0, len(boxes), num_cores + 1) - indexes_by_text_con = np.array(range(len(contours_par))) - for i in range(num_cores): - boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] - contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] - contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - - processes.append(Process(target=self.do_work_of_slopes_new, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, indexes_text_con_per_process, image_page_rotated, slope_deskew))) - for i in range(num_cores): - processes[i].start() - - slopes = [] - all_found_textline_polygons = [] - all_found_text_regions = [] - all_found_text_regions_par = [] - boxes = [] - all_box_coord = [] - all_index_text_con = [] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - slopes_for_sub_process = list_all_par[0] - polys_for_sub_process = list_all_par[1] - boxes_for_sub_process = list_all_par[2] - contours_for_subprocess = list_all_par[3] - contours_par_for_subprocess = list_all_par[4] - boxes_coord_for_subprocess = list_all_par[5] - indexes_for_subprocess = list_all_par[6] - for j in range(len(slopes_for_sub_process)): - slopes.append(slopes_for_sub_process[j]) - all_found_textline_polygons.append(polys_for_sub_process[j]) - boxes.append(boxes_for_sub_process[j]) - all_found_text_regions.append(contours_for_subprocess[j]) - all_found_text_regions_par.append(contours_par_for_subprocess[j]) - all_box_coord.append(boxes_coord_for_subprocess[j]) - all_index_text_con.append(indexes_for_subprocess[j]) - for i in range(num_cores): - processes[i].join() - self.logger.debug('slopes %s', slopes) + with Pool(processes=num_cores) as pool: + results = pool.starmap( + partial(do_work_of_slopes_new, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + slope_deskew=slope_deskew, + logger=self.logger, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + plotter=self.plotter,), + zip(boxes, contours, contours_par, range(len(contours_par)))) + #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new") - return slopes, all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con + return tuple(zip(*results)) def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew): + if not len(contours): + return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") num_cores = cpu_count() - queue_of_all_params = Queue() - - processes = [] - nh = np.linspace(0, len(boxes), num_cores + 1) - indexes_by_text_con = np.array(range(len(contours_par))) - - for i in range(num_cores): - boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] - contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] - contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - - processes.append(Process(target=self.do_work_of_slopes_new_curved, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, indexes_text_con_per_process, slope_deskew))) - - for i in range(num_cores): - processes[i].start() - - slopes = [] - all_found_textline_polygons = [] - all_found_text_regions = [] - all_found_text_regions_par = [] - boxes = [] - all_box_coord = [] - all_index_text_con = [] - - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - polys_for_sub_process = list_all_par[0] - boxes_for_sub_process = list_all_par[1] - contours_for_subprocess = list_all_par[2] - contours_par_for_subprocess = list_all_par[3] - boxes_coord_for_subprocess = list_all_par[4] - indexes_for_subprocess = list_all_par[5] - slopes_for_sub_process = list_all_par[6] - for j in range(len(polys_for_sub_process)): - slopes.append(slopes_for_sub_process[j]) - all_found_textline_polygons.append(polys_for_sub_process[j][::-1]) - boxes.append(boxes_for_sub_process[j]) - all_found_text_regions.append(contours_for_subprocess[j]) - all_found_text_regions_par.append(contours_par_for_subprocess[j]) - all_box_coord.append(boxes_coord_for_subprocess[j]) - all_index_text_con.append(indexes_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - # print(slopes,'slopes') - return all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con, slopes - - def do_work_of_slopes_new_curved(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, indexes_r_con_per_pro, slope_deskew): - self.logger.debug("enter do_work_of_slopes_new_curved") - slopes_per_each_subprocess = [] - bounding_box_of_textregion_per_each_subprocess = [] - textlines_rectangles_per_each_subprocess = [] - contours_textregion_per_each_subprocess = [] - contours_textregion_par_per_each_subprocess = [] - all_box_coord_per_process = [] - index_by_text_region_contours = [] - - textline_cnt_separated = np.zeros(textline_mask_tot_ea.shape) - - for mv in range(len(boxes_text)): - - all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] - all_text_region_raw = all_text_region_raw.astype(np.uint8) - img_int_p = all_text_region_raw[:, :] - - # img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2) - # plt.imshow(img_int_p) - # plt.show() - - if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: - slopes_per_each_subprocess.append(0) - slope_for_all = [slope_deskew][0] - else: - try: - textline_con, hierarchy = return_contours_of_image(img_int_p) - textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.0008) - y_diff_mean = find_contours_mean_y_diff(textline_con_fil) - if self.isNaN(y_diff_mean): - slope_for_all = MAX_SLOPE - else: - sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) - img_int_p[img_int_p > 0] = 1 - slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=self.plotter) - - if abs(slope_for_all) < 0.5: - slope_for_all = [slope_deskew][0] - - except Exception as why: - self.logger.error(why) - slope_for_all = MAX_SLOPE - - if slope_for_all == MAX_SLOPE: - slope_for_all = [slope_deskew][0] - slopes_per_each_subprocess.append(slope_for_all) - - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - _, crop_coor = crop_image_inside_box(boxes_text[mv], image_page_rotated) - - if abs(slope_for_all) < 45: - # all_box_coord.append(crop_coor) - textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) - cnt_o_t_max = contours_par_per_process[mv] - x, y, w, h = cv2.boundingRect(cnt_o_t_max) - mask_biggest = np.zeros(mask_texts_only.shape) - mask_biggest = cv2.fillPoly(mask_biggest, pts=[cnt_o_t_max], color=(1, 1, 1)) - mask_region_in_patch_region = mask_biggest[y : y + h, x : x + w] - textline_biggest_region = mask_biggest * textline_mask_tot_ea - - # print(slope_for_all,'slope_for_all') - textline_rotated_separated = separate_lines_new2(textline_biggest_region[y : y + h, x : x + w], 0, num_col, slope_for_all, plotter=self.plotter) - - # new line added - ##print(np.shape(textline_rotated_separated),np.shape(mask_biggest)) - textline_rotated_separated[mask_region_in_patch_region[:, :] != 1] = 0 - # till here - - textline_cnt_separated[y : y + h, x : x + w] = textline_rotated_separated - textline_region_in_image[y : y + h, x : x + w] = textline_rotated_separated - - # plt.imshow(textline_region_in_image) - # plt.show() - # plt.imshow(textline_cnt_separated) - # plt.show() - - pixel_img = 1 - cnt_textlines_in_image = return_contours_of_interested_textline(textline_region_in_image, pixel_img) - - textlines_cnt_per_region = [] - for jjjj in range(len(cnt_textlines_in_image)): - mask_biggest2 = np.zeros(mask_texts_only.shape) - mask_biggest2 = cv2.fillPoly(mask_biggest2, pts=[cnt_textlines_in_image[jjjj]], color=(1, 1, 1)) - if num_col + 1 == 1: - mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=5) - else: - mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) - - pixel_img = 1 - mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) - cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) - try: - textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) - except Exception as why: - self.logger.error(why) - else: - add_boxes_coor_into_textlines = True - textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], add_boxes_coor_into_textlines) - add_boxes_coor_into_textlines = False - # print(np.shape(textlines_cnt_per_region),'textlines_cnt_per_region') - - textlines_rectangles_per_each_subprocess.append(textlines_cnt_per_region) - bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) - contours_textregion_per_each_subprocess.append(contours_per_process[mv]) - contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) - all_box_coord_per_process.append(crop_coor) - - queue_of_all_params.put([textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours, slopes_per_each_subprocess]) - def do_work_of_slopes_new_light(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): - self.logger.debug('enter do_work_of_slopes_new_light') - slopes_per_each_subprocess = [] - bounding_box_of_textregion_per_each_subprocess = [] - textlines_rectangles_per_each_subprocess = [] - contours_textregion_per_each_subprocess = [] - contours_textregion_par_per_each_subprocess = [] - all_box_coord_per_process = [] - index_by_text_region_contours = [] - for mv in range(len(boxes_text)): - _, crop_coor = crop_image_inside_box(boxes_text[mv],image_page_rotated) - mask_textline = np.zeros((textline_mask_tot_ea.shape)) - mask_textline = cv2.fillPoly(mask_textline,pts=[contours_per_process[mv]],color=(1,1,1)) - all_text_region_raw = (textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ] - all_text_region_raw=all_text_region_raw.astype(np.uint8) - - slopes_per_each_subprocess.append([slope_deskew][0]) - mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) - mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) - - - if self.textline_light: - all_text_region_raw = np.copy(textline_mask_tot_ea) - all_text_region_raw[mask_only_con_region == 0] = 0 - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(all_text_region_raw) - cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - else: - all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) - mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] - all_text_region_raw[mask_only_con_region == 0] = 0 - cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, [slope_deskew][0], contours_par_per_process[mv], boxes_text[mv]) - - textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) - - contours_textregion_per_each_subprocess.append(contours_per_process[mv]) - contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) - all_box_coord_per_process.append(crop_coor) - queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) - - def do_work_of_slopes_new(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): - self.logger.debug('enter do_work_of_slopes_new') - slopes_per_each_subprocess = [] - bounding_box_of_textregion_per_each_subprocess = [] - textlines_rectangles_per_each_subprocess = [] - contours_textregion_per_each_subprocess = [] - contours_textregion_par_per_each_subprocess = [] - all_box_coord_per_process = [] - index_by_text_region_contours = [] - for mv in range(len(boxes_text)): - _, crop_coor = crop_image_inside_box(boxes_text[mv],image_page_rotated) - mask_textline = np.zeros((textline_mask_tot_ea.shape)) - mask_textline = cv2.fillPoly(mask_textline,pts=[contours_per_process[mv]],color=(1,1,1)) - all_text_region_raw = (textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ] - all_text_region_raw=all_text_region_raw.astype(np.uint8) - img_int_p=all_text_region_raw[:,:]#self.all_text_region_raw[mv] - img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2) - - if img_int_p.shape[0]/img_int_p.shape[1]<0.1: - slopes_per_each_subprocess.append(0) - slope_for_all = [slope_deskew][0] - all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] - cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], 0) - textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) - else: - try: - textline_con, hierarchy = return_contours_of_image(img_int_p) - textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.00008) - y_diff_mean = find_contours_mean_y_diff(textline_con_fil) - if self.isNaN(y_diff_mean): - slope_for_all = MAX_SLOPE - else: - sigma_des = int(y_diff_mean * (4.0 / 40.0)) - if sigma_des < 1: - sigma_des = 1 - img_int_p[img_int_p > 0] = 1 - slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=self.plotter) - if abs(slope_for_all) <= 0.5: - slope_for_all = [slope_deskew][0] - except Exception as why: - self.logger.error(why) - slope_for_all = MAX_SLOPE - if slope_for_all == MAX_SLOPE: - slope_for_all = [slope_deskew][0] - slopes_per_each_subprocess.append(slope_for_all) - mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) - mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) - - # plt.imshow(mask_only_con_region) - # plt.show() - all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) - mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] - - ##plt.imshow(textline_mask_tot_ea) - ##plt.show() - ##plt.imshow(all_text_region_raw) - ##plt.show() - ##plt.imshow(mask_only_con_region) - ##plt.show() - - all_text_region_raw[mask_only_con_region == 0] = 0 - cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv]) - - textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) - - contours_textregion_per_each_subprocess.append(contours_per_process[mv]) - contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) - all_box_coord_per_process.append(crop_coor) - queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) + with Pool(processes=num_cores) as pool: + results = pool.starmap( + partial(do_work_of_slopes_new_curved, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + mask_texts_only=mask_texts_only, + num_col=num_col, + scale_par=scale_par, + slope_deskew=slope_deskew, + logger=self.logger, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + plotter=self.plotter,), + zip(boxes, contours, contours_par, range(len(contours_par)))) + #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + self.logger.debug("exit get_slopes_and_deskew_new_curved") + return tuple(zip(*results)) def textline_contours(self, img, use_patches, scaler_h, scaler_w, num_col_classifier=None): self.logger.debug('enter textline_contours') @@ -1923,6 +1623,7 @@ class Eynollah: prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) + self.logger.debug('exit textline_contours') return ((prediction_textline[:, :, 0]==1)*1).astype('uint8'), ((prediction_textline_longshot_true_size[:, :, 0]==1)*1).astype('uint8') @@ -1959,6 +1660,7 @@ class Eynollah: q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) + self.logger.debug('exit do_work_of_slopes') def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_extract_images_only") @@ -2069,6 +1771,7 @@ class Eynollah: polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) ) + self.logger.debug("exit get_regions_extract_images_only") return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): @@ -2146,6 +1849,7 @@ class Eynollah: #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) + self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), len(np.unique(img_resized))) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) @@ -2269,9 +1973,11 @@ class Eynollah: #plt.imshow(textline_mask_tot_ea) #plt.show() #print("inside 4 ", time.time()-t_in) + self.logger.debug("exit get_regions_light_v") return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) + self.logger.debug("exit get_regions_light_v") return None, erosion_hurts, None, textline_mask_tot_ea, img_bin def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): @@ -2392,6 +2098,7 @@ class Eynollah: text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) + self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml except: @@ -2461,6 +2168,7 @@ class Eynollah: text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) erosion_hurts = True + self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): @@ -2633,6 +2341,7 @@ class Eynollah: for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) + self.logger.debug("exit do_order_of_regions_full_layout") return order_text_new, id_of_texts_tot def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): @@ -2743,6 +2452,7 @@ class Eynollah: for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) + self.logger.debug("exit do_order_of_regions_no_full_layout") return order_text_new, id_of_texts_tot def check_iou_of_bounding_box_and_contour_for_tables(self, layout, table_prediction_early, pixel_tabel, num_col_classifier): layout_org = np.copy(layout) @@ -5051,12 +4761,12 @@ class Eynollah: if not self.curved_line: if self.light_version: if self.textline_light: - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ @@ -5074,17 +4784,17 @@ class Eynollah: else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: scale_param = 1 diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index c5d56b8..65331c2 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -1,10 +1,11 @@ +from functools import partial +from multiprocessing import cpu_count, Pool import cv2 import numpy as np from shapely import geometry from .rotate import rotate_image, rotation_image_new -from multiprocessing import Process, Queue, cpu_count -from multiprocessing import Pool + def contours_in_same_horizon(cy_main_hor): X1 = np.zeros((len(cy_main_hor), len(cy_main_hor))) X2 = np.zeros((len(cy_main_hor), len(cy_main_hor))) @@ -29,7 +30,6 @@ def find_contours_mean_y_diff(contours_main): def get_text_region_boxes_by_given_contours(contours): - kernel = np.ones((5, 5), np.uint8) boxes = [] contours_new = [] @@ -144,73 +144,11 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): return contours_imgs -def do_work_of_contours_in_image(queue_of_all_params, contours_per_process, indexes_r_con_per_pro, img, slope_first): - cnts_org_per_each_subprocess = [] - index_by_text_region_contours = [] - for mv in range(len(contours_per_process)): - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[contours_per_process[mv]], color=(1, 1, 1)) - - img_copy = rotation_image_new(img_copy, -slope_first) - - img_copy = img_copy.astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) - cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - - - cnts_org_per_each_subprocess.append(cont_int[0]) - - queue_of_all_params.put([ cnts_org_per_each_subprocess, index_by_text_region_contours]) - - -def get_textregion_contours_in_org_image_multi(cnts, img, slope_first): - - num_cores = cpu_count() - queue_of_all_params = Queue() - - processes = [] - nh = np.linspace(0, len(cnts), num_cores + 1) - indexes_by_text_con = np.array(range(len(cnts))) - for i in range(num_cores): - contours_per_process = cnts[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - - processes.append(Process(target=do_work_of_contours_in_image, args=(queue_of_all_params, contours_per_process, indexes_text_con_per_process, img,slope_first ))) - for i in range(num_cores): - processes[i].start() - cnts_org = [] - all_index_text_con = [] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - contours_for_sub_process = list_all_par[0] - indexes_for_sub_process = list_all_par[1] - for j in range(len(contours_for_sub_process)): - cnts_org.append(contours_for_sub_process[j]) - all_index_text_con.append(indexes_for_sub_process[j]) - for i in range(num_cores): - processes[i].join() - - print(all_index_text_con) - return cnts_org -def loop_contour_image(index_l, cnts,img, slope_first): +def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[cnts[index_l]], color=(1, 1, 1)) - - # plt.imshow(img_copy) - # plt.show() + img_copy = cv2.fillPoly(img_copy, pts=[contour], color=(1, 1, 1)) - # print(img.shape,'img') img_copy = rotation_image_new(img_copy, -slope_first) - ##print(img_copy.shape,'img_copy') - # plt.imshow(img_copy) - # plt.show() img_copy = img_copy.astype(np.uint8) imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) @@ -220,17 +158,22 @@ def loop_contour_image(index_l, cnts,img, slope_first): cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - # print(np.shape(cont_int[0])) - return cont_int[0] -def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first): - cnts_org = [] - # print(cnts,'cnts') - with Pool(cpu_count()) as p: - cnts_org = p.starmap(loop_contour_image, [(index_l,cnts, img,slope_first) for index_l in range(len(cnts))]) - - return cnts_org + return cont_int[0], index_r_con + +def get_textregion_contours_in_org_image_multi(cnts, img, slope_first): + if not len(cnts): + return [], [] + num_cores = cpu_count() + with Pool(processes=num_cores) as pool: + results = pool.starmap( + partial(do_work_of_contours_in_image, + img=img, + slope_first=slope_first, + ), + zip(cnts, range(len(cnts)))) + return tuple(zip(*results)) def get_textregion_contours_in_org_image(cnts, img, slope_first): @@ -292,69 +235,40 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): return cnts_org -def return_list_of_contours_with_desired_order(ls_cons, sorted_indexes): - return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] -def do_back_rotation_and_get_cnt_back(queue_of_all_params, contours_par_per_process,indexes_r_con_per_pro, img, slope_first): - contours_textregion_per_each_subprocess = [] - index_by_text_region_contours = [] - for mv in range(len(contours_par_per_process)): - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) +def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first): + img_copy = np.zeros(img.shape) + img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=(1, 1, 1)) - img_copy = rotation_image_new(img_copy, -slope_first) + img_copy = rotation_image_new(img_copy, -slope_first) - img_copy = img_copy.astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + img_copy = img_copy.astype(np.uint8) + imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) + ret, thresh = cv2.threshold(imgray, 0, 255, 0) - cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) - cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - # print(np.shape(cont_int[0])) - contours_textregion_per_each_subprocess.append(cont_int[0]*6) - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - - queue_of_all_params.put([contours_textregion_per_each_subprocess, index_by_text_region_contours]) + cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) + cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) + # print(np.shape(cont_int[0])) + return cont_int[0], index_r_con def get_textregion_contours_in_org_image_light(cnts, img, slope_first): - num_cores = cpu_count() - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(cnts), num_cores + 1) - indexes_by_text_con = np.array(range(len(cnts))) - - h_o = img.shape[0] - w_o = img.shape[1] - - img = cv2.resize(img, (int(img.shape[1]/6.), int(img.shape[0]/6.)), interpolation=cv2.INTER_NEAREST) + if not len(cnts): + return [] + img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) #cnts = cnts/2 - cnts = [(i/ 6).astype(np.int32) for i in cnts] - - for i in range(num_cores): - contours_par_per_process = cnts[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_back_rotation_and_get_cnt_back, args=(queue_of_all_params, contours_par_per_process, indexes_text_con_per_process, img, slope_first))) - - for i in range(num_cores): - processes[i].start() - - cnts_org = [] - all_index_text_con = [] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - contours_for_subprocess = list_all_par[0] - indexes_for_subprocess = list_all_par[1] - for j in range(len(contours_for_subprocess)): - cnts_org.append(contours_for_subprocess[j]) - all_index_text_con.append(indexes_for_subprocess[j]) - for i in range(num_cores): - processes[i].join() - - cnts_org = return_list_of_contours_with_desired_order(cnts_org, all_index_text_con) - - return cnts_org + cnts = [(i/6).astype(np.int) for i in cnts] + num_cores = cpu_count() + with Pool(processes=num_cores) as pool: + results = pool.starmap( + partial(do_back_rotation_and_get_cnt_back, + img=img, + slope_first=slope_first, + ), + zip(cnts, range(len(cnts)))) + contours, indexes = tuple(zip(*results)) + return [i*6 for i in contours] def return_contours_of_interested_textline(region_pre_p, pixel): diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 36a1b01..922fa14 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1,22 +1,23 @@ +import os from functools import partial +from multiprocessing import Pool, cpu_count import numpy as np import cv2 from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d -import os -from multiprocessing import Process, Queue, cpu_count -from multiprocessing import Pool from .rotate import rotate_image +from .resize import resize_image from .contour import ( return_parent_contours, filter_contours_area_of_image_tables, return_contours_of_image, - filter_contours_area_of_image + filter_contours_area_of_image, + return_contours_of_interested_textline, + find_contours_mean_y_diff, ) -from .is_nan import isNaN from . import ( find_num_col_deskew, - isNaN, + crop_image_inside_box, ) def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): @@ -1249,13 +1250,13 @@ def separate_lines_new_inside_tiles(img_path, thetha): forest.append(peaks_neg[i + 1]) if diff_peaks[i] > cut_off: # print(forest[np.argmin(z[forest]) ] ) - if not isNaN(forest[np.argmin(z[forest])]): + if not np.isnan(forest[np.argmin(z[forest])]): peaks_neg_true.append(forest[np.argmin(z[forest])]) forest = [] forest.append(peaks_neg[i + 1]) if i == (len(peaks_neg) - 1): # print(print(forest[np.argmin(z[forest]) ] )) - if not isNaN(forest[np.argmin(z[forest])]): + if not np.isnan(forest[np.argmin(z[forest])]): peaks_neg_true.append(forest[np.argmin(z[forest])]) diff_peaks_pos = np.abs(np.diff(peaks)) @@ -1272,13 +1273,13 @@ def separate_lines_new_inside_tiles(img_path, thetha): forest.append(peaks[i + 1]) if diff_peaks_pos[i] > cut_off: # print(forest[np.argmin(z[forest]) ] ) - if not isNaN(forest[np.argmax(z[forest])]): + if not np.isnan(forest[np.argmax(z[forest])]): peaks_pos_true.append(forest[np.argmax(z[forest])]) forest = [] forest.append(peaks[i + 1]) if i == (len(peaks) - 1): # print(print(forest[np.argmin(z[forest]) ] )) - if not isNaN(forest[np.argmax(z[forest])]): + if not np.isnan(forest[np.argmax(z[forest])]): peaks_pos_true.append(forest[np.argmax(z[forest])]) # print(len(peaks_neg_true) ,len(peaks_pos_true) ,'lensss') @@ -1658,3 +1659,189 @@ def get_smallest_skew(img, sigma_des, angles, num_cores=1, plotter=None): except: angle = 0 return angle + +def do_work_of_slopes_new( + box_text, contour, contour_par, index_r_con, + textline_mask_tot_ea, image_page_rotated, slope_deskew, + logger, MAX_SLOPE=999, KERNEL=None, plotter=None +): + logger.debug('enter do_work_of_slopes_new') + if KERNEL is None: + KERNEL = np.ones((5, 5), np.uint8) + + x, y, w, h = box_text + _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + mask_textline = np.zeros(textline_mask_tot_ea.shape) + mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) + all_text_region_raw = textline_mask_tot_ea * mask_textline + all_text_region_raw = all_text_region_raw[y: y + h, x: x + w].astype(np.uint8) + img_int_p = all_text_region_raw[:,:] + img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2) + + if img_int_p.shape[0] /img_int_p.shape[1] < 0.1: + slope = 0 + slope_for_all = slope_deskew + all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w] + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, 0) + else: + try: + textline_con, hierarchy = return_contours_of_image(img_int_p) + textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.00008) + y_diff_mean = find_contours_mean_y_diff(textline_con_fil) + if np.isnan(y_diff_mean): + slope_for_all = MAX_SLOPE + else: + sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) + img_int_p[img_int_p > 0] = 1 + slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=plotter) + if abs(slope_for_all) <= 0.5: + slope_for_all = slope_deskew + except Exception as why: + logger.error(why) + slope_for_all = MAX_SLOPE + + if slope_for_all == MAX_SLOPE: + slope_for_all = slope_deskew + slope = slope_for_all + + mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) + mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contour_par], color=(1, 1, 1)) + + # plt.imshow(mask_only_con_region) + # plt.show() + all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].copy() + mask_only_con_region = mask_only_con_region[y: y + h, x: x + w] + + ##plt.imshow(textline_mask_tot_ea) + ##plt.show() + ##plt.imshow(all_text_region_raw) + ##plt.show() + ##plt.imshow(mask_only_con_region) + ##plt.show() + + all_text_region_raw[mask_only_con_region == 0] = 0 + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text) + + return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope + + +def do_work_of_slopes_new_curved( + box_text, contour, contour_par, index_r_con, + textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, + logger, MAX_SLOPE=999, KERNEL=None, plotter=None +): + logger.debug("enter do_work_of_slopes_new_curved") + if KERNEL is None: + KERNEL = np.ones((5, 5), np.uint8) + + x, y, w, h = box_text + all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].astype(np.uint8) + img_int_p = all_text_region_raw[:, :] + + # img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2) + # plt.imshow(img_int_p) + # plt.show() + + if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: + slope = 0 + slope_for_all = slope_deskew + else: + try: + textline_con, hierarchy = return_contours_of_image(img_int_p) + textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.0008) + y_diff_mean = find_contours_mean_y_diff(textline_con_fil) + if np.isnan(y_diff_mean): + slope_for_all = MAX_SLOPE + else: + sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) + img_int_p[img_int_p > 0] = 1 + slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=plotter) + if abs(slope_for_all) < 0.5: + slope_for_all = slope_deskew + except Exception as why: + logger.error(why) + slope_for_all = MAX_SLOPE + + if slope_for_all == MAX_SLOPE: + slope_for_all = slope_deskew + slope = slope_for_all + + _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + + if abs(slope_for_all) < 45: + textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) + x, y, w, h = cv2.boundingRect(contour_par) + mask_biggest = np.zeros(mask_texts_only.shape) + mask_biggest = cv2.fillPoly(mask_biggest, pts=[contour_par], color=(1, 1, 1)) + mask_region_in_patch_region = mask_biggest[y : y + h, x : x + w] + textline_biggest_region = mask_biggest * textline_mask_tot_ea + + # print(slope_for_all,'slope_for_all') + textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y+h, x: x+w], 0, num_col, slope_for_all, + plotter=plotter) + + # new line added + ##print(np.shape(textline_rotated_separated),np.shape(mask_biggest)) + textline_rotated_separated[mask_region_in_patch_region[:, :] != 1] = 0 + # till here + + textline_region_in_image[y : y + h, x : x + w] = textline_rotated_separated + + # plt.imshow(textline_region_in_image) + # plt.show() + + pixel_img = 1 + cnt_textlines_in_image = return_contours_of_interested_textline(textline_region_in_image, pixel_img) + + textlines_cnt_per_region = [] + for jjjj in range(len(cnt_textlines_in_image)): + mask_biggest2 = np.zeros(mask_texts_only.shape) + mask_biggest2 = cv2.fillPoly(mask_biggest2, pts=[cnt_textlines_in_image[jjjj]], color=(1, 1, 1)) + if num_col + 1 == 1: + mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=5) + else: + mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) + + pixel_img = 1 + mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) + cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) + try: + textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) + except Exception as why: + logger.error(why) + else: + textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True) + # print(np.shape(textlines_cnt_per_region),'textlines_cnt_per_region') + + return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope + +def do_work_of_slopes_new_light( + box_text, contour, contour_par, index_r_con, + textline_mask_tot_ea, image_page_rotated, slope_deskew, + logger +): + logger.debug('enter do_work_of_slopes_new_light') + + x, y, w, h = box_text + _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + mask_textline = np.zeros(textline_mask_tot_ea.shape) + mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) + all_text_region_raw = textline_mask_tot_ea * mask_textline + all_text_region_raw = all_text_region_raw[y: y + h, x: x + w].astype(np.uint8) + + mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) + mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contour_par], color=(1, 1, 1)) + + if self.textline_light: + all_text_region_raw = np.copy(textline_mask_tot_ea) + all_text_region_raw[mask_only_con_region == 0] = 0 + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(all_text_region_raw) + cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, hir_on_cnt_clean_rot, + max_area=1, min_area=0.00001) + else: + all_text_region_raw = np.copy(textline_mask_tot_ea[y: y + h, x: x + w]) + mask_only_con_region = mask_only_con_region[y: y + h, x: x + w] + all_text_region_raw[mask_only_con_region == 0] = 0 + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text) + + return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope From 7e9ee90e6ec5e3e8455e75f24ec4a0c4e4b95d70 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 12:18:29 +0000 Subject: [PATCH 22/36] switch from (ad-hoc) mp.Pool to (attribute) concurrent.futures.ProcessPoolExecutor --- src/eynollah/eynollah.py | 88 ++++++++++++---------------- src/eynollah/utils/contour.py | 31 ++++------ src/eynollah/utils/separate_lines.py | 67 ++++++++++++--------- 3 files changed, 91 insertions(+), 95 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index ae292c6..8c92b92 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -13,7 +13,8 @@ import time import warnings from functools import partial from pathlib import Path -from multiprocessing import Pool, cpu_count +from multiprocessing import cpu_count +from concurrent.futures import ProcessPoolExecutor import gc from ocrd_utils import getLogger import cv2 @@ -251,6 +252,8 @@ class Eynollah: textline_light = self.textline_light, pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') + # for parallelization of CPU-intensive tasks: + self.executor = ProcessPoolExecutor(max_workers=cpu_count()) self.dir_models = dir_models self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425" @@ -1518,21 +1521,15 @@ class Eynollah: if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_light") - if len(contours)>15: - num_cores = cpu_count() - else: - num_cores = 1 - with Pool(processes=num_cores) as pool: - results = pool.starmap( - partial(do_work_of_slopes_new_light, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - slope_deskew=slope_deskew, - logger=self.logger, - MAX_SLOPE=MAX_SLOPE, - KERNEL=KERNEL, - plotter=self.plotter,), - zip(boxes, contours, contours_par, range(len(contours_par)))) + results = self.executor.map(partial(do_work_of_slopes_new_light, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + slope_deskew=slope_deskew, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + logger=self.logger, + plotter=self.plotter,), + boxes, contours, contours_par, range(len(contours_par))) #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_light") return tuple(zip(*results)) @@ -1541,18 +1538,15 @@ class Eynollah: if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new") - num_cores = cpu_count() - with Pool(processes=num_cores) as pool: - results = pool.starmap( - partial(do_work_of_slopes_new, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - slope_deskew=slope_deskew, - logger=self.logger, - MAX_SLOPE=MAX_SLOPE, - KERNEL=KERNEL, - plotter=self.plotter,), - zip(boxes, contours, contours_par, range(len(contours_par)))) + results = self.executor.map(partial(do_work_of_slopes_new, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + slope_deskew=slope_deskew, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + logger=self.logger, + plotter=self.plotter,), + boxes, contours, contours_par, range(len(contours_par))) #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new") return tuple(zip(*results)) @@ -1561,21 +1555,18 @@ class Eynollah: if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") - num_cores = cpu_count() - with Pool(processes=num_cores) as pool: - results = pool.starmap( - partial(do_work_of_slopes_new_curved, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - mask_texts_only=mask_texts_only, - num_col=num_col, - scale_par=scale_par, - slope_deskew=slope_deskew, - logger=self.logger, - MAX_SLOPE=MAX_SLOPE, - KERNEL=KERNEL, - plotter=self.plotter,), - zip(boxes, contours, contours_par, range(len(contours_par)))) + results = self.executor.map(partial(do_work_of_slopes_new_curved, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + mask_texts_only=mask_texts_only, + num_col=num_col, + scale_par=scale_par, + slope_deskew=slope_deskew, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + logger=self.logger, + plotter=self.plotter,), + boxes, contours, contours_par, range(len(contours_par))) #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_curved") return tuple(zip(*results)) @@ -1643,7 +1634,8 @@ class Eynollah: y_diff_mean = find_contours_mean_y_diff(textline_con_fil) sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) crop_img[crop_img > 0] = 1 - slope_corresponding_textregion = return_deskew_slop(crop_img, sigma_des, plotter=self.plotter) + slope_corresponding_textregion = return_deskew_slop(crop_img, sigma_des, + map=self.executor.map, logger=self.logger, plotter=self.plotter) except Exception as why: self.logger.error(why) slope_corresponding_textregion = MAX_SLOPE @@ -2932,10 +2924,8 @@ class Eynollah: def run_deskew(self, textline_mask_tot_ea): #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') - sigma = 2 - main_page_deskew = True - n_total_angles = 30 - slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, n_total_angles, main_page_deskew, plotter=self.plotter) + slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True, + map=self.executor.map, logger=self.logger, plotter=self.plotter) slope_first = 0 if self.plotter: @@ -4748,7 +4738,7 @@ class Eynollah: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 65331c2..e47c5e7 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -1,5 +1,4 @@ from functools import partial -from multiprocessing import cpu_count, Pool import cv2 import numpy as np from shapely import geometry @@ -162,17 +161,14 @@ def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): return cont_int[0], index_r_con -def get_textregion_contours_in_org_image_multi(cnts, img, slope_first): +def get_textregion_contours_in_org_image_multi(cnts, img, slope_first, map=map): if not len(cnts): return [], [] - num_cores = cpu_count() - with Pool(processes=num_cores) as pool: - results = pool.starmap( - partial(do_work_of_contours_in_image, - img=img, - slope_first=slope_first, - ), - zip(cnts, range(len(cnts)))) + results = map(partial(do_work_of_contours_in_image, + img=img, + slope_first=slope_first, + ), + cnts, range(len(cnts))) return tuple(zip(*results)) def get_textregion_contours_in_org_image(cnts, img, slope_first): @@ -252,21 +248,18 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first # print(np.shape(cont_int[0])) return cont_int[0], index_r_con -def get_textregion_contours_in_org_image_light(cnts, img, slope_first): +def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map): if not len(cnts): return [] img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) #cnts = cnts/2 cnts = [(i/6).astype(np.int) for i in cnts] - num_cores = cpu_count() - with Pool(processes=num_cores) as pool: - results = pool.starmap( - partial(do_back_rotation_and_get_cnt_back, - img=img, - slope_first=slope_first, - ), - zip(cnts, range(len(cnts)))) + results = map(partial(do_back_rotation_and_get_cnt_back, + img=img, + slope_first=slope_first, + ), + cnts, range(len(cnts))) contours, indexes = tuple(zip(*results)) return [i*6 for i in contours] diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 922fa14..48e1c5b 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1,6 +1,6 @@ import os +from logging import getLogger from functools import partial -from multiprocessing import Pool, cpu_count import numpy as np import cv2 from scipy.signal import find_peaks @@ -1464,7 +1464,9 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest return contours_rotated_clean -def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): +def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None): + if logger is None: + logger = getLogger(__package__) if num_col == 1: num_patches = int(img_path.shape[1] / 200.0) @@ -1572,18 +1574,20 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): # plt.show() return img_patch_ineterst_revised -def do_image_rotation(angle, img, sigma_des): - print(f"rotating image by {angle}") +def do_image_rotation(angle, img, sigma_des, logger=None): + if logger is None: + logger = getLogger(__package__) img_rot = rotate_image(img, angle) img_rot[img_rot!=0] = 1 try: var = find_num_col_deskew(img_rot, sigma_des, 20.3) except: + logger.exception("cannot determine variance for angle %.2f°", angle) var = 0 return var -def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): - num_cores = cpu_count() +def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, + main_page=False, logger=None, plotter=None, map=map): if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) @@ -1615,16 +1619,16 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals #plt.imshow(img_resized) #plt.show() angles = np.array([-45, 0, 45, 90,]) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) elif main_page: #plt.imshow(img_resized) #plt.show() angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) early_slope_edge=11 if abs(angle) > early_slope_edge: @@ -1632,11 +1636,11 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals angles = np.linspace(-90, -12, n_tot_angles) else: angles = np.linspace(90, 12, n_tot_angles) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) else: angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) early_slope_edge=22 if abs(angle) > early_slope_edge: @@ -1644,30 +1648,35 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) else: angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) return angle -def get_smallest_skew(img, sigma_des, angles, num_cores=1, plotter=None): - with Pool(processes=num_cores) as pool: - results = pool.map(partial(do_image_rotation, img=img, sigma_des=sigma_des), angles) +def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map): + if logger is None: + logger = getLogger(__package__) + results = list(map(partial(do_image_rotation, img=img, sigma_des=sigma_des, logger=logger), angles)) if plotter: plotter.save_plot_of_rotation_angle(angles, results) try: var_res = np.array(results) + assert var_res.any() angle = angles[np.argmax(var_res)] except: + logger.exception("cannot determine best angle among %s", str(angles)) angle = 0 return angle def do_work_of_slopes_new( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, slope_deskew, - logger, MAX_SLOPE=999, KERNEL=None, plotter=None + logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): - logger.debug('enter do_work_of_slopes_new') if KERNEL is None: KERNEL = np.ones((5, 5), np.uint8) + if logger is None: + logger = getLogger(__package__) + logger.debug('enter do_work_of_slopes_new') x, y, w, h = box_text _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) @@ -1693,11 +1702,11 @@ def do_work_of_slopes_new( else: sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) img_int_p[img_int_p > 0] = 1 - slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=plotter) + slope_for_all = return_deskew_slop(img_int_p, sigma_des, logger=logger, plotter=plotter) if abs(slope_for_all) <= 0.5: slope_for_all = slope_deskew - except Exception as why: - logger.error(why) + except: + logger.exception("cannot determine angle of contours") slope_for_all = MAX_SLOPE if slope_for_all == MAX_SLOPE: @@ -1728,11 +1737,13 @@ def do_work_of_slopes_new( def do_work_of_slopes_new_curved( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, - logger, MAX_SLOPE=999, KERNEL=None, plotter=None + logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): - logger.debug("enter do_work_of_slopes_new_curved") if KERNEL is None: KERNEL = np.ones((5, 5), np.uint8) + if logger is None: + logger = getLogger(__package__) + logger.debug("enter do_work_of_slopes_new_curved") x, y, w, h = box_text all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].astype(np.uint8) @@ -1755,11 +1766,11 @@ def do_work_of_slopes_new_curved( else: sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) img_int_p[img_int_p > 0] = 1 - slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=plotter) + slope_for_all = return_deskew_slop(img_int_p, sigma_des, logger=logger, plotter=plotter) if abs(slope_for_all) < 0.5: slope_for_all = slope_deskew - except Exception as why: - logger.error(why) + except: + logger.exception("cannot determine angle of contours") slope_for_all = MAX_SLOPE if slope_for_all == MAX_SLOPE: @@ -1778,7 +1789,7 @@ def do_work_of_slopes_new_curved( # print(slope_for_all,'slope_for_all') textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y+h, x: x+w], 0, num_col, slope_for_all, - plotter=plotter) + logger=logger, plotter=plotter) # new line added ##print(np.shape(textline_rotated_separated),np.shape(mask_biggest)) @@ -1818,8 +1829,10 @@ def do_work_of_slopes_new_curved( def do_work_of_slopes_new_light( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, slope_deskew, - logger + logger=None ): + if logger is None: + logger = getLogger(__package__) logger.debug('enter do_work_of_slopes_new_light') x, y, w, h = box_text From 3b70b11ea6e18b00e70ec0169ebe4963d91fd364 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 18:36:20 +0000 Subject: [PATCH 23/36] avoid deskewing patches if binary-empty --- src/eynollah/utils/separate_lines.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 48e1c5b..788a510 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1515,9 +1515,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] img_xline = img_patch_ineterst[:, index_x_d:index_x_u] - sigma = 2 try: - slope_xline = return_deskew_slop(img_xline, sigma, plotter=plotter) + assert img_xline.any() + slope_xline = return_deskew_slop(img_xline, 2, logger=logger, plotter=plotter) except: slope_xline = 0 From 9270ea4550a39a07699338693962c6dedf146097 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 18:37:20 +0000 Subject: [PATCH 24/36] annotate region angles in PAGE --- src/eynollah/writer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 496b3db..5f282f2 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -60,6 +60,7 @@ class EynollahXmlWriter(): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) marginal_region.add_TextLine(textline) + marginal_region.set_orientation(slopes_marginals[marginal_idx]) points_co = '' for l in range(len(all_found_textline_polygons_marginals[marginal_idx][j])): if not (self.curved_line or self.textline_light): @@ -102,6 +103,7 @@ class EynollahXmlWriter(): if ocr_all_textlines_textregion: textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) text_region.add_TextLine(textline) + text_region.set_orientation(slopes[region_idx]) region_bboxes = all_box_coord[region_idx] points_co = '' for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[region_idx][j]): From b9ca7a6191f672c4b9f0da0179ff0d49cc3d63be Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 18:44:54 +0000 Subject: [PATCH 25/36] log num_cols-dependent resizing --- src/eynollah/eynollah.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 8c92b92..8b8808c 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1861,6 +1861,8 @@ class Eynollah: if num_col_classifier == 1 or num_col_classifier == 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + self.logger.debug("resized to %dx%d for %d cols", + img_resized.shape[1], img_resized.shape[0], num_col_classifier) prediction_regions_org = self.do_prediction_new_concept( True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_some_classes_in_light_version=True) @@ -1873,6 +1875,8 @@ class Eynollah: else: new_h = (900+ (num_col_classifier-3)*100) img_resized = resize_image(img_bin, int(new_h * img_bin.shape[0] /img_bin.shape[1]), new_h) + self.logger.debug("resized to %dx%d (new_h=%d) for %d cols", + img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier) prediction_regions_org = self.do_prediction_new_concept( True, img_resized, self.model_region_1_2, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) From b4b0890294d2dc1fbf6ca84794587d5185a7546f Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 18:45:18 +0000 Subject: [PATCH 26/36] add option to overwrite output xml, but skip by default if file exists --- src/eynollah/cli.py | 9 ++++++++- src/eynollah/eynollah.py | 13 +++++++++++-- src/eynollah/writer.py | 6 +++--- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 5f4b5a4..a9b5765 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -97,6 +97,12 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) type=click.Path(exists=True, file_okay=False), required=True, ) +@click.option( + "--overwrite", + "-O", + help="overwrite (instead of skipping) if output xml exists", + is_flag=True, +) @click.option( "--dir_in", "-di", @@ -253,7 +259,7 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) help="Override log level globally to this", ) -def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): +def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): initLogging() if log_level: getLogger('eynollah').setLevel(getLevelName(log_level)) @@ -273,6 +279,7 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s sys.exit(1) eynollah = Eynollah( image_filename=image, + overwrite=overwrite, dir_out=out, dir_in=dir_in, dir_models=model, diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 8b8808c..8883f19 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -165,6 +165,7 @@ class Eynollah: image_filename=None, image_pil=None, image_filename_stem=None, + overwrite=False, dir_out=None, dir_in=None, dir_of_cropped_images=None, @@ -203,6 +204,7 @@ class Eynollah: if override_dpi: self.dpi = override_dpi self.image_filename = image_filename + self.overwrite = overwrite self.dir_out = dir_out self.dir_in = dir_in self.dir_of_all = dir_of_all @@ -360,6 +362,7 @@ class Eynollah: curved_line=self.curved_line, textline_light = self.textline_light, pcgts=self.pcgts) + def imread(self, grayscale=False, uint8=True): key = 'img' if grayscale: @@ -4460,8 +4463,14 @@ class Eynollah: if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) #print("text region early -11 in %.1fs", time.time() - t0) - - + + if os.path.exists(self.writer.output_filename): + if self.overwrite: + self.logger.warning("will overwrite existing output file '%s'", self.writer.output_filename) + else: + self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename) + continue + if self.extract_only_images: img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 5f282f2..dc5a5dc 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -28,6 +28,7 @@ class EynollahXmlWriter(): self.counter = EynollahIdCounter() self.dir_out = dir_out self.image_filename = image_filename + self.output_filename = os.path.join(self.dir_out, self.image_filename_stem) + ".xml" self.curved_line = curved_line self.textline_light = textline_light self.pcgts = pcgts @@ -163,9 +164,8 @@ class EynollahXmlWriter(): coords.set_points(points_co[:-1]) def write_pagexml(self, pcgts): - out_fname = os.path.join(self.dir_out, self.image_filename_stem) + ".xml" - self.logger.info("output filename: '%s'", out_fname) - with open(out_fname, 'w') as f: + self.logger.info("output filename: '%s'", self.output_filename) + with open(self.output_filename, 'w') as f: f.write(to_xml(pcgts)) def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines): From dcaf79628371d03e2eed790c792930ba30079545 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 23:07:56 +0000 Subject: [PATCH 27/36] change polarity of orientation angle (PAGE schema required cw=positive) --- src/eynollah/writer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index dc5a5dc..66747b1 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -61,7 +61,7 @@ class EynollahXmlWriter(): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) marginal_region.add_TextLine(textline) - marginal_region.set_orientation(slopes_marginals[marginal_idx]) + marginal_region.set_orientation(-slopes_marginals[marginal_idx]) points_co = '' for l in range(len(all_found_textline_polygons_marginals[marginal_idx][j])): if not (self.curved_line or self.textline_light): @@ -104,7 +104,7 @@ class EynollahXmlWriter(): if ocr_all_textlines_textregion: textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) text_region.add_TextLine(textline) - text_region.set_orientation(slopes[region_idx]) + text_region.set_orientation(-slopes[region_idx]) region_bboxes = all_box_coord[region_idx] points_co = '' for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[region_idx][j]): From e9c0d716f62659466c66ae9c8b634cd22634e181 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 23:48:56 +0000 Subject: [PATCH 28/36] CI: install optional dependencies, too --- .github/workflows/test-eynollah.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index 8a6941f..479c371 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -36,7 +36,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install . + pip install .[OCR,plotting] pip install -r requirements-test.txt - name: Test with pytest run: make test From 0e8c561618ce267259f8e9b354f151e4b2fd040d Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 14 Dec 2024 00:24:29 +0100 Subject: [PATCH 29/36] debugging issues --- src/eynollah/eynollah.py | 831 ++++++++++++++------------- src/eynollah/utils/separate_lines.py | 6 +- 2 files changed, 419 insertions(+), 418 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 8883f19..443b5e9 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -195,6 +195,8 @@ class Eynollah: logger=None, pcgts=None, ): + if skip_layout_and_reading_order: + textline_light = True self.light_version = light_version if not dir_in: if image_pil: @@ -1512,7 +1514,7 @@ class Eynollah: textlines_ins = [polygons_of_textlines[ind] for ind in indexes_in] all_found_textline_polygons.append(textlines_ins) - slopes.append(0) + slopes.append(slope_deskew) _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) @@ -1527,11 +1529,8 @@ class Eynollah: results = self.executor.map(partial(do_work_of_slopes_new_light, textline_mask_tot_ea=textline_mask_tot, image_page_rotated=image_page_rotated, - slope_deskew=slope_deskew, - MAX_SLOPE=MAX_SLOPE, - KERNEL=KERNEL, - logger=self.logger, - plotter=self.plotter,), + slope_deskew=slope_deskew,textline_light=self.textline_light, + logger=self.logger,), boxes, contours, contours_par, range(len(contours_par))) #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_light") @@ -4245,7 +4244,7 @@ class Eynollah: - def filter_contours_without_textline_inside(self,contours,text_con_org, contours_textline): + def filter_contours_without_textline_inside(self,contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered): ###contours_txtline_of_all_textregions = [] @@ -4282,8 +4281,9 @@ class Eynollah: contours.pop(ind_u_a_trs) contours_textline.pop(ind_u_a_trs) text_con_org.pop(ind_u_a_trs) + contours_only_text_parent_d_ordered.pop(ind_u_a_trs) - return contours, text_con_org, contours_textline + return contours, text_con_org, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): @@ -4470,7 +4470,7 @@ class Eynollah: else: self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename) continue - + if self.extract_only_images: img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) @@ -4487,12 +4487,9 @@ class Eynollah: continue else: return pcgts - - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - self.logger.info("Enhancing took %.1fs ", time.time() - t0) - #print("text region early -1 in %.1fs", time.time() - t0) - t1 = time.time() if self.skip_layout_and_reading_order: + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + self.logger.info("Enhancing took %.1fs ", time.time() - t0) _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) @@ -4522,467 +4519,471 @@ class Eynollah: polygons_lines_xml = [] contours_tables = [] ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) if self.dir_in: + self.writer.write_pagexml(pcgts) continue else: return pcgts + if not self.extract_only_images and not self.skip_layout_and_reading_order: + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + self.logger.info("Enhancing took %.1fs ", time.time() - t0) + #print("text region early -1 in %.1fs", time.time() - t0) + t1 = time.time() + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) - if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) + if num_col_classifier == 1 or num_col_classifier ==2: + if num_col_classifier == 1: + img_w_new = 1000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - if num_col_classifier == 1 or num_col_classifier ==2: - if num_col_classifier == 1: - img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + elif num_col_classifier == 2: + img_w_new = 1300 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - elif num_col_classifier == 2: - img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) - #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) - textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - - t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - #self.logger.info('cont_page %s', cont_page) - #plt.imshow(table_prediction) - #plt.show() + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + + t1 = time.time() + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) + #plt.imshow(table_prediction) + #plt.show() - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t1) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t1) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts - #print("text region early in %.1fs", time.time() - t0) - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - elif num_col_classifier in (1,2): - org_h_l_m = textline_mask_tot_ea.shape[0] - org_w_l_m = textline_mask_tot_ea.shape[1] - if num_col_classifier == 1: - img_w_new = 2000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: - img_w_new = 2400 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - image_page = resize_image(image_page,img_h_new, img_w_new ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - mask_images = resize_image(mask_images,img_h_new, img_w_new ) - mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) - text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) - table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - - if self.light_version and num_col_classifier in (1,2): - image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) - text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) - textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) - text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) - table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) - - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) - ## birdan sora chock chakir - t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ - self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - else: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ - self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.light_version: - drop_label_in_full_layout = 4 - textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 - - - text_only = ((img_revised_tab[:, :] == 1)) * 1 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - - #print("text region early 2 in %.1fs", time.time() - t0) - ###min_con_area = 0.000005 - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) + elif num_col_classifier in (1,2): + org_h_l_m = textline_mask_tot_ea.shape[0] + org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1: + img_w_new = 2000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + elif num_col_classifier == 2: + img_w_new = 2400 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + image_page = resize_image(image_page,img_h_new, img_w_new ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + mask_images = resize_image(mask_images,img_h_new, img_w_new ) + mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) + text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) + table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + if self.light_version and num_col_classifier in (1,2): + image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) + text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) + textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) + text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) + table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) + + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) + ## birdan sora chock chakir + t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ + self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + else: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ + self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + if self.light_version: + drop_label_in_full_layout = 4 + textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) - - areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + #print("text region early 2 in %.1fs", time.time() - t0) + ###min_con_area = 0.000005 + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) - - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + (h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() + else: + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] - contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] - - if not len(contours_only_text_parent): - # stop early - empty_marginals = [[]] * len(polygons_of_marginals) - if self.full_layout: - pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) - else: - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) - self.logger.info("Job done in %.1fs", time.time() - t0) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts + if not len(contours_only_text_parent): + # stop early + empty_marginals = [[]] * len(polygons_of_marginals) + if self.full_layout: + pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) + else: + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) + self.logger.info("Job done in %.1fs", time.time() - t0) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts - #print("text region early 3 in %.1fs", time.time() - t0) - if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) - #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map) - #txt_con_org = self.dilate_textregions_contours(txt_con_org) - #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - else: - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) - ## birdan sora chock chakir - if not self.curved_line: + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - if self.textline_light: - #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ - # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ - # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) - #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) + contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + #print("text region early 3.5 in %.1fs", time.time() - t0) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map) + #txt_con_org = self.dilate_textregions_contours(txt_con_org) + #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + else: + txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) + boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) + boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) + #print("text region early 5 in %.1fs", time.time() - t0) + ## birdan sora chock chakir + if not self.curved_line: + if self.light_version: + if self.textline_light: + #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ + # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ + self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ + self.get_slopes_and_deskew_new_light2(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ + # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) + + #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ + # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) + #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) + + contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, index_by_text_par_con = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered) - contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons) + else: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ + self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ + self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - - #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: - textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) + scale_param = 1 + textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - else: - scale_param = 1 - textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - - #print("text region early 6 in %.1fs", time.time() - t0) - if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - else: - #takes long timee - contours_only_text_parent_d_ordered = None - if self.light_version: - fun = check_any_text_region_in_model_one_is_main_or_header_light - else: - fun = check_any_text_region_in_model_one_is_main_or_header - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ - all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ - contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \ - fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - if self.plotter: - self.plotter.save_plot_of_layout(text_regions_p, image_page) - self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, - all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, - kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) - pixel_lines = 6 - - if not self.reading_order_machine_based: - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + #print("text region early 6 in %.1fs", time.time() - t0) + if self.full_layout: + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + else: + #takes long timee + contours_only_text_parent_d_ordered = None + if self.light_version: + fun = check_any_text_region_in_model_one_is_main_or_header_light + else: + fun = check_any_text_region_in_model_one_is_main_or_header + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ + all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \ + fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + + if self.plotter: + self.plotter.save_plot_of_layout(text_regions_p, image_page) + self.plotter.save_plot_of_layout_all(text_regions_p, image_page) + + pixel_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, + all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, + kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) + pixel_lines = 6 + + if not self.reading_order_machine_based: + if not self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if num_col_classifier >= 3: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + if num_col_classifier >= 3: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + else: + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) - else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + else: + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - if self.plotter: - self.plotter.write_images_into_directory(polygons_of_images, image_page) - t_order = time.time() + if self.plotter: + self.plotter.write_images_into_directory(polygons_of_images, image_page) + t_order = time.time() - if self.full_layout: + if self.full_layout: - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - - if self.ocr: - ocr_all_textlines = [] - else: - ocr_all_textlines = None + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, - cont_page, polygons_lines_xml, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts + if self.ocr: + ocr_all_textlines = [] + else: + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, + cont_page, polygons_lines_xml, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + print("Job done in %.1fs", time.time() - t0) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts - else: - contours_only_text_parent_h = None - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + contours_only_text_parent_h = None + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - - - if self.ocr: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + + + if self.ocr: + + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + ocr_textline_in_textregion.append(text_ocr) - ocr_textline_in_textregion.append(text_ocr) + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) - ind_tot = ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) + else: + ocr_all_textlines = None + #print(ocr_all_textlines) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) - else: - ocr_all_textlines = None - #print(ocr_all_textlines) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + if self.dir_in: + self.writer.write_pagexml(pcgts) self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) - - if self.dir_in: - self.writer.write_pagexml(pcgts) - self.logger.info("Job done in %.1fs", time.time() - t0) - #print("Job done in %.1fs" % (time.time() - t0)) + #print("Job done in %.1fs" % (time.time() - t0)) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 788a510..f037a9f 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1828,7 +1828,7 @@ def do_work_of_slopes_new_curved( def do_work_of_slopes_new_light( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, slope_deskew, + textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light, logger=None ): if logger is None: @@ -1845,7 +1845,7 @@ def do_work_of_slopes_new_light( mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contour_par], color=(1, 1, 1)) - if self.textline_light: + if textline_light: all_text_region_raw = np.copy(textline_mask_tot_ea) all_text_region_raw[mask_only_con_region == 0] = 0 cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(all_text_region_raw) @@ -1857,4 +1857,4 @@ def do_work_of_slopes_new_light( all_text_region_raw[mask_only_con_region == 0] = 0 cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text) - return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope + return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope_deskew From f93c6c288d9525202957da5bb000202a657e6df8 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 14 Dec 2024 02:50:17 +0100 Subject: [PATCH 30/36] function of patch-wise inference with scatter_nd is added --- src/eynollah/eynollah.py | 107 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 443b5e9..28cb330 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1047,6 +1047,110 @@ class Eynollah: #label_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = label_res[:,:,:] return img_scaled_padded#, label_scaled_padded + def do_prediction_new_concept_scatter_nd(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): + self.logger.debug("enter do_prediction_new_concept") + + img_height_model = model.layers[-1].output_shape[1] + img_width_model = model.layers[-1].output_shape[2] + + if not patches: + img_h_page = img.shape[0] + img_w_page = img.shape[1] + img = img / 255.0 + img = resize_image(img, img_height_model, img_width_model) + + label_p_pred = model.predict(img[np.newaxis], verbose=0) + seg = np.argmax(label_p_pred, axis=3)[0] + + if thresholding_for_artificial_class_in_light_version: + #seg_text = label_p_pred[0,:,:,1] + #seg_text[seg_text<0.2] =0 + #seg_text[seg_text>0] =1 + #seg[seg_text==1]=1 + + seg_art = label_p_pred[0,:,:,4] + seg_art[seg_art<0.2] =0 + seg_art[seg_art>0] =1 + seg[seg_art==1]=4 + + + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + prediction_true = resize_image(seg_color, img_h_page, img_w_page) + prediction_true = prediction_true.astype(np.uint8) + return prediction_true + + if img.shape[0] < img_height_model: + img = resize_image(img, img_height_model, img.shape[1]) + + if img.shape[1] < img_width_model: + img = resize_image(img, img.shape[0], img_width_model) + + self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) + ##margin = int(marginal_of_patch_percent * img_height_model) + #width_mid = img_width_model - 2 * margin + #height_mid = img_height_model - 2 * margin + img = img / float(255.0) + + img = img.astype(np.float16) + img_h = img.shape[0] + img_w = img.shape[1] + + stride_x = img_width_model - 100 + stride_y = img_height_model - 100 + + one_tensor = tf.ones_like(img) + img_patches = tf.image.extract_patches(images=[img,one_tensor], + sizes=[1, img_height_model, img_width_model, 1], + strides=[1, stride_y, stride_x, 1], + rates=[1, 1, 1, 1], + padding='SAME') + + one_patches = img_patches[1] + img_patches = img_patches[0] + img_patches = tf.squeeze(img_patches) + + img_patches_resh = tf.reshape(img_patches, shape = (img_patches.shape[0]*img_patches.shape[1], img_height_model, img_width_model, 3)) + + pred_patches = model.predict(img_patches_resh, batch_size=n_batch_inference) + + one_patches = tf.squeeze(one_patches) + one_patches = tf.reshape(one_patches, [img_patches.shape[0]*img_patches.shape[1],img_height_model,img_width_model,3]) + + x = tf.range(img.shape[1]) + y = tf.range(img.shape[0]) + x, y = tf.meshgrid(x, y) + indices = tf.stack([y, x], axis=-1) + + indices_patches = tf.image.extract_patches(images=tf.expand_dims(indices, axis=0), sizes=[1, img_height_model, img_width_model, 1], strides=[1, stride_y, stride_x, 1], rates=[1, 1, 1, 1], padding='SAME') + indices_patches = tf.squeeze(indices_patches) + indices_patches = tf.reshape(indices_patches, [img_patches.shape[0]*img_patches.shape[1],img_height_model, img_width_model,2]) + + margin_y = int( (img_height_model - stride_y)/2. ) + margin_x = int( (img_width_model - stride_x)/2. ) + + mask_margin = np.zeros((img_height_model, img_width_model)) + + mask_margin[margin_y:img_height_model-margin_y, margin_x:img_width_model-margin_x] = 1 + + indices_patches_array = indices_patches.numpy() + + for i in range(indices_patches_array.shape[0]): + indices_patches_array[i,:,:,0] = indices_patches_array[i,:,:,0]*mask_margin + indices_patches_array[i,:,:,1] = indices_patches_array[i,:,:,1]*mask_margin + + reconstructed = tf.scatter_nd(indices=indices_patches_array, updates=pred_patches, shape=(img.shape[0],img.shape[1],pred_patches.shape[-1])) + reconstructed_argmax = reconstructed.numpy() + + prediction_true = np.argmax(reconstructed_argmax, axis=2) + prediction_true = prediction_true.astype(np.uint8) + + gc.collect() + return np.repeat(prediction_true[:, :, np.newaxis], 3, axis=2) + + + + + def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction_new_concept") @@ -4891,7 +4995,7 @@ class Eynollah: all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) - print("Job done in %.1fs", time.time() - t0) + #print("Job done in %.1fs", time.time() - t0) if self.dir_in: self.writer.write_pagexml(pcgts) continue @@ -4975,6 +5079,7 @@ class Eynollah: pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + #print("Job done in %.1fs" % (time.time() - t0)) self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts From 0ae28f7d3ef33d6bf3650b99bc7646c3234341d1 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sat, 14 Dec 2024 12:15:56 +0000 Subject: [PATCH 31/36] switch from stdlib to loky.ProcessPoolExecutor, ensure shutdown --- requirements.txt | 1 + src/eynollah/eynollah.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index d72df29..ef3fe31 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ scikit-learn >= 0.23.2 tensorflow < 2.13 imutils >= 0.5.3 numba <= 0.58.1 +loky diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 28cb330..8139b11 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -10,11 +10,12 @@ import math import os import sys import time +import atexit import warnings from functools import partial from pathlib import Path from multiprocessing import cpu_count -from concurrent.futures import ProcessPoolExecutor +from loky import ProcessPoolExecutor import gc from ocrd_utils import getLogger import cv2 @@ -257,7 +258,8 @@ class Eynollah: pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') # for parallelization of CPU-intensive tasks: - self.executor = ProcessPoolExecutor(max_workers=cpu_count()) + self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) + atexit.register(self.executor.shutdown) self.dir_models = dir_models self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425" From 01376af9055440366fb7effece949e903a7de710 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sun, 22 Dec 2024 13:10:05 +0000 Subject: [PATCH 32/36] do_order_of_regions_with_model: simplify --- src/eynollah/eynollah.py | 298 ++++++++------------------------------- 1 file changed, 59 insertions(+), 239 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 8139b11..651bd17 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -2771,6 +2771,7 @@ class Eynollah: for ijv in range(len(y_min_tab_col1)): image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv]),:,:]=pixel_table return image_revised_last + def do_order_of_regions(self, *args, **kwargs): if self.full_layout: return self.do_order_of_regions_full_layout(*args, **kwargs) @@ -3380,171 +3381,35 @@ class Eynollah: model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model + def do_order_of_regions_with_model(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] - - img_poly = np.zeros((y_len,x_len), dtype='uint8') - - unique_pix = np.unique(text_regions_p) - + img_poly = np.zeros((y_len,x_len), dtype='uint8') img_poly[text_regions_p[:,:]==1] = 1 img_poly[text_regions_p[:,:]==2] = 2 img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - if not self.dir_in: - self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir) - - height1 =672#448 - width1 = 448#224 - - height2 =672#448 - width2= 448#224 - - height3 =672#448 - width3 = 448#224 - img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') - if contours_only_text_parent_h: _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) for j in range(len(cy_main)): img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - + co_text_all = contours_only_text_parent + contours_only_text_parent_h else: co_text_all = contours_only_text_parent + if not len(co_text_all): + return [], [] - labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') + labels_con = np.zeros((y_len, x_len, len(co_text_all)), dtype=bool) for i in range(len(co_text_all)): - img_label = np.zeros((y_len,x_len,3),dtype='uint8') - img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1)) - labels_con[:,:,i] = img_label[:,:,0] - - - img3= np.copy(img_poly) - - labels_con = resize_image(labels_con, height1, width1) - - img_header_and_sep = resize_image(img_header_and_sep, height1, width1) - - img3= resize_image (img3, height3, width3) - - img3 = img3.astype(np.uint16) - - - order_matrix = np.zeros((labels_con.shape[2], labels_con.shape[2]))-1 - inference_bs = 6 - tot_counter = 1 - batch_counter = 0 - i_indexer = [] - j_indexer =[] - - input_1= np.zeros( (inference_bs, height1, width1,3)) - - tot_iteration = int( ( labels_con.shape[2]*(labels_con.shape[2]-1) )/2. ) - full_bs_ite= tot_iteration//inference_bs - last_bs = tot_iteration % inference_bs - - #print(labels_con.shape[2],"number of regions for reading order") - for i in range(labels_con.shape[2]): - for j in range(labels_con.shape[2]): - if j>i: - img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) - img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) - - img2[:,:,0][img3[:,:]==5] = 2 - img2[:,:,0][img_header_and_sep[:,:]==1] = 3 - - img1[:,:,0][img3[:,:]==5] = 2 - img1[:,:,0][img_header_and_sep[:,:]==1] = 3 - - - i_indexer.append(i) - j_indexer.append(j) - - input_1[batch_counter,:,:,0] = img1[:,:,0]/3. - input_1[batch_counter,:,:,2] = img2[:,:,0]/3. - input_1[batch_counter,:,:,1] = img3[:,:]/5. - - batch_counter = batch_counter+1 - - if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): - y_pr = self.model_reading_order.predict(input_1 , verbose=0) - - if batch_counter==inference_bs: - iteration_batches = inference_bs - else: - iteration_batches = last_bs - for jb in range(iteration_batches): - if y_pr[jb][0]>=0.5: - order_class = 1 - else: - order_class = 0 - - order_matrix[i_indexer[jb],j_indexer[jb]] = y_pr[jb][0]#order_class - order_matrix[j_indexer[jb],i_indexer[jb]] = 1-y_pr[jb][0]#int( 1 - order_class) - - batch_counter = 0 - - i_indexer = [] - j_indexer = [] - tot_counter = tot_counter+1 - - - sum_mat = np.sum(order_matrix, axis=1) - index_sort = np.argsort(sum_mat) - index_sort = index_sort[::-1] - - REGION_ID_TEMPLATE = 'region_%04d' - order_of_texts = [] - id_of_texts = [] - for order, id_text in enumerate(index_sort): - order_of_texts.append(id_text) - id_of_texts.append( REGION_ID_TEMPLATE % order ) - - - return order_of_texts, id_of_texts - - def update_list_and_return_first_with_length_bigger_than_one(self,index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp): - list_inp.pop(index_element_to_be_updated) - if len(pr_list)>0: - list_inp.insert(index_element_to_be_updated, pr_list) - else: - index_element_to_be_updated = index_element_to_be_updated -1 - - list_inp.insert(index_element_to_be_updated+1, [innner_index_pr_pos]) - if len(pos_list)>0: - list_inp.insert(index_element_to_be_updated+2, pos_list) - - len_all_elements = [len(i) for i in list_inp] - list_len_bigger_1 = np.where(np.array(len_all_elements)>1) - list_len_bigger_1 = list_len_bigger_1[0] - - if len(list_len_bigger_1)>0: - early_list_bigger_than_one = list_len_bigger_1[0] - else: - early_list_bigger_than_one = -20 - return list_inp, early_list_bigger_than_one - def do_order_of_regions_with_model_optimized_algorithm(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): - y_len = text_regions_p.shape[0] - x_len = text_regions_p.shape[1] - - img_poly = np.zeros((y_len,x_len), dtype='uint8') - - unique_pix = np.unique(text_regions_p) - - - img_poly[text_regions_p[:,:]==1] = 1 - img_poly[text_regions_p[:,:]==2] = 2 - img_poly[text_regions_p[:,:]==3] = 4 - img_poly[text_regions_p[:,:]==6] = 5 - - if not self.dir_in: - self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir) + img = labels_con[:,:,i].astype(np.uint8) + cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,)) + labels_con[:,:,i] = img height1 =672#448 width1 = 448#224 @@ -3554,112 +3419,67 @@ class Eynollah: height3 =672#448 width3 = 448#224 - - img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') - - if contours_only_text_parent_h: - _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) - - for j in range(len(cy_main)): - img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - - co_text_all = contours_only_text_parent + contours_only_text_parent_h - else: - co_text_all = contours_only_text_parent - - - labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') - for i in range(len(co_text_all)): - img_label = np.zeros((y_len,x_len,3),dtype='uint8') - img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1)) - labels_con[:,:,i] = img_label[:,:,0] - - - img3= np.copy(img_poly) - - labels_con = resize_image(labels_con, height1, width1) + labels_con = resize_image(labels_con.astype(np.uint8), height1, width1).astype(bool) img_header_and_sep = resize_image(img_header_and_sep, height1, width1) + img_poly = resize_image(img_poly, height3, width3) - img3= resize_image (img3, height3, width3) + if not self.dir_in: + self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir) - img3 = img3.astype(np.uint16) - inference_bs = 3 - input_1= np.zeros( (inference_bs, height1, width1,3)) - starting_list_of_regions = [] - if len(co_text_all)<=1: - starting_list_of_regions.append( list(range(1)) ) - else: - starting_list_of_regions.append( list(range(labels_con.shape[2])) ) + input_1 = np.zeros((inference_bs, height1, width1, 3)) + ordered = [list(range(len(co_text_all)))] index_update = 0 - index_selected = starting_list_of_regions[0] #print(labels_con.shape[2],"number of regions for reading order") while index_update>=0: - ij_list = starting_list_of_regions[index_update] - i = ij_list[0] - ij_list.pop(0) - - pr_list = [] + ij_list = ordered.pop(index_update) + i = ij_list.pop(0) + + ante_list = [] post_list = [] - - batch_counter = 0 - tot_counter = 1 - - tot_iteration = len(ij_list) - full_bs_ite= tot_iteration//inference_bs - last_bs = tot_iteration % inference_bs - - jbatch_indexer =[] + tot_counter = 0 + batch = [] for j in ij_list: - img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) - img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) - - img2[:,:,0][img3[:,:]==5] = 2 - img2[:,:,0][img_header_and_sep[:,:]==1] = 3 - - img1[:,:,0][img3[:,:]==5] = 2 - img1[:,:,0][img_header_and_sep[:,:]==1] = 3 - - jbatch_indexer.append(j) - - input_1[batch_counter,:,:,0] = img1[:,:,0]/3. - input_1[batch_counter,:,:,2] = img2[:,:,0]/3. - input_1[batch_counter,:,:,1] = img3[:,:]/5. - - batch_counter = batch_counter+1 - - if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): + img1 = labels_con[:,:,i].astype(float) + img2 = labels_con[:,:,j].astype(float) + img1[img_poly==5] = 2 + img2[img_poly==5] = 2 + img1[img_header_and_sep==1] = 3 + img2[img_header_and_sep==1] = 3 + + input_1[len(batch), :, :, 0] = img1 / 3. + input_1[len(batch), :, :, 2] = img2 / 3. + input_1[len(batch), :, :, 1] = img_poly / 5. + + tot_counter += 1 + batch.append(j) + if tot_counter % inference_bs == 0 or tot_counter == len(ij_list): y_pr = self.model_reading_order.predict(input_1 , verbose=0) - - if batch_counter==inference_bs: - iteration_batches = inference_bs - else: - iteration_batches = last_bs - for jb in range(iteration_batches): + for jb, j in enumerate(batch): if y_pr[jb][0]>=0.5: - post_list.append(jbatch_indexer[jb]) + post_list.append(j) else: - pr_list.append(jbatch_indexer[jb]) - - batch_counter = 0 - jbatch_indexer = [] - - tot_counter = tot_counter+1 - - starting_list_of_regions, index_update = self.update_list_and_return_first_with_length_bigger_than_one(index_update, i, pr_list, post_list,starting_list_of_regions) + ante_list.append(j) + batch = [] + + if len(ante_list): + ordered.insert(index_update, ante_list) + index_update += 1 + ordered.insert(index_update, [i]) + if len(post_list): + ordered.insert(index_update + 1, post_list) + + index_update = -1 + for index_next, ij_list in enumerate(ordered): + if len(ij_list) > 1: + index_update = index_next + break + + ordered = [i[0] for i in ordered] + region_ids = ['region_%04d' % i for i in range(len(co_text_all))] + return ordered, region_ids - index_sort = [i[0] for i in starting_list_of_regions ] - - REGION_ID_TEMPLATE = 'region_%04d' - order_of_texts = [] - id_of_texts = [] - for order, id_text in enumerate(index_sort): - order_of_texts.append(id_text) - id_of_texts.append( REGION_ID_TEMPLATE % order ) - - - return order_of_texts, id_of_texts def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] @@ -4980,7 +4800,7 @@ class Eynollah: if self.full_layout: if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) @@ -5007,7 +4827,7 @@ class Eynollah: else: contours_only_text_parent_h = None if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) From cfc65128b1d0bbf8cc4b0e66c2ba17b4b0729f90 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sun, 22 Dec 2024 14:56:32 +0000 Subject: [PATCH 33/36] reduce redundancy/indentation --- src/eynollah/eynollah.py | 816 +++++++++++++++++++-------------------- 1 file changed, 406 insertions(+), 410 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 651bd17..c0603fc 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4397,10 +4397,9 @@ class Eynollah: self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename) continue + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + self.logger.info("Enhancing took %.1fs ", time.time() - t0) if self.extract_only_images: - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - self.logger.info("Enhancing took %.1fs ", time.time() - t0) - text_regions_p_1 ,erosion_hurts, polygons_lines_xml,polygons_of_images,image_page, page_coord, cont_page = self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], cont_page, [], [], ocr_all_textlines) @@ -4413,9 +4412,8 @@ class Eynollah: continue else: return pcgts + if self.skip_layout_and_reading_order: - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - self.logger.info("Enhancing took %.1fs ", time.time() - t0) _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) @@ -4454,463 +4452,461 @@ class Eynollah: continue else: return pcgts - if not self.extract_only_images and not self.skip_layout_and_reading_order: - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - self.logger.info("Enhancing took %.1fs ", time.time() - t0) - #print("text region early -1 in %.1fs", time.time() - t0) - t1 = time.time() - if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) - - if num_col_classifier == 1 or num_col_classifier ==2: - if num_col_classifier == 1: - img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: - img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) - else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) - #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) - textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - - t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - #self.logger.info('cont_page %s', cont_page) - #plt.imshow(table_prediction) - #plt.show() - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t1) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts + #print("text region early -1 in %.1fs", time.time() - t0) + t1 = time.time() + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) - #print("text region early in %.1fs", time.time() - t0) - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) - t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - elif num_col_classifier in (1,2): - org_h_l_m = textline_mask_tot_ea.shape[0] - org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: - img_w_new = 2000 + img_w_new = 1000 img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) elif num_col_classifier == 2: - img_w_new = 2400 + img_w_new = 1300 img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - image_page = resize_image(image_page,img_h_new, img_w_new ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - mask_images = resize_image(mask_images,img_h_new, img_w_new ) - mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) - text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) - table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - - if self.light_version and num_col_classifier in (1,2): - image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) - text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) - textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) - text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) - table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) - - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) - ## birdan sora chock chakir + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) + else: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ - self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) + #plt.imshow(table_prediction) + #plt.show() + + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t1) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue else: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ - self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.light_version: - drop_label_in_full_layout = 4 - textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 + return pcgts + + #print("text region early in %.1fs", time.time() - t0) + t1 = time.time() + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) + elif num_col_classifier in (1,2): + org_h_l_m = textline_mask_tot_ea.shape[0] + org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1: + img_w_new = 2000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 2400 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + image_page = resize_image(image_page,img_h_new, img_w_new ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + mask_images = resize_image(mask_images,img_h_new, img_w_new ) + mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) + text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) + table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + if self.light_version and num_col_classifier in (1,2): + image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) + text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) + textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) + text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) + table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) + + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) + ## birdan sora chock chakir + t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ + self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + else: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ + self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + if self.light_version: + drop_label_in_full_layout = 4 + textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 + + + text_only = ((img_revised_tab[:, :] == 1)) * 1 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + #print("text region early 2 in %.1fs", time.time() - t0) + ###min_con_area = 0.000005 + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - - #print("text region early 2 in %.1fs", time.time() - t0) - ###min_con_area = 0.000005 - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) - - areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) - - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big - - contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + (h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] - if not len(contours_only_text_parent): - # stop early - empty_marginals = [[]] * len(polygons_of_marginals) - if self.full_layout: - pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) - else: - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) - self.logger.info("Job done in %.1fs", time.time() - t0) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts + else: + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] - #print("text region early 3 in %.1fs", time.time() - t0) - if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) - #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map) - #txt_con_org = self.dilate_textregions_contours(txt_con_org) - #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + if not len(contours_only_text_parent): + # stop early + empty_marginals = [[]] * len(polygons_of_marginals) + if self.full_layout: + pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) else: - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) - ## birdan sora chock chakir - if not self.curved_line: - if self.light_version: - if self.textline_light: - #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_light2(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ - # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ - # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) - #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) - - contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, index_by_text_par_con = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered) + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) + self.logger.info("Job done in %.1fs", time.time() - t0) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts - else: - textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + #print("text region early 3 in %.1fs", time.time() - t0) + if self.light_version: + contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + #print("text region early 3.5 in %.1fs", time.time() - t0) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map) + #txt_con_org = self.dilate_textregions_contours(txt_con_org) + #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + else: + txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) + boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) + boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) + #print("text region early 5 in %.1fs", time.time() - t0) + ## birdan sora chock chakir + if not self.curved_line: + if self.light_version: + if self.textline_light: + #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ + # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ + self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ + self.get_slopes_and_deskew_new_light2(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ + # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) + + #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ + # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) + #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) + + contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, index_by_text_par_con = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered) - #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + + #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: - scale_param = 1 - textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) + self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - - #print("text region early 6 in %.1fs", time.time() - t0) - if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - else: - #takes long timee - contours_only_text_parent_d_ordered = None - if self.light_version: - fun = check_any_text_region_in_model_one_is_main_or_header_light - else: - fun = check_any_text_region_in_model_one_is_main_or_header - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ - all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ - contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \ - fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - - if self.plotter: - self.plotter.save_plot_of_layout(text_regions_p, image_page) - self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, - all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, - kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) - pixel_lines = 6 - - if not self.reading_order_machine_based: - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - - if num_col_classifier >= 3: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - - else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + else: + scale_param = 1 + textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ + self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ + self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + + #print("text region early 6 in %.1fs", time.time() - t0) + if self.full_layout: + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + else: + #takes long timee + contours_only_text_parent_d_ordered = None + if self.light_version: + fun = check_any_text_region_in_model_one_is_main_or_header_light + else: + fun = check_any_text_region_in_model_one_is_main_or_header + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ + all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \ + fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + if self.plotter: + self.plotter.save_plot_of_layout(text_regions_p, image_page) + self.plotter.save_plot_of_layout_all(text_regions_p, image_page) + + pixel_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, + all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, + kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) + pixel_lines = 6 + + if not self.reading_order_machine_based: + if not self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - - if self.plotter: - self.plotter.write_images_into_directory(polygons_of_images, image_page) - t_order = time.time() - - if self.full_layout: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - else: + if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - if self.ocr: - ocr_all_textlines = [] + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - ocr_all_textlines = None - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, - cont_page, polygons_lines_xml, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - #print("Job done in %.1fs", time.time() - t0) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + + if self.plotter: + self.plotter.write_images_into_directory(polygons_of_images, image_page) + t_order = time.time() + + if self.full_layout: + + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + else: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - return pcgts + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + if self.ocr: + ocr_all_textlines = [] else: - contours_only_text_parent_h = None - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, + cont_page, polygons_lines_xml, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + #print("Job done in %.1fs", time.time() - t0) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts + + else: + contours_only_text_parent_h = None + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + else: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) - else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - - - if self.ocr: - - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + if self.ocr: - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) - ocr_textline_in_textregion.append(text_ocr) + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 - ind_tot = ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) - else: - ocr_all_textlines = None - #print(ocr_all_textlines) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) - #print("Job done in %.1fs" % (time.time() - t0)) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) + ocr_textline_in_textregion.append(text_ocr) - if self.dir_in: - self.writer.write_pagexml(pcgts) - self.logger.info("Job done in %.1fs", time.time() - t0) + + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) + + else: + ocr_all_textlines = None + #print(ocr_all_textlines) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) #print("Job done in %.1fs" % (time.time() - t0)) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) + + if self.dir_in: + self.writer.write_pagexml(pcgts) + self.logger.info("Job done in %.1fs", time.time() - t0) + #print("Job done in %.1fs" % (time.time() - t0)) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) From 335aa273a1c71587c42a55858730cebb5b82c55e Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 23 Dec 2024 03:13:21 +0000 Subject: [PATCH 34/36] simplify, wrap extremely long lines --- src/eynollah/eynollah.py | 1971 +++++++++++++------------- src/eynollah/utils/__init__.py | 1357 ++++++++---------- src/eynollah/utils/contour.py | 156 +- src/eynollah/utils/separate_lines.py | 306 ++-- 4 files changed, 1799 insertions(+), 1991 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index c0603fc..25d5ec4 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -6,6 +6,7 @@ document layout analysis (segmentation) with output in PAGE-XML """ +import tracemalloc import math import os import sys @@ -266,20 +267,45 @@ class Eynollah: self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" - self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1__4_3_091124"#"/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" + #"/modelens_full_lay_1_3_031124" + #"/modelens_full_lay_13__3_19_241024" + #"/model_full_lay_13_241024" + #"/modelens_full_lay_13_17_231024" + #"/modelens_full_lay_1_2_221024" + #"/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1__4_3_091124" #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" self.model_reading_order_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + #"/modelens_12sp_elay_0_3_4__3_6_n" + #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" + #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" + #"/modelens_1_2_4_5_early_lay_1_2_spaltige" + #"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/modelens_full_lay_1__4_3_091124"#"/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + #"/modelens_full_lay_1_3_031124" + #"/modelens_full_lay_13__3_19_241024" + #"/model_full_lay_13_241024" + #"/modelens_full_lay_13_17_231024" + #"/modelens_full_lay_1_2_221024" + #"/modelens_full_layout_24_till_28" + #"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_lay_1__4_3_091124" if self.textline_light: - self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# + #"/modelens_textline_1_4_16092024" + #"/model_textline_ens_3_4_5_6_artificial" + #"/modelens_textline_1_3_4_20240915" + #"/model_textline_ens_3_4_5_6_artificial" + #"/modelens_textline_9_12_13_14_15" + #"/eynollah-textline_light_20210425" + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024" else: - self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" + #"/eynollah-textline_20210425" + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024" if self.ocr: self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" @@ -320,13 +346,13 @@ class Eynollah: if self.ocr: self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")#("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") + #("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") + self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten") if self.tables: self.model_table = self.our_load_model(self.model_table_dir) self.ls_imgs = os.listdir(self.dir_in) - - + def _cache_images(self, image_filename=None, image_pil=None): ret = {} t_c0 = time.time() @@ -346,6 +372,7 @@ class Eynollah: for prefix in ('', '_grayscale'): ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) return ret + def reset_file_name_dir(self, image_filename): t_c = time.time() self._imgs = self._cache_images(image_filename=image_filename) @@ -378,31 +405,27 @@ class Eynollah: def isNaN(self, num): return num != num - def predict_enhancement(self, img): self.logger.debug("enter predict_enhancement") if not self.dir_in: self.model_enhancement, _ = self.start_new_session_and_model(self.model_dir_of_enhancement) - img_height_model = self.model_enhancement.layers[len(self.model_enhancement.layers) - 1].output_shape[1] - img_width_model = self.model_enhancement.layers[len(self.model_enhancement.layers) - 1].output_shape[2] + img_height_model = self.model_enhancement.layers[-1].output_shape[1] + img_width_model = self.model_enhancement.layers[-1].output_shape[2] if img.shape[0] < img_height_model: img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST) - if img.shape[1] < img_width_model: img = cv2.resize(img, (img_height_model, img.shape[0]), interpolation=cv2.INTER_NEAREST) margin = int(0 * img_width_model) width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin - img = img / float(255.0) - + img = img / 255. img_h = img.shape[0] img_w = img.shape[1] prediction_true = np.zeros((img_h, img_w, 3)) nxf = img_w / float(width_mid) nyf = img_h / float(height_mid) - nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) @@ -430,37 +453,53 @@ class Eynollah: img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :] label_p_pred = self.model_enhancement.predict(img_patch, verbose=0) - - seg = label_p_pred[0, :, :, :] - seg = seg * 255 + seg = label_p_pred[0, :, :, :] * 255 if i == 0 and j == 0: - seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg + prediction_true[index_y_d + 0:index_y_u - margin, + index_x_d + 0:index_x_u - margin] = \ + seg[0:-margin or None, + 0:-margin or None] elif i == nxf - 1 and j == nyf - 1: - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg + prediction_true[index_y_d + margin:index_y_u - 0, + index_x_d + margin:index_x_u - 0] = \ + seg[margin:, + margin:] elif i == 0 and j == nyf - 1: - seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg + prediction_true[index_y_d + margin:index_y_u - 0, + index_x_d + 0:index_x_u - margin] = \ + seg[margin:, + 0:-margin or None] elif i == nxf - 1 and j == 0: - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg + prediction_true[index_y_d + 0:index_y_u - margin, + index_x_d + margin:index_x_u - 0] = \ + seg[0:-margin or None, + margin:] elif i == 0 and j != 0 and j != nyf - 1: - seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg + prediction_true[index_y_d + margin:index_y_u - margin, + index_x_d + 0:index_x_u - margin] = \ + seg[margin:-margin or None, + 0:-margin or None] elif i == nxf - 1 and j != 0 and j != nyf - 1: - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg + prediction_true[index_y_d + margin:index_y_u - margin, + index_x_d + margin:index_x_u - 0] = \ + seg[margin:-margin or None, + margin:] elif i != 0 and i != nxf - 1 and j == 0: - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg + prediction_true[index_y_d + 0:index_y_u - margin, + index_x_d + margin:index_x_u - margin] = \ + seg[0:-margin or None, + margin:-margin or None] elif i != 0 and i != nxf - 1 and j == nyf - 1: - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg + prediction_true[index_y_d + margin:index_y_u - 0, + index_x_d + margin:index_x_u - margin] = \ + seg[margin:, + margin:-margin or None] else: - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg + prediction_true[index_y_d + margin:index_y_u - margin, + index_x_d + margin:index_x_u - margin] = \ + seg[margin:-margin or None, + margin:-margin or None] prediction_true = prediction_true.astype(int) return prediction_true @@ -469,55 +508,39 @@ class Eynollah: self.logger.debug("enter calculate_width_height_by_columns") if num_col == 1 and width_early < 1100: img_w_new = 2000 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 2000) elif num_col == 1 and width_early >= 2500: img_w_new = 2000 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 2000) elif num_col == 1 and width_early >= 1100 and width_early < 2500: img_w_new = width_early - img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 2 and width_early < 2000: img_w_new = 2400 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 2400) elif num_col == 2 and width_early >= 3500: img_w_new = 2400 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 2400) elif num_col == 2 and width_early >= 2000 and width_early < 3500: img_w_new = width_early - img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 3 and width_early < 2000: img_w_new = 3000 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 3000) elif num_col == 3 and width_early >= 4000: img_w_new = 3000 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 3000) elif num_col == 3 and width_early >= 2000 and width_early < 4000: img_w_new = width_early - img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 4 and width_early < 2500: img_w_new = 4000 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 4000) elif num_col == 4 and width_early >= 5000: img_w_new = 4000 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 4000) elif num_col == 4 and width_early >= 2500 and width_early < 5000: img_w_new = width_early - img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 5 and width_early < 3700: img_w_new = 5000 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 5000) elif num_col == 5 and width_early >= 7000: img_w_new = 5000 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 5000) elif num_col == 5 and width_early >= 3700 and width_early < 7000: img_w_new = width_early - img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 6 and width_early < 4500: img_w_new = 6500 # 5400 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 6500) else: img_w_new = width_early - img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) + img_h_new = img_w_new * img.shape[0] // img.shape[1] if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) @@ -536,10 +559,9 @@ class Eynollah: self.logger.debug("enter calculate_width_height_by_columns") if num_col == 1: img_w_new = 1000 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 1000) else: img_w_new = 1300 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 1300) + img_h_new = img_w_new * img.shape[0] // img.shape[1] if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) @@ -568,7 +590,7 @@ class Eynollah: img_w_new = 2200 elif num_col == 6: img_w_new = 2500 - img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) + img_h_new = img_w_new * img.shape[0] // img.shape[1] img_new = resize_image(img, img_h_new, img_w_new) num_column_is_classified = True @@ -601,7 +623,6 @@ class Eynollah: # plt.imshow(img_1ch) # plt.show() img_1ch = img_1ch / 255.0 - img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) @@ -610,11 +631,9 @@ class Eynollah: img_in[0, :, :, 2] = img_1ch[:, :] label_p_pred = self.model_classifier.predict(img_in, verbose=0) - num_col = np.argmax(label_p_pred[0]) + 1 self.logger.info("Found %s columns (%s)", num_col, label_p_pred) - img_new, _ = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) if img_new.shape[1] > img.shape[1]: @@ -623,7 +642,7 @@ class Eynollah: return img, img_new, is_image_enhanced - def resize_and_enhance_image_with_column_classifier(self,light_version): + def resize_and_enhance_image_with_column_classifier(self, light_version): self.logger.debug("enter resize_and_enhance_image_with_column_classifier") dpi = self.dpi self.logger.info("Detected %s DPI", dpi) @@ -633,16 +652,10 @@ class Eynollah: self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) - - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - prediction_bin = prediction_bin.astype(np.uint8) + prediction_bin = 255 * (prediction_bin[:,:,0]==0) + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8) img= np.copy(prediction_bin) - img_bin = np.copy(prediction_bin) + img_bin = prediction_bin else: img = self.imread() img_bin = None @@ -663,8 +676,7 @@ class Eynollah: elif self.num_col_lower and not self.num_col_upper: num_col = self.num_col_lower label_p_pred = [np.ones(6)] - - elif (not self.num_col_upper and not self.num_col_lower): + elif not self.num_col_upper and not self.num_col_lower: if self.input_binary: img_in = np.copy(img) img_in = img_in / 255.0 @@ -682,7 +694,6 @@ class Eynollah: img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] - label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): @@ -703,7 +714,6 @@ class Eynollah: img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] - label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 @@ -713,20 +723,19 @@ class Eynollah: if num_col < self.num_col_lower: num_col = self.num_col_lower label_p_pred = [np.ones(6)] - else: num_col = self.num_col_upper label_p_pred = [np.ones(6)] - self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) - if not self.extract_only_images: if dpi < DPI_THRESHOLD: if light_version and num_col in (1,2): - img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2( + img, num_col, width_early, label_p_pred) else: - img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) + img_new, num_column_is_classified = self.calculate_width_height_by_columns( + img, num_col, width_early, label_p_pred) if light_version: image_res = np.copy(img_new) else: @@ -734,7 +743,8 @@ class Eynollah: is_image_enhanced = True else: if light_version and num_col in (1,2): - img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2( + img, num_col, width_early, label_p_pred) image_res = np.copy(img_new) is_image_enhanced = True else: @@ -809,7 +819,6 @@ class Eynollah: return model, session - def start_new_session_and_model(self, model_dir): self.logger.debug("enter start_new_session_and_model (model_dir=%s)", model_dir) #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) @@ -830,17 +839,20 @@ class Eynollah: else: try: model = load_model(model_dir, compile=False) - self.models[model_dir] = model except: - model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) - self.models[model_dir] = model - + model = load_model(model_dir , compile=False, custom_objects={ + "PatchEncoder": PatchEncoder, "Patches": Patches}) + self.models[model_dir] = model return model, None - def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): - self.logger.debug("enter do_prediction") + def do_prediction( + self, patches, img, model, + n_batch_inference=1, marginal_of_patch_percent=0.1, + thresholding_for_some_classes_in_light_version=False, + thresholding_for_artificial_class_in_light_version=False): + self.logger.debug("enter do_prediction") img_height_model = model.layers[-1].output_shape[1] img_width_model = model.layers[-1].output_shape[2] @@ -851,7 +863,6 @@ class Eynollah: img = resize_image(img, img_height_model, img_width_model) label_p_pred = model.predict(img[np.newaxis], verbose=0) - seg = np.argmax(label_p_pred, axis=3)[0] if thresholding_for_artificial_class_in_light_version: @@ -862,13 +873,11 @@ class Eynollah: seg[seg_art==1]=2 seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - prediction_true = resize_image(seg_color, img_h_page, img_w_page) - prediction_true = prediction_true.astype(np.uint8) + prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8) return prediction_true if img.shape[0] < img_height_model: img = resize_image(img, img_height_model, img.shape[1]) - if img.shape[1] < img_width_model: img = resize_image(img, img.shape[0], img_width_model) @@ -876,7 +885,7 @@ class Eynollah: margin = int(marginal_of_patch_percent * img_height_model) width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin - img = img / float(255.0) + img = img / 255. #img = img.astype(np.float16) img_h = img.shape[0] img_w = img.shape[1] @@ -895,7 +904,6 @@ class Eynollah: list_y_d = [] batch_indexer = 0 - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) for i in range(nxf): for j in range(nyf): @@ -925,17 +933,14 @@ class Eynollah: list_y_d.append(index_y_d) list_y_u.append(index_y_u) - img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - - batch_indexer = batch_indexer + 1 + batch_indexer += 1 if (batch_indexer == n_batch_inference or # last batch i == nxf - 1 and j == nyf - 1): self.logger.debug("predicting patches on %s", str(img_patch.shape)) - label_p_pred = model.predict(img_patch,verbose=0) - + label_p_pred = model.predict(img_patch, verbose=0) seg = np.argmax(label_p_pred, axis=3) if thresholding_for_some_classes_in_light_version: @@ -964,8 +969,7 @@ class Eynollah: indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + seg_in = seg[indexer_inside_batch] index_y_u_in = list_y_u[indexer_inside_batch] index_y_d_in = list_y_d[indexer_inside_batch] @@ -974,34 +978,60 @@ class Eynollah: index_x_d_in = list_x_d[indexer_inside_batch] if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[0:-margin or None, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[margin:, + margin:, + np.newaxis] elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[margin:, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[0:-margin or None, + margin:, + np.newaxis] elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[margin:-margin or None, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[margin:-margin or None, + margin:, + np.newaxis] elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[0:-margin or None, + margin:-margin or None, + np.newaxis] elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[margin:, + margin:-margin or None, + np.newaxis] else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[margin:-margin or None, + margin:-margin or None, + np.newaxis] + indexer_inside_batch += 1 list_i_s = [] @@ -1012,15 +1042,14 @@ class Eynollah: list_y_d = [] batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + img_patch[:] = 0 prediction_true = prediction_true.astype(np.uint8) #del model gc.collect() return prediction_true - def do_padding_with_scale(self,img, scale): + def do_padding_with_scale(self, img, scale): h_n = int(img.shape[0]*scale) w_n = int(img.shape[1]*scale) @@ -1031,8 +1060,8 @@ class Eynollah: h_diff = img.shape[0] - h_n w_diff = img.shape[1] - w_n - h_start = int(h_diff / 2.) - w_start = int(w_diff / 2.) + h_start = int(0.5 * h_diff) + w_start = int(0.5 * w_diff) img_res = resize_image(img, h_n, w_n) #label_res = resize_image(label, h_n, w_n) @@ -1049,9 +1078,14 @@ class Eynollah: #label_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = label_res[:,:,:] return img_scaled_padded#, label_scaled_padded - def do_prediction_new_concept_scatter_nd(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): - self.logger.debug("enter do_prediction_new_concept") + def do_prediction_new_concept_scatter_nd( + self, patches, img, model, + n_batch_inference=1, marginal_of_patch_percent=0.1, + thresholding_for_some_classes_in_light_version=False, + thresholding_for_artificial_class_in_light_version=False): + + self.logger.debug("enter do_prediction_new_concept") img_height_model = model.layers[-1].output_shape[1] img_width_model = model.layers[-1].output_shape[2] @@ -1074,16 +1108,13 @@ class Eynollah: seg_art[seg_art<0.2] =0 seg_art[seg_art>0] =1 seg[seg_art==1]=4 - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - prediction_true = resize_image(seg_color, img_h_page, img_w_page) - prediction_true = prediction_true.astype(np.uint8) + prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8) return prediction_true if img.shape[0] < img_height_model: img = resize_image(img, img_height_model, img.shape[1]) - if img.shape[1] < img_width_model: img = resize_image(img, img.shape[0], img_width_model) @@ -1091,8 +1122,7 @@ class Eynollah: ##margin = int(marginal_of_patch_percent * img_height_model) #width_mid = img_width_model - 2 * margin #height_mid = img_height_model - 2 * margin - img = img / float(255.0) - + img = img / 255.0 img = img.astype(np.float16) img_h = img.shape[0] img_w = img.shape[1] @@ -1101,61 +1131,61 @@ class Eynollah: stride_y = img_height_model - 100 one_tensor = tf.ones_like(img) - img_patches = tf.image.extract_patches(images=[img,one_tensor], - sizes=[1, img_height_model, img_width_model, 1], - strides=[1, stride_y, stride_x, 1], - rates=[1, 1, 1, 1], - padding='SAME') - - one_patches = img_patches[1] - img_patches = img_patches[0] + img_patches, one_patches = tf.image.extract_patches( + images=[img, one_tensor], + sizes=[1, img_height_model, img_width_model, 1], + strides=[1, stride_y, stride_x, 1], + rates=[1, 1, 1, 1], + padding='SAME') img_patches = tf.squeeze(img_patches) - - img_patches_resh = tf.reshape(img_patches, shape = (img_patches.shape[0]*img_patches.shape[1], img_height_model, img_width_model, 3)) - - pred_patches = model.predict(img_patches_resh, batch_size=n_batch_inference) - one_patches = tf.squeeze(one_patches) - one_patches = tf.reshape(one_patches, [img_patches.shape[0]*img_patches.shape[1],img_height_model,img_width_model,3]) - + img_patches_resh = tf.reshape(img_patches, shape=(img_patches.shape[0] * img_patches.shape[1], + img_height_model, img_width_model, 3)) + pred_patches = model.predict(img_patches_resh, batch_size=n_batch_inference) + one_patches = tf.reshape(one_patches, shape=(img_patches.shape[0] * img_patches.shape[1], + img_height_model, img_width_model, 3)) x = tf.range(img.shape[1]) y = tf.range(img.shape[0]) x, y = tf.meshgrid(x, y) indices = tf.stack([y, x], axis=-1) - indices_patches = tf.image.extract_patches(images=tf.expand_dims(indices, axis=0), sizes=[1, img_height_model, img_width_model, 1], strides=[1, stride_y, stride_x, 1], rates=[1, 1, 1, 1], padding='SAME') + indices_patches = tf.image.extract_patches( + images=tf.expand_dims(indices, axis=0), + sizes=[1, img_height_model, img_width_model, 1], + strides=[1, stride_y, stride_x, 1], + rates=[1, 1, 1, 1], + padding='SAME') indices_patches = tf.squeeze(indices_patches) - indices_patches = tf.reshape(indices_patches, [img_patches.shape[0]*img_patches.shape[1],img_height_model, img_width_model,2]) - - margin_y = int( (img_height_model - stride_y)/2. ) - margin_x = int( (img_width_model - stride_x)/2. ) + indices_patches = tf.reshape(indices_patches, shape=(img_patches.shape[0] * img_patches.shape[1], + img_height_model, img_width_model, 2)) + margin_y = int( 0.5 * (img_height_model - stride_y) ) + margin_x = int( 0.5 * (img_width_model - stride_x) ) mask_margin = np.zeros((img_height_model, img_width_model)) - - mask_margin[margin_y:img_height_model-margin_y, margin_x:img_width_model-margin_x] = 1 + mask_margin[margin_y:img_height_model - margin_y, + margin_x:img_width_model - margin_x] = 1 indices_patches_array = indices_patches.numpy() - for i in range(indices_patches_array.shape[0]): indices_patches_array[i,:,:,0] = indices_patches_array[i,:,:,0]*mask_margin indices_patches_array[i,:,:,1] = indices_patches_array[i,:,:,1]*mask_margin - reconstructed = tf.scatter_nd(indices=indices_patches_array, updates=pred_patches, shape=(img.shape[0],img.shape[1],pred_patches.shape[-1])) - reconstructed_argmax = reconstructed.numpy() - - prediction_true = np.argmax(reconstructed_argmax, axis=2) - prediction_true = prediction_true.astype(np.uint8) + reconstructed = tf.scatter_nd( + indices=indices_patches_array, + updates=pred_patches, + shape=(img.shape[0], img.shape[1], pred_patches.shape[-1])).numpy() + prediction_true = np.argmax(reconstructed, axis=2).astype(np.uint8) gc.collect() return np.repeat(prediction_true[:, :, np.newaxis], 3, axis=2) - - - - - - def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): - self.logger.debug("enter do_prediction_new_concept") + def do_prediction_new_concept( + self, patches, img, model, + n_batch_inference=1, marginal_of_patch_percent=0.1, + thresholding_for_some_classes_in_light_version=False, + thresholding_for_artificial_class_in_light_version=False): + + self.logger.debug("enter do_prediction_new_concept") img_height_model = model.layers[-1].output_shape[1] img_width_model = model.layers[-1].output_shape[2] @@ -1178,16 +1208,13 @@ class Eynollah: seg_art[seg_art<0.2] =0 seg_art[seg_art>0] =1 seg[seg_art==1]=4 - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - prediction_true = resize_image(seg_color, img_h_page, img_w_page) - prediction_true = prediction_true.astype(np.uint8) + prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8) return prediction_true if img.shape[0] < img_height_model: img = resize_image(img, img_height_model, img.shape[1]) - if img.shape[1] < img_width_model: img = resize_image(img, img.shape[0], img_width_model) @@ -1195,7 +1222,7 @@ class Eynollah: margin = int(marginal_of_patch_percent * img_height_model) width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin - img = img / float(255.0) + img = img / 255.0 img = img.astype(np.float16) img_h = img.shape[0] img_w = img.shape[1] @@ -1215,7 +1242,6 @@ class Eynollah: batch_indexer = 0 img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - for i in range(nxf): for j in range(nyf): if i == 0: @@ -1237,7 +1263,6 @@ class Eynollah: index_y_u = img_h index_y_d = img_h - img_height_model - list_i_s.append(i) list_j_s.append(j) list_x_u.append(index_x_u) @@ -1245,17 +1270,14 @@ class Eynollah: list_y_d.append(index_y_d) list_y_u.append(index_y_u) - - img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - - batch_indexer = batch_indexer + 1 + img_patch[batch_indexer] = img[index_y_d:index_y_u, index_x_d:index_x_u] + batch_indexer += 1 if (batch_indexer == n_batch_inference or # last batch i == nxf - 1 and j == nyf - 1): self.logger.debug("predicting patches on %s", str(img_patch.shape)) label_p_pred = model.predict(img_patch,verbose=0) - seg = np.argmax(label_p_pred, axis=3) if thresholding_for_some_classes_in_light_version: @@ -1279,8 +1301,7 @@ class Eynollah: indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + seg_in = seg[indexer_inside_batch] index_y_u_in = list_y_u[indexer_inside_batch] index_y_d_in = list_y_d[indexer_inside_batch] @@ -1289,35 +1310,60 @@ class Eynollah: index_x_d_in = list_x_d[indexer_inside_batch] if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[0:-margin or None, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[margin:, + margin:, + np.newaxis] elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[margin:, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[0:-margin or None, + margin:, + np.newaxis] elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[margin:-margin or None, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[margin:-margin or None, + margin:, + np.newaxis] elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[0:-margin or None, + margin:-margin or None, + np.newaxis] elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[margin:, + margin:-margin or None, + np.newaxis] else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[margin:-margin or None, + margin:-margin or None, + np.newaxis] + indexer_inside_batch += 1 list_i_s = [] list_j_s = [] @@ -1327,8 +1373,7 @@ class Eynollah: list_y_d = [] batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + img_patch[:] = 0 prediction_true = prediction_true.astype(np.uint8) gc.collect() @@ -1338,11 +1383,10 @@ class Eynollah: self.logger.debug("enter extract_page") cont_page = [] if not self.ignore_page_extraction: - img = cv2.GaussianBlur(self.image, (5, 5), 0) - if not self.dir_in: self.model_page, _ = self.start_new_session_and_model(self.model_page_dir) - + + img = cv2.GaussianBlur(self.image, (5, 5), 0) img_page_prediction = self.do_prediction(False, img, self.model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) @@ -1350,7 +1394,8 @@ class Eynollah: contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) if len(contours)>0: - cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) + cnt_size = np.array([cv2.contourArea(contours[j]) + for j in range(len(contours))]) cnt = contours[np.argmax(cnt_size)] x, y, w, h = cv2.boundingRect(cnt) if x <= 30: @@ -1363,32 +1408,34 @@ class Eynollah: y = 0 if (self.image.shape[0] - (y + h)) <= 30: h = h + (self.image.shape[0] - (y + h)) - box = [x, y, w, h] else: box = [0, 0, img.shape[1], img.shape[0]] - croped_page, page_coord = crop_image_inside_box(box, self.image) - cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - + cropped_page, page_coord = crop_image_inside_box(box, self.image) + cont_page.append(np.array([[page_coord[2], page_coord[0]], + [page_coord[3], page_coord[0]], + [page_coord[3], page_coord[1]], + [page_coord[2], page_coord[1]]])) self.logger.debug("exit extract_page") else: box = [0, 0, self.image.shape[1], self.image.shape[0]] - croped_page, page_coord = crop_image_inside_box(box, self.image) - cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - return croped_page, page_coord, cont_page + cropped_page, page_coord = crop_image_inside_box(box, self.image) + cont_page.append(np.array([[page_coord[2], page_coord[0]], + [page_coord[3], page_coord[0]], + [page_coord[3], page_coord[1]], + [page_coord[2], page_coord[1]]])) + return cropped_page, page_coord, cont_page def early_page_for_num_of_column_classification(self,img_bin): if not self.ignore_page_extraction: self.logger.debug("enter early_page_for_num_of_column_classification") if self.input_binary: - img =np.copy(img_bin) - img = img.astype(np.uint8) + img = np.copy(img_bin).astype(np.uint8) else: img = self.imread() if not self.dir_in: self.model_page, _ = self.start_new_session_and_model(self.model_page_dir) img = cv2.GaussianBlur(img, (5, 5), 0) - img_page_prediction = self.do_prediction(False, img, self.model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) @@ -1396,20 +1443,20 @@ class Eynollah: thresh = cv2.dilate(thresh, KERNEL, iterations=3) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) if len(contours)>0: - cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) + cnt_size = np.array([cv2.contourArea(contours[j]) + for j in range(len(contours))]) cnt = contours[np.argmax(cnt_size)] - x, y, w, h = cv2.boundingRect(cnt) - box = [x, y, w, h] + box = cv2.boundingRect(cnt) else: box = [0, 0, img.shape[1], img.shape[0]] - croped_page, page_coord = crop_image_inside_box(box, img) + cropped_page, page_coord = crop_image_inside_box(box, img) self.logger.debug("exit early_page_for_num_of_column_classification") else: img = self.imread() box = [0, 0, img.shape[1], img.shape[0]] - croped_page, page_coord = crop_image_inside_box(box, img) - return croped_page, page_coord + cropped_page, page_coord = crop_image_inside_box(box, img) + return cropped_page, page_coord def extract_text_regions_new(self, img, patches, cols): self.logger.debug("enter extract_text_regions") @@ -1420,84 +1467,33 @@ class Eynollah: self.model_region_fl, _ = self.start_new_session_and_model(self.model_region_dir_fully) else: self.model_region_fl_np, _ = self.start_new_session_and_model(self.model_region_dir_fully_np) - model_region = self.model_region_fl if patches else self.model_region_fl_np - if not patches: - if self.light_version: - pass - else: - img = otsu_copy_binary(img) - #img = img.astype(np.uint8) - prediction_regions2 = None - else: + if self.light_version: + pass + elif not patches: + img = otsu_copy_binary(img).astype(np.uint8) + prediction_regions = None + elif cols: + img = otsu_copy_binary(img).astype(np.uint8) if cols == 1: - if self.light_version: - pass - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - - img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000) - img = img.astype(np.uint8) - - if cols == 2: - if self.light_version: - pass - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300) - img = img.astype(np.uint8) - - if cols == 3: - if self.light_version: - pass - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600) - img = img.astype(np.uint8) - - if cols == 4: - if self.light_version: - pass - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900) - img = img.astype(np.uint8) - - if cols == 5: - if self.light_version: - pass - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200) - img = img.astype(np.uint8) - - if cols >= 6: - if self.light_version: - pass - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500) - img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000).astype(np.uint8) + elif cols == 2: + img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300).astype(np.uint8) + elif cols == 3: + img = resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600).astype(np.uint8) + elif cols == 4: + img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900).astype(np.uint8) + elif cols == 5: + img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200).astype(np.uint8) + else: + img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500).astype(np.uint8) - marginal_of_patch_percent = 0.1 - - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) - - - ##prediction_regions = self.do_prediction(False, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) - + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1, n_batch_inference=3) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions - - + def extract_text_regions(self, img, patches, cols): self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] @@ -1507,92 +1503,51 @@ class Eynollah: self.model_region_fl, _ = self.start_new_session_and_model(self.model_region_dir_fully) else: self.model_region_fl_np, _ = self.start_new_session_and_model(self.model_region_dir_fully_np) - model_region = self.model_region_fl if patches else self.model_region_fl_np if not patches: img = otsu_copy_binary(img) img = img.astype(np.uint8) prediction_regions2 = None - else: + elif cols: if cols == 1: - img2 = otsu_copy_binary(img) - img2 = img2.astype(np.uint8) - img2 = resize_image(img2, int(img_height_h * 0.7), int(img_width_h * 0.7)) - marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) - prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) - - if cols == 2: - img2 = otsu_copy_binary(img) - img2 = img2.astype(np.uint8) - img2 = resize_image(img2, int(img_height_h * 0.4), int(img_width_h * 0.4)) - marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) - prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) - - elif cols > 2: - img2 = otsu_copy_binary(img) - img2 = img2.astype(np.uint8) - img2 = resize_image(img2, int(img_height_h * 0.3), int(img_width_h * 0.3)) - marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) - prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) - - if cols == 2: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - if img_width_h >= 2000: - img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) - img = img.astype(np.uint8) - + img_height_new = int(img_height_h * 0.7) + img_width_new = int(img_width_h * 0.7) + elif cols == 2: + img_height_new = int(img_height_h * 0.4) + img_width_new = int(img_width_h * 0.4) + else: + img_height_new = int(img_height_h * 0.3) + img_width_new = int(img_width_h * 0.3) + img2 = otsu_copy_binary(img) + img2 = img2.astype(np.uint8) + img2 = resize_image(img2, img_height_new, img_width_new) + prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=0.1) + prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) + + img = otsu_copy_binary(img).astype(np.uint8) if cols == 1: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 0.5), int(img_width_h * 0.5)) - img = img.astype(np.uint8) - - if cols == 3: - if (self.scale_x == 1 and img_width_h > 3000) or (self.scale_x != 1 and img_width_h > 2800): - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 2800 / float(img_width_h)), 2800) - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - - if cols == 4: - if (self.scale_x == 1 and img_width_h > 4000) or (self.scale_x != 1 and img_width_h > 3700): - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 3700 / float(img_width_h)), 3700) - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) - - if cols == 5: - if self.scale_x == 1 and img_width_h > 5000: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.7), int(img_width_h * 0.7)) - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9) ) - - if cols >= 6: - if img_width_h > 5600: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 5600 / float(img_width_h)), 5600) - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) + img = resize_image(img, int(img_height_h * 0.5), int(img_width_h * 0.5)).astype(np.uint8) + elif cols == 2 and img_width_h >= 2000: + img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8) + elif cols == 3 and ((self.scale_x == 1 and img_width_h > 3000) or + (self.scale_x != 1 and img_width_h > 2800)): + img = resize_image(img, 2800 * img_height_h // img_width_h, 2800).astype(np.uint8) + elif cols == 4 and ((self.scale_x == 1 and img_width_h > 4000) or + (self.scale_x != 1 and img_width_h > 3700)): + img = resize_image(img, 3700 * img_height_h // img_width_h, 3700).astype(np.uint8) + elif cols == 4: + img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8) + elif cols == 5 and self.scale_x == 1 and img_width_h > 5000: + img = resize_image(img, int(img_height_h * 0.7), int(img_width_h * 0.7)).astype(np.uint8) + elif cols == 5: + img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8) + elif img_width_h > 5600: + img = resize_image(img, 5600 * img_height_h // img_width_h, 5600).astype(np.uint8) + else: + img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8) - marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 @@ -1600,9 +1555,8 @@ class Eynollah: def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) - - - M_main_tot = [cv2.moments(polygons_of_textlines[j]) for j in range(len(polygons_of_textlines))] + M_main_tot = [cv2.moments(polygons_of_textlines[j]) + for j in range(len(polygons_of_textlines))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] @@ -1612,18 +1566,16 @@ class Eynollah: all_box_coord =[] for index, con_region_ind in enumerate(contours_par): - results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False) for ind in args_textlines ] + results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False) + for ind in args_textlines ] results = np.array(results) - indexes_in = args_textlines[results==1] - textlines_ins = [polygons_of_textlines[ind] for ind in indexes_in] all_found_textline_polygons.append(textlines_ins) slopes.append(slope_deskew) _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) - all_box_coord.append(crop_coor) return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes @@ -1690,32 +1642,29 @@ class Eynollah: img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - prediction_textline = self.do_prediction(use_patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, - thresholding_for_artificial_class_in_light_version=self.textline_light) + prediction_textline = self.do_prediction( + use_patches, img, self.model_textline, + marginal_of_patch_percent=0.15, n_batch_inference=3, + thresholding_for_artificial_class_in_light_version=self.textline_light) #if not self.textline_light: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline) #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 prediction_textline = resize_image(prediction_textline, img_h, img_w) - textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1 old_art = np.copy(textline_mask_tot_ea_art) - if not self.textline_light: textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') #textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) - prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1 textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8') - if not self.textline_light: textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1 - if not self.textline_light: prediction_textline[:,:][old_art[:,:]==1]=2 @@ -1723,7 +1672,8 @@ class Eynollah: prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) self.logger.debug('exit textline_contours') - return ((prediction_textline[:, :, 0]==1)*1).astype('uint8'), ((prediction_textline_longshot_true_size[:, :, 0]==1)*1).astype('uint8') + return ((prediction_textline[:, :, 0]==1).astype(np.uint8), + (prediction_textline_longshot_true_size[:, :, 0]==1).astype(np.uint8)) def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): @@ -1752,7 +1702,8 @@ class Eynollah: slope_corresponding_textregion = slope_biggest slopes_sub.append(slope_corresponding_textregion) - cnt_clean_rot = textline_contours_postprocessing(crop_img, slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv]) + cnt_clean_rot = textline_contours_postprocessing( + crop_img, slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv]) poly_sub.append(cnt_clean_rot) boxes_sub_new.append(boxes_per_process[mv]) @@ -1782,55 +1733,41 @@ class Eynollah: elif num_col_classifier == 6: img_w_new = 2500 img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) - img_resized = resize_image(img,img_h_new, img_w_new ) - - if not self.dir_in: self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light_only_images_extraction) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region) prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - image_page, page_coord, cont_page = self.extract_page() - prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - - prediction_regions_org=prediction_regions_org[:,:,0] mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - + polygons_lines_xml = textline_con_fil = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - - + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1)) text_regions_p_true[text_regions_p_true.shape[0]-15:text_regions_p_true.shape[0], :] = 0 text_regions_p_true[:, text_regions_p_true.shape[1]-15:text_regions_p_true.shape[1]] = 0 ##polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.0001) polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.001) - image_boundary_of_doc = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1])) ###image_boundary_of_doc[:6, :] = 1 @@ -1839,14 +1776,13 @@ class Eynollah: ###image_boundary_of_doc[:, :6] = 1 ###image_boundary_of_doc[:, text_regions_p_true.shape[1]-6:text_regions_p_true.shape[1]] = 1 - polygons_of_images_fin = [] for ploy_img_ind in polygons_of_images: """ test_poly_image = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1])) - test_poly_image = cv2.fillPoly(test_poly_image, pts = [ploy_img_ind], color=(1,1,1)) + test_poly_image = cv2.fillPoly(test_poly_image, pts=[ploy_img_ind], color=(1,1,1)) - test_poly_image = test_poly_image[:,:] + image_boundary_of_doc[:,:] + test_poly_image = test_poly_image + image_boundary_of_doc test_poly_image_intersected_area = ( test_poly_image[:,:]==2 )*1 test_poly_image_intersected_area = test_poly_image_intersected_area.sum() @@ -1854,22 +1790,30 @@ class Eynollah: if test_poly_image_intersected_area==0: ##polygons_of_images_fin.append(ploy_img_ind) - x, y, w, h = cv2.boundingRect(ploy_img_ind) - box = [x, y, w, h] + box = cv2.boundingRect(ploy_img_ind) _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) - #cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - - polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) ) + # cont_page.append(np.array([[page_coord[2], page_coord[0]], + # [page_coord[3], page_coord[0]], + # [page_coord[3], page_coord[1]], + # [page_coord[2], page_coord[1]]])) + polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], + [page_coord_img[3], page_coord_img[0]], + [page_coord_img[3], page_coord_img[1]], + [page_coord_img[2], page_coord_img[1]]]) ) """ - x, y, w, h = cv2.boundingRect(ploy_img_ind) + box = x, y, w, h = cv2.boundingRect(ploy_img_ind) if h < 150 or w < 150: pass else: - box = [x, y, w, h] _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) - #cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - - polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) ) + # cont_page.append(np.array([[page_coord[2], page_coord[0]], + # [page_coord[3], page_coord[0]], + # [page_coord[3], page_coord[1]], + # [page_coord[2], page_coord[1]]])) + polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], + [page_coord_img[3], page_coord_img[0]], + [page_coord_img[3], page_coord_img[1]], + [page_coord_img[2], page_coord_img[1]]])) self.logger.debug("exit get_regions_extract_images_only") return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page @@ -1883,34 +1827,24 @@ class Eynollah: img_width_h = img_org.shape[1] #model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) - #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: img_w_new = 1000 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) - elif num_col_classifier == 2: img_w_new = 1500#1500 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) - elif num_col_classifier == 3: img_w_new = 2000 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) - elif num_col_classifier == 4: img_w_new = 2500 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 5: img_w_new = 3000 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) else: img_w_new = 4000 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) + img_h_new = img_w_new * img_org.shape[0] // img_org.shape[1] img_resized = resize_image(img,img_h_new, img_w_new ) t_bin = time.time() - #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) @@ -1935,12 +1869,8 @@ class Eynollah: if not self.dir_in: self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - + prediction_bin = 255 * (prediction_bin[:,:,0] == 0) + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) prediction_bin = prediction_bin.astype(np.uint16) #img= np.copy(prediction_bin) img_bin = np.copy(prediction_bin) @@ -1951,11 +1881,8 @@ class Eynollah: ###textline_mask_tot_ea = self.run_textline(img_bin) self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), len(np.unique(img_resized))) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) - - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - #print(self.image_org.shape) #cv2.imwrite('out_13.png', self.image_page_org_size) @@ -1979,7 +1906,9 @@ class Eynollah: prediction_regions_page = self.do_prediction_new_concept( False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page + ys = slice(*self.page_coord[0:2]) + xs = slice(*self.page_coord[2:4]) + prediction_regions_org[ys, xs] = prediction_regions_page else: new_h = (900+ (num_col_classifier-3)*100) img_resized = resize_image(img_bin, int(new_h * img_bin.shape[0] /img_bin.shape[1]), new_h) @@ -1989,26 +1918,16 @@ class Eynollah: True, img_resized, self.model_region_1_2, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - #print("inside 3 ", time.time()-t_in) - #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() - prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - img_bin = resize_image(img_bin,img_height_h, img_width_h ) - prediction_regions_org=prediction_regions_org[:,:,0] - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - - - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - mask_texts_only = mask_texts_only.astype('uint8') ##if num_col_classifier == 1 or num_col_classifier == 2: @@ -2016,57 +1935,39 @@ class Eynollah: ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) - - mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - - test_khat = np.zeros(prediction_regions_org.shape) - - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - - + test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1)) + #plt.imshow(test_khat[:,:]) #plt.show() - #for jv in range(1): #print(jv, hir_lines_xml[0][232][3]) #test_khat = np.zeros(prediction_regions_org.shape) - #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) - - #plt.imshow(test_khat[:,:]) #plt.show() - - polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - + polygons_lines_xml = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - #plt.imshow(test_khat[:,:]) #plt.show() #sys.exit() polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts) - - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3,3,3)) text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) #plt.imshow(textline_mask_tot_ea) @@ -2107,14 +2008,10 @@ class Eynollah: mask_zeros_y = (prediction_regions_org_y[:,:]==0)*1 ##img_only_regions_with_sep = ( (prediction_regions_org_y[:,:] != 3) & (prediction_regions_org_y[:,:] != 0) )*1 - img_only_regions_with_sep = ( prediction_regions_org_y[:,:] == 1 )*1 - img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - + img_only_regions_with_sep = (prediction_regions_org_y == 1).astype(np.uint8) try: img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=20) - _, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) - img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1))) prediction_regions_org = self.do_prediction(True, img, self.model_region) @@ -2122,8 +2019,7 @@ class Eynollah: prediction_regions_org=prediction_regions_org[:,:,0] prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros_y[:,:]==1)]=0 - - + if not self.dir_in: self.model_region_p2, _ = self.start_new_session_and_model(self.model_region_dir_p2) @@ -2132,30 +2028,23 @@ class Eynollah: prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) - mask_zeros2 = (prediction_regions_org2[:,:,0] == 0) mask_lines2 = (prediction_regions_org2[:,:,0] == 3) text_sume_early = (prediction_regions_org[:,:] == 1).sum() prediction_regions_org_copy = np.copy(prediction_regions_org) prediction_regions_org_copy[(prediction_regions_org_copy[:,:]==1) & (mask_zeros2[:,:]==1)] = 0 text_sume_second = ((prediction_regions_org_copy[:,:]==1)*1).sum() - - rate_two_models = text_sume_second / float(text_sume_early) * 100 + rate_two_models = 100. * text_sume_second / text_sume_early self.logger.info("ratio_of_two_models: %s", rate_two_models) if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD): prediction_regions_org = np.copy(prediction_regions_org_copy) - - prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3 mask_lines_only=(prediction_regions_org[:,:]==3)*1 prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2) - - prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2) - - + if rate_two_models<=40: if self.input_binary: prediction_bin = np.copy(img_org) @@ -2164,19 +2053,14 @@ class Eynollah: self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) - - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + prediction_bin = 255 * (prediction_bin[:,:,0]==0) + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) if not self.dir_in: self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) ratio_y=1 ratio_x=1 - img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) prediction_regions_org = self.do_prediction(True, img, self.model_region) @@ -2188,10 +2072,9 @@ class Eynollah: mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1 - - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_lines_xml = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) @@ -2205,7 +2088,6 @@ class Eynollah: self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml except: - if self.input_binary: prediction_bin = np.copy(img_org) @@ -2213,14 +2095,8 @@ class Eynollah: self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) - prediction_bin=prediction_bin[:,:,0] - - prediction_bin = (prediction_bin[:,:]==0)*1 - - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - + prediction_bin = 255 * (prediction_bin[:,:,0]==0) + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) if not self.dir_in: self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) @@ -2248,49 +2124,53 @@ class Eynollah: #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - - mask_images_only=(prediction_regions_org[:,:] ==2)*1 + mask_lines_only = (prediction_regions_org == 3)*1 + mask_texts_only = (prediction_regions_org == 1)*1 + mask_images_only= (prediction_regions_org == 2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - + polygons_lines_xml = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) - text_regions_p_true = np.zeros(prediction_regions_org.shape) - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) erosion_hurts = True self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml - def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): + def do_order_of_regions_full_layout( + self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): + self.logger.debug("enter do_order_of_regions_full_layout") - cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours(contours_only_text_parent) - cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contours(contours_only_text_parent_h) + boxes = np.array(boxes, dtype=int) # to be on the safe side + cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( + contours_only_text_parent) + cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contours( + contours_only_text_parent_h) try: arg_text_con = [] for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: + if (x_min_text_only[ii] + 80 >= boxes[jj][0] and + x_min_text_only[ii] + 80 < boxes[jj][1] and + y_cor_x_min_main[ii] >= boxes[jj][2] and + y_cor_x_min_main[ii] < boxes[jj][3]): arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + + (cy_text_only[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) @@ -2298,12 +2178,17 @@ class Eynollah: for ii in range(len(cx_text_only_h)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (x_min_text_only_h[ii] + 80) >= boxes[jj][0] and (x_min_text_only_h[ii] + 80) < boxes[jj][1] and y_cor_x_min_main_h[ii] >= boxes[jj][2] and y_cor_x_min_main_h[ii] < boxes[jj][3]: + if (x_min_text_only_h[ii] + 80 >= boxes[jj][0] and + x_min_text_only_h[ii] + 80 < boxes[jj][1] and + y_cor_x_min_main_h[ii] >= boxes[jj][2] and + y_cor_x_min_main_h[ii] < boxes[jj][3]): arg_text_con_h.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con_h.append(ind_min) args_contours_h = np.array(range(len(arg_text_con_h))) @@ -2315,7 +2200,8 @@ class Eynollah: order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): - + ys = slice(*boxes[iij][2:4]) + xs = slice(*boxes[iij][0:2]) args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] con_inter_box = [] @@ -2327,9 +2213,12 @@ class Eynollah: for box in args_contours_box_h: con_inter_box_h.append(contours_only_text_parent_h[box]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts( + con_inter_box, con_inter_box_h, + matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] @@ -2338,11 +2227,13 @@ class Eynollah: for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for zahler, _ in enumerate(args_contours_box_h): arg_order_v = indexes_sorted_head[zahler] - order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji in range(len(id_of_texts)): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -2366,17 +2257,22 @@ class Eynollah: for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located + if (cx_text_only[ii] >= boxes[jj][0] and + cx_text_only[ii] < boxes[jj][1] and + cy_text_only[ii] >= boxes[jj][2] and + cy_text_only[ii] < boxes[jj][3]): + # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + + (cy_text_only[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) - order_by_con_main = np.zeros(len(arg_text_con)) ############################# head @@ -2385,22 +2281,29 @@ class Eynollah: for ii in range(len(cx_text_only_h)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if cx_text_only_h[ii] >= boxes[jj][0] and cx_text_only_h[ii] < boxes[jj][1] and cy_text_only_h[ii] >= boxes[jj][2] and cy_text_only_h[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located + if (cx_text_only_h[ii] >= boxes[jj][0] and + cx_text_only_h[ii] < boxes[jj][1] and + cy_text_only_h[ii] >= boxes[jj][2] and + cy_text_only_h[ii] < boxes[jj][3]): + # this is valid if the center of region identify in which box it is located arg_text_con_h.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con_h.append(ind_min) args_contours_h = np.array(range(len(arg_text_con_h))) - order_by_con_head = np.zeros(len(arg_text_con_h)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij, _ in enumerate(boxes): + ys = slice(*boxes[iij][2:4]) + xs = slice(*boxes[iij][0:2]) args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] con_inter_box = [] @@ -2412,9 +2315,12 @@ class Eynollah: for box in args_contours_box_h: con_inter_box_h.append(contours_only_text_parent_h[box]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts( + con_inter_box, con_inter_box_h, + matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] @@ -2423,11 +2329,13 @@ class Eynollah: for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for zahler, _ in enumerate(args_contours_box_h): arg_order_v = indexes_sorted_head[zahler] - order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -2448,21 +2356,30 @@ class Eynollah: self.logger.debug("exit do_order_of_regions_full_layout") return order_text_new, id_of_texts_tot - def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): + def do_order_of_regions_no_full_layout( + self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): + self.logger.debug("enter do_order_of_regions_no_full_layout") - cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours(contours_only_text_parent) + boxes = np.array(boxes, dtype=int) # to be on the safe side + cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( + contours_only_text_parent) try: arg_text_con = [] for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: + if (x_min_text_only[ii] + 80 >= boxes[jj][0] and + x_min_text_only[ii] + 80 < boxes[jj][1] and + y_cor_x_min_main[ii] >= boxes[jj][2] and + y_cor_x_min_main[ii] < boxes[jj][3]): arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + + (cy_text_only[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) @@ -2472,22 +2389,28 @@ class Eynollah: order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): + ys = slice(*boxes[iij][2:4]) + xs = slice(*boxes[iij][0:2]) args_contours_box = args_contours[np.array(arg_text_con) == iij] con_inter_box = [] con_inter_box_h = [] for i in range(len(args_contours_box)): con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts( + con_inter_box, con_inter_box_h, + matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -2508,39 +2431,49 @@ class Eynollah: for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located + if (cx_text_only[ii] >= boxes[jj][0] and + cx_text_only[ii] < boxes[jj][1] and + cy_text_only[ii] >= boxes[jj][2] and + cy_text_only[ii] < boxes[jj][3]): + # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + + (cy_text_only[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) - order_by_con_main = np.zeros(len(arg_text_con)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): + ys = slice(*boxes[iij][2:4]) + xs = slice(*boxes[iij][0:2]) args_contours_box = args_contours[np.array(arg_text_con) == iij] con_inter_box = [] con_inter_box_h = [] - for i in range(len(args_contours_box)): con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts( + con_inter_box, con_inter_box_h, + matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -2558,10 +2491,13 @@ class Eynollah: self.logger.debug("exit do_order_of_regions_no_full_layout") return order_text_new, id_of_texts_tot - def check_iou_of_bounding_box_and_contour_for_tables(self, layout, table_prediction_early, pixel_tabel, num_col_classifier): + + def check_iou_of_bounding_box_and_contour_for_tables( + self, layout, table_prediction_early, pixel_table, num_col_classifier): + layout_org = np.copy(layout) - layout_org[:,:,0][layout_org[:,:,0]==pixel_tabel] = 0 - layout = (layout[:,:,0]==pixel_tabel)*1 + layout_org[:,:,0][layout_org[:,:,0]==pixel_table] = 0 + layout = (layout[:,:,0]==pixel_table)*1 layout =np.repeat(layout[:, :, np.newaxis], 3, axis=2) layout = layout.astype(np.uint8) @@ -2569,18 +2505,17 @@ class Eynollah: _, thresh = cv2.threshold(imgray, 0, 255, 0) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) + cnt_size = np.array([cv2.contourArea(contours[j]) + for j in range(len(contours))]) contours_new = [] for i in range(len(contours)): x, y, w, h = cv2.boundingRect(contours[i]) iou = cnt_size[i] /float(w*h) *100 - if iou<80: layout_contour = np.zeros((layout_org.shape[0], layout_org.shape[1])) layout_contour= cv2.fillPoly(layout_contour,pts=[contours[i]] ,color=(1,1,1)) - - + layout_contour_sum = layout_contour.sum(axis=0) layout_contour_sum_diff = np.diff(layout_contour_sum) layout_contour_sum_diff= np.abs(layout_contour_sum_diff) @@ -2607,65 +2542,77 @@ class Eynollah: contours_new.append(contours_sep[ji]) if num_col_classifier>=2: only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) - only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,pts=[contours_sep[ji]] ,color=(1,1,1)) - table_pixels_masked_from_early_pre = only_recent_contour_image[:,:]*table_prediction_early[:,:] - iou_in = table_pixels_masked_from_early_pre.sum() /float(only_recent_contour_image.sum()) *100 + only_recent_contour_image= cv2.fillPoly(only_recent_contour_image, pts=[contours_sep[ji]], color=(1,1,1)) + table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early + iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in_in1') if iou_in>30: - layout_org= cv2.fillPoly(layout_org,pts=[contours_sep[ji]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) + layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) else: pass else: - - layout_org= cv2.fillPoly(layout_org,pts=[contours_sep[ji]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) - + layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) else: contours_new.append(contours[i]) if num_col_classifier>=2: only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,pts=[contours[i]] ,color=(1,1,1)) - table_pixels_masked_from_early_pre = only_recent_contour_image[:,:]*table_prediction_early[:,:] - iou_in = table_pixels_masked_from_early_pre.sum() /float(only_recent_contour_image.sum()) *100 + table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early + iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in') if iou_in>30: - layout_org= cv2.fillPoly(layout_org,pts=[contours[i]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) + layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) else: pass else: - layout_org= cv2.fillPoly(layout_org,pts=[contours[i]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) + layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) return layout_org, contours_new - def delete_separator_around(self,spliter_y,peaks_neg,image_by_region, pixel_line, pixel_table): + + def delete_separator_around(self, spliter_y,peaks_neg,image_by_region, pixel_line, pixel_table): # format of subboxes: box=[x1, x2 , y1, y2] pix_del = 100 if len(image_by_region.shape)==3: for i in range(len(spliter_y)-1): for j in range(1,len(peaks_neg[i])-1): - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0]==pixel_line ]=0 - image_by_region[spliter_y[i]:spliter_y[i+1],peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,1]==pixel_line ]=0 - image_by_region[spliter_y[i]:spliter_y[i+1],peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,2]==pixel_line ]=0 + ys = slice(int(spliter_y[i]), + int(spliter_y[i+1])) + xs = slice(peaks_neg[i][j] - pix_del, + peaks_neg[i][j] + pix_del) + image_by_region[ys,xs,0][image_by_region[ys,xs,0]==pixel_line] = 0 + image_by_region[ys,xs,0][image_by_region[ys,xs,1]==pixel_line] = 0 + image_by_region[ys,xs,0][image_by_region[ys,xs,2]==pixel_line] = 0 - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0]==pixel_table ]=0 - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,1]==pixel_table ]=0 - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,2]==pixel_table ]=0 + image_by_region[ys,xs,0][image_by_region[ys,xs,0]==pixel_table] = 0 + image_by_region[ys,xs,0][image_by_region[ys,xs,1]==pixel_table] = 0 + image_by_region[ys,xs,0][image_by_region[ys,xs,2]==pixel_table] = 0 else: for i in range(len(spliter_y)-1): for j in range(1,len(peaks_neg[i])-1): - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del]==pixel_line ]=0 - - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del]==pixel_table ]=0 + ys = slice(int(spliter_y[i]), + int(spliter_y[i+1])) + xs = slice(peaks_neg[i][j] - pix_del, + peaks_neg[i][j] + pix_del) + image_by_region[ys,xs][image_by_region[ys,xs]==pixel_line] = 0 + image_by_region[ys,xs][image_by_region[ys,xs]==pixel_table] = 0 return image_by_region - def add_tables_heuristic_to_layout(self, image_regions_eraly_p,boxes, slope_mean_hor, spliter_y,peaks_neg_tot, image_revised, num_col_classifier, min_area, pixel_line): + + def add_tables_heuristic_to_layout( + self, image_regions_eraly_p, boxes, + slope_mean_hor, spliter_y, peaks_neg_tot, image_revised, + num_col_classifier, min_area, pixel_line): + pixel_table =10 image_revised_1 = self.delete_separator_around(spliter_y, peaks_neg_tot, image_revised, pixel_line, pixel_table) try: image_revised_1[:,:30][image_revised_1[:,:30]==pixel_line] = 0 - image_revised_1[:,image_revised_1.shape[1]-30:][image_revised_1[:,image_revised_1.shape[1]-30:]==pixel_line] = 0 + image_revised_1[:,-30:][image_revised_1[:,-30:]==pixel_line] = 0 except: pass + boxes = np.array(boxes, dtype=int) # to be on the safe side img_comm_e = np.zeros(image_revised_1.shape) img_comm = np.repeat(img_comm_e[:, :, np.newaxis], 3, axis=2) @@ -2690,7 +2637,9 @@ class Eynollah: if not self.isNaN(slope_mean_hor): image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1],3)) for i in range(len(boxes)): - image_box=img_comm[int(boxes[i][2]):int(boxes[i][3]),int(boxes[i][0]):int(boxes[i][1]),:] + box_ys = slice(*boxes[i][2:4]) + box_xs = slice(*boxes[i][0:2]) + image_box = img_comm[box_ys, box_xs] try: image_box_tabels_1=(image_box[:,:,0]==pixel_table)*1 contours_tab,_=return_contours_of_image(image_box_tabels_1) @@ -2753,17 +2702,17 @@ class Eynollah: for ii in range(len(y_up_tabs)): image_box[y_up_tabs[ii]:y_down_tabs[ii],:,0]=pixel_table - image_revised_last[int(boxes[i][2]):int(boxes[i][3]),int(boxes[i][0]):int(boxes[i][1]),:]=image_box[:,:,:] + image_revised_last[box_ys, box_xs] = image_box else: for i in range(len(boxes)): - - image_box=img_comm[int(boxes[i][2]):int(boxes[i][3]),int(boxes[i][0]):int(boxes[i][1]),:] - image_revised_last[int(boxes[i][2]):int(boxes[i][3]),int(boxes[i][0]):int(boxes[i][1]),:]=image_box[:,:,:] + box_ys = slice(*boxes[i][2:4]) + box_xs = slice(*boxes[i][0:2]) + image_box = img_comm[box_ys, box_xs] + image_revised_last[box_ys, box_xs] = image_box if num_col_classifier==1: - img_tables_col_1=( image_revised_last[:,:,0]==pixel_table )*1 - img_tables_col_1=img_tables_col_1.astype(np.uint8) - contours_table_col1,_=return_contours_of_image(img_tables_col_1) + img_tables_col_1 = (image_revised_last[:,:,0] == pixel_table).astype(np.uint8) + contours_table_col1, _ = return_contours_of_image(img_tables_col_1) _,_ ,_ , _, y_min_tab_col1 ,y_max_tab_col1, _= find_new_features_of_contours(contours_table_col1) @@ -2779,17 +2728,13 @@ class Eynollah: def get_tables_from_model(self, img, num_col_classifier): img_org = np.copy(img) - img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - - if not self.dir_in: self.model_table, _ = self.start_new_session_and_model(self.model_table_dir) patches = False - if self.light_version: prediction_table = self.do_prediction_new_concept(patches, img, self.model_table) prediction_table = prediction_table.astype(np.int16) @@ -2804,52 +2749,52 @@ class Eynollah: prediction_table = prediction_table.astype(np.int16) elif num_col_classifier ==2: - height_ext = 0#int( img.shape[0]/4. ) - h_start = int(height_ext/2.) - width_ext = int( img.shape[1]/8. ) - w_start = int(width_ext/2.) - - height_new = img.shape[0]+height_ext - width_new = img.shape[1]+width_ext - - img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 - img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] + height_ext = 0 # img.shape[0] // 4 + h_start = height_ext // 2 + width_ext = img.shape[1] // 8 + w_start = width_ext // 2 + + img_new = np.zeros((img.shape[0] + height_ext, + img.shape[1] + width_ext, + img.shape[2])).astype(float) + ys = slice(h_start, h_start + img.shape[0]) + xs = slice(w_start, w_start + img.shape[1]) + img_new[ys, xs] = img prediction_ext = self.do_prediction(patches, img_new, self.model_table) pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) pre_updown = cv2.flip(pre_updown, -1) - prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + prediction_table = prediction_ext[ys, xs] + prediction_table_updown = pre_updown[ys, xs] prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 prediction_table = prediction_table.astype(np.int16) - elif num_col_classifier ==1: - height_ext = 0# int( img.shape[0]/4. ) - h_start = int(height_ext/2.) - width_ext = int( img.shape[1]/4. ) - w_start = int(width_ext/2.) - - height_new = img.shape[0]+height_ext - width_new = img.shape[1]+width_ext - - img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 - img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] + height_ext = 0 # img.shape[0] // 4 + h_start = height_ext // 2 + width_ext = img.shape[1] // 4 + w_start = width_ext // 2 + + img_new =np.zeros((img.shape[0] + height_ext, + img.shape[1] + width_ext, + img.shape[2])).astype(float) + ys = slice(h_start, h_start + img.shape[0]) + xs = slice(w_start, w_start + img.shape[1]) + img_new[ys, xs] = img prediction_ext = self.do_prediction(patches, img_new, self.model_table) pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) pre_updown = cv2.flip(pre_updown, -1) - prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + prediction_table = prediction_ext[ys, xs] + prediction_table_updown = pre_updown[ys, xs] prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 prediction_table = prediction_table.astype(np.int16) - else: prediction_table = np.zeros(img.shape) - img_w_half = int(img.shape[1]/2.) + img_w_half = img.shape[1] // 2 pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.model_table) pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.model_table) @@ -2877,7 +2822,10 @@ class Eynollah: prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) return prediction_table_erode.astype(np.int16) - def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light): + def run_graphics_and_columns_light( + self, text_regions_p_1, textline_mask_tot_ea, + num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light): + #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') #print(erosion_hurts, 'erosion_hurts') t_in_gr = time.time() @@ -2894,14 +2842,13 @@ class Eynollah: if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: - table_prediction = (np.zeros((image_page.shape[0], image_page.shape[1]))).astype(np.int16) + table_prediction = np.zeros((image_page.shape[0], image_page.shape[1])).astype(np.int16) if self.plotter: self.plotter.save_page_image(image_page) text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] mask_images = (text_regions_p_1[:, :] == 2) * 1 @@ -2931,10 +2878,10 @@ class Eynollah: self.logger.error(why) num_col = None #print("inside graphics 3 ", time.time() - t_in_gr) - return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light + return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, + text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light) def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light): - #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') #print(erosion_hurts, 'erosion_hurts') t_in_gr = time.time() @@ -2950,11 +2897,14 @@ class Eynollah: #print("inside graphics 1 ", time.time() - t_in_gr) textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] return page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page - def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts): + + def run_graphics_and_columns( + self, text_regions_p_1, + num_col_classifier, num_column_is_classified, erosion_hurts): + t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) @@ -2969,7 +2919,7 @@ class Eynollah: if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: - table_prediction = (np.zeros((image_page.shape[0], image_page.shape[1]))).astype(np.int16) + table_prediction = np.zeros((image_page.shape[0], image_page.shape[1])).astype(np.int16) if self.plotter: self.plotter.save_page_image(image_page) @@ -2987,7 +2937,6 @@ class Eynollah: img_only_regions = np.copy(img_only_regions_with_sep[:,:]) else: img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) - try: num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col = num_col + 1 @@ -2996,12 +2945,14 @@ class Eynollah: except Exception as why: self.logger.error(why) num_col = None - return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction + return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, + text_regions_p_1, cont_page, table_prediction) - def run_enhancement(self,light_version): + def run_enhancement(self, light_version): t_in = time.time() self.logger.info("Resizing and enhancing image...") - is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier(light_version) + is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = \ + self.resize_and_enhance_image_with_column_classifier(light_version) self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') scale = 1 if is_image_enhanced: @@ -3046,7 +2997,10 @@ class Eynollah: self.logger.info("slope_deskew: %.2f°", slope_deskew) return slope_deskew, slope_first - def run_marginals(self, image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): + def run_marginals( + self, image_page, textline_mask_tot_ea, mask_images, mask_lines, + num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): + image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] textline_mask_tot[mask_images[:, :] == 1] = 0 @@ -3060,7 +3014,9 @@ class Eynollah: if self.tables: regions_without_separators[table_prediction==1] = 1 regions_without_separators = regions_without_separators.astype(np.uint8) - text_regions_p = get_marginals(rotate_image(regions_without_separators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, light_version=self.light_version, kernel=KERNEL) + text_regions_p = get_marginals( + rotate_image(regions_without_separators, slope_deskew), text_regions_p, + num_col_classifier, slope_deskew, light_version=self.light_version, kernel=KERNEL) except Exception as e: self.logger.error("exception %s", e) @@ -3069,11 +3025,15 @@ class Eynollah: self.plotter.save_plot_of_layout_main(text_regions_p, image_page) return textline_mask_tot, text_regions_p, image_page_rotated - def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts): + def run_boxes_no_full_layout( + self, image_page, textline_mask_tot, text_regions_p, + slope_deskew, num_col_classifier, table_prediction, erosion_hurts): + self.logger.debug('enter run_boxes_no_full_layout') t_0_box = time.time() if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func( + image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1]) table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1]) @@ -3090,10 +3050,14 @@ class Eynollah: regions_without_separators_d = None pixel_lines = 3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: - _, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + _, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_lines) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_lines) #print(time.time()-t_0_box,'time box in 2') self.logger.info("num_col_classifier: %s", num_col_classifier) @@ -3107,7 +3071,9 @@ class Eynollah: #print(time.time()-t_0_box,'time box in 3') t1 = time.time() if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( + splitter_y_new, regions_without_separators, matrix_of_lines_ch, + num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes_d = None self.logger.debug("len(boxes): %s", len(boxes)) #print(time.time()-t_0_box,'time box in 3.1') @@ -3119,12 +3085,17 @@ class Eynollah: text_regions_p_tables = np.copy(text_regions_p) text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) + img_revised_tab2 = self.add_tables_heuristic_to_layout( + text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, + num_col_classifier , 0.000005, pixel_line) #print(time.time()-t_0_box,'time box in 3.2') - img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) + img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables( + img_revised_tab2, table_prediction, 10, num_col_classifier) #print(time.time()-t_0_box,'time box in 3.3') else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( + splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, + num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes = None self.logger.debug("len(boxes): %s", len(boxes_d)) @@ -3137,8 +3108,11 @@ class Eynollah: text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) + img_revised_tab2 = self.add_tables_heuristic_to_layout( + text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, + num_col_classifier, 0.000005, pixel_line) + img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( + img_revised_tab2, table_prediction_n, 10, num_col_classifier) img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) @@ -3185,55 +3159,71 @@ class Eynollah: contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) #print(time.time()-t_0_box,'time box in 5') self.logger.debug('exit run_boxes_no_full_layout') - return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables + return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, + regions_without_separators_d, boxes, boxes_d, + polygons_of_marginals, contours_tables) + + def run_boxes_full_layout( + self, image_page, textline_mask_tot, text_regions_p, + slope_deskew, num_col_classifier, img_only_regions, + table_prediction, erosion_hurts, img_bin_light): - def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light): self.logger.debug('enter run_boxes_full_layout') t_full0 = time.time() if self.tables: if self.light_version: text_regions_p[:,:][table_prediction[:,:]==1] = 10 - img_revised_tab=text_regions_p[:,:] + img_revised_tab = text_regions_p[:,:] if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) - regions_without_separators_d=(text_regions_p_1_n[:,:] == 1)*1 + regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 else: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_separators_d = None - regions_without_separators = (text_regions_p[:,:] == 1)*1#( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions) + # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1 + #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) + regions_without_separators = (text_regions_p[:,:] == 1)*1 regions_without_separators[table_prediction == 1] = 1 else: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) - regions_without_separators_d=(text_regions_p_1_n[:,:] == 1)*1 + regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 else: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_separators_d = None - - regions_without_separators = (text_regions_p[:,:] == 1)*1#( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions) + + # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1 + #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) + regions_without_separators = (text_regions_p[:,:] == 1)*1 regions_without_separators[table_prediction == 1] = 1 pixel_lines=3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, peaks_neg_fin, matrix_of_lines_ch, splitter_y_new, seperators_closeup_n = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_lines) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),num_col_classifier, self.tables, pixel_lines) + num_col_d, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_lines) if num_col_classifier>=3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3247,33 +3237,40 @@ class Eynollah: pass if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( + splitter_y_new, regions_without_separators, matrix_of_lines_ch, + num_col_classifier, erosion_hurts, self.tables, self.right2left) text_regions_p_tables = np.copy(text_regions_p) text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10 pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) - - img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier) + img_revised_tab2 = self.add_tables_heuristic_to_layout( + text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, + num_col_classifier , 0.000005, pixel_line) + img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables( + img_revised_tab2, table_prediction, 10, num_col_classifier) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( + splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, + num_col_classifier, erosion_hurts, self.tables, self.right2left) text_regions_p_tables = np.copy(text_regions_p_1_n) text_regions_p_tables = np.round(text_regions_p_tables) text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10 pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) + img_revised_tab2 = self.add_tables_heuristic_to_layout( + text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, + num_col_classifier, 0.000005, pixel_line) - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction_n, 10, num_col_classifier) + img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( + img_revised_tab2, table_prediction_n, 10, num_col_classifier) img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) - img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) - if np.abs(slope_deskew) < 0.13: img_revised_tab = np.copy(img_revised_tab2[:,:,0]) else: @@ -3281,7 +3278,6 @@ class Eynollah: img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 - ##img_revised_tab=img_revised_tab2[:,:,0] #img_revised_tab=text_regions_p[:,:] text_regions_p[:,:][text_regions_p[:,:]==10] = 0 @@ -3310,10 +3306,9 @@ class Eynollah: image_page = image_page.astype(np.uint8) #print("full inside 1", time.time()- t_full0) - if self.light_version: - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, False, cols=num_col_classifier) - else: - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, False, cols=num_col_classifier) + regions_fully, regions_fully_only_drop = self.extract_text_regions_new( + img_bin_light if self.light_version else image_page, + False, cols=num_col_classifier) #print("full inside 2", time.time()- t_full0) # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model @@ -3328,7 +3323,6 @@ class Eynollah: drop_capital_label_in_full_layout_model = 3 drops = (regions_fully[:,:,0]==drop_capital_label_in_full_layout_model)*1 - drops= drops.astype(np.uint8) regions_fully[:,:,0][regions_fully[:,:,0]==drop_capital_label_in_full_layout_model] = 1 @@ -3336,8 +3330,8 @@ class Eynollah: drops = cv2.erode(drops[:,:], KERNEL, iterations=1) regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model - - regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model, text_regions_p) + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout( + regions_fully, drop_capital_label_in_full_layout_model, text_regions_p) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 @@ -3353,7 +3347,8 @@ class Eynollah: #plt.show() ####if not self.tables: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - _, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew) + _, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout( + image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1]) @@ -3371,18 +3366,19 @@ class Eynollah: self.logger.debug('exit run_boxes_full_layout') #print("full inside 3", time.time()- t_full0) - return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables + return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, + regions_without_separators_d, regions_fully, regions_without_separators, + polygons_of_marginals, contours_tables) def our_load_model(self, model_file): - try: model = load_model(model_file, compile=False) except: - model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) - + model = load_model(model_file, compile=False, custom_objects={ + "PatchEncoder": PatchEncoder, "Patches": Patches}) return model - def do_order_of_regions_with_model(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] @@ -3394,10 +3390,11 @@ class Eynollah: img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') if contours_only_text_parent_h: - _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours( + contours_only_text_parent_h) for j in range(len(cy_main)): - img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12, + int(x_min_main[j]):int(x_max_main[j])] = 1 co_text_all = contours_only_text_parent + contours_only_text_parent_h else: co_text_all = contours_only_text_parent @@ -3480,7 +3477,7 @@ class Eynollah: region_ids = ['region_%04d' % i for i in range(len(co_text_all))] return ordered, region_ids - def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot): + def return_start_and_end_of_common_text_of_textline_ocr(self, textline_image, ind_tot): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] common_window = int(0.2*width) @@ -3492,18 +3489,14 @@ class Eynollah: sum_smoothed = gaussian_filter1d(img_sum, 3) peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: print(len(peaks_real), 'len(peaks_real)') peaks_real = peaks_real[(peaks_realwidth1)] arg_sort = np.argsort(sum_smoothed[peaks_real]) - arg_sort4 =arg_sort[::-1][:4] - peaks_sort_4 = peaks_real[arg_sort][::-1][:4] - argsort_sorted = np.argsort(peaks_sort_4) first_4_sorted = peaks_sort_4[argsort_sorted] @@ -3522,9 +3515,8 @@ class Eynollah: return peaks_final[0], peaks_final[1] else: pass - - - def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self,textline_image, ind_tot): + + def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image, ind_tot): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] common_window = int(0.06*width) @@ -3536,14 +3528,12 @@ class Eynollah: sum_smoothed = gaussian_filter1d(img_sum, 3) peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: #print(len(peaks_real), 'len(peaks_real)') peaks_real = peaks_real[(peaks_realwidth1)] arg_max = np.argmax(sum_smoothed[peaks_real]) - peaks_final = peaks_real[arg_max] #plt.figure(ind_tot) @@ -3555,15 +3545,15 @@ class Eynollah: return peaks_final else: return None - def return_start_and_end_of_common_text_of_textline_ocr_new_splitted(self,peaks_real, sum_smoothed, start_split, end_split): + + def return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + self, peaks_real, sum_smoothed, start_split, end_split): + peaks_real = peaks_real[(peaks_realstart_split)] arg_sort = np.argsort(sum_smoothed[peaks_real]) - arg_sort4 =arg_sort[::-1][:4] - peaks_sort_4 = peaks_real[arg_sort][::-1][:4] - argsort_sorted = np.argsort(peaks_sort_4) first_4_sorted = peaks_sort_4[argsort_sorted] @@ -3573,8 +3563,8 @@ class Eynollah: arg_sortnew = np.argsort(y_4_sorted) peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] ) return peaks_final[0] - - def return_start_and_end_of_common_text_of_textline_ocr_new(self,textline_image, ind_tot): + + def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] common_window = int(0.15*width) @@ -3587,11 +3577,11 @@ class Eynollah: sum_smoothed = gaussian_filter1d(img_sum, 3) peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: - peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(peaks_real, sum_smoothed, width1, mid+2) - - peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(peaks_real, sum_smoothed, mid-2, width2) + peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + peaks_real, sum_smoothed, width1, mid+2) + peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + peaks_real, sum_smoothed, mid-2, width2) #plt.figure(ind_tot) #plt.imshow(textline_image) @@ -3602,23 +3592,23 @@ class Eynollah: return peak_start, peak_end else: pass - - def return_ocr_of_textline_without_common_section(self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + + def return_ocr_of_textline_without_common_section( + self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + if h2w_ratio > 0.05: pixel_values = processor(textline_image, return_tensors="pt").pixel_values generated_ids = model_ocr.generate(pixel_values.to(device)) generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] else: - #width = np.shape(textline_image)[1] #height = np.shape(textline_image)[0] #common_window = int(0.3*width) - #width1 = int ( width/2. - common_window ) #width2 = int ( width/2. + common_window ) - - split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image, ind_tot) + split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section( + textline_image, ind_tot) if split_point: image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) @@ -3652,7 +3642,10 @@ class Eynollah: #print(generated_text,'generated_text') #print('########################################') return generated_text - def return_ocr_of_textline(self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + + def return_ocr_of_textline( + self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + if h2w_ratio > 0.05: pixel_values = processor(textline_image, return_tensors="pt").pixel_values generated_ids = model_ocr.generate(pixel_values.to(device)) @@ -3661,7 +3654,6 @@ class Eynollah: #width = np.shape(textline_image)[1] #height = np.shape(textline_image)[0] #common_window = int(0.3*width) - #width1 = int ( width/2. - common_window ) #width2 = int ( width/2. + common_window ) @@ -3683,8 +3675,8 @@ class Eynollah: #print(generated_text2, 'generated_text2') #print('########################################') - match = sq(None, generated_text1, generated_text2).find_longest_match(0, len(generated_text1), 0, len(generated_text2)) - + match = sq(None, generated_text1, generated_text2).find_longest_match( + 0, len(generated_text1), 0, len(generated_text2)) generated_text = generated_text1 + generated_text2[match.b+match.size:] except: pixel_values = processor(textline_image, return_tensors="pt").pixel_values @@ -3692,43 +3684,44 @@ class Eynollah: generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] return generated_text - + def return_textline_contour_with_added_box_coordinate(self, textline_contour, box_ind): textline_contour[:,0] = textline_contour[:,0] + box_ind[2] textline_contour[:,1] = textline_contour[:,1] + box_ind[0] return textline_contour + def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] - - def return_it_in_two_groups(self,x_differential): - split = [ind if x_differential[ind]!=x_differential[ind+1] else -1 for ind in range(len(x_differential)-1)] + def return_it_in_two_groups(self, x_differential): + split = [ind if x_differential[ind]!=x_differential[ind+1] else -1 + for ind in range(len(x_differential)-1)] split_masked = list( np.array(split[:])[np.array(split[:])!=-1] ) - if 0 not in split_masked: split_masked.insert(0, -1) - split_masked.append(len(x_differential)-1) split_masked = np.array(split_masked) +1 - sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind+1]]) for ind in range(len(split_masked)-1)] - - indexes_to_bec_changed = [ind if ( np.abs(sums[ind-1]) > np.abs(sums[ind]) and np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1 for ind in range(1,len(sums)-1) ] + sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind+1]]) + for ind in range(len(split_masked)-1)] + indexes_to_bec_changed = [ind if (np.abs(sums[ind-1]) > np.abs(sums[ind]) and + np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1 + for ind in range(1,len(sums)-1)] indexes_to_bec_changed_filtered = np.array(indexes_to_bec_changed)[np.array(indexes_to_bec_changed)!=-1] x_differential_new = np.copy(x_differential) for i in indexes_to_bec_changed_filtered: - x_differential_new[split_masked[i]:split_masked[i+1]] = -1*np.array(x_differential)[split_masked[i]:split_masked[i+1]] + i_slice = slice(split_masked[i], split_masked[i+1]) + x_differential_new[i_slice] = -1 * np.array(x_differential)[i_slice] return x_differential_new - def dilate_textregions_contours_textline_version(self,all_found_textline_polygons): + + def dilate_textregions_contours_textline_version(self, all_found_textline_polygons): #print(all_found_textline_polygons) - for j in range(len(all_found_textline_polygons)): for ij in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][ij] area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) @@ -3736,7 +3729,6 @@ class Eynollah: x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - x_differential = gaussian_filter1d(x_differential, 0.1) y_differential = gaussian_filter1d(y_differential, 0.1) @@ -3754,7 +3746,6 @@ class Eynollah: inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) - if (y_max-y_min) <= (x_max-x_min): dilation_m1 = round(area / (x_max-x_min) * 0.12) else: @@ -3786,7 +3777,6 @@ class Eynollah: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -3802,20 +3792,16 @@ class Eynollah: con_ind = con_ind.astype(np.int32) - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] - + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) + for ind in range(len(con_scaled[:,0, 1])) ] results = np.array(results) - #print(results,'results') - results[results==0] = 1 - diff_result = np.diff(results) indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - if results[0]==1: con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] @@ -3823,27 +3809,22 @@ class Eynollah: #indices_2 = indices_2[1:] indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - for ii in range(len(indices_2)): con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons - def dilate_textregions_contours(self,all_found_textline_polygons): + + def dilate_textregions_contours(self, all_found_textline_polygons): #print(all_found_textline_polygons) for j in range(len(all_found_textline_polygons)): - con_ind = all_found_textline_polygons[j] #print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) @@ -3852,7 +3833,6 @@ class Eynollah: x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - x_differential = gaussian_filter1d(x_differential, 0.1) y_differential = gaussian_filter1d(y_differential, 0.1) @@ -3870,7 +3850,6 @@ class Eynollah: inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) - if (y_max-y_min) <= (x_max-x_min): dilation_m1 = round(area / (x_max-x_min) * 0.12) else: @@ -3902,7 +3881,6 @@ class Eynollah: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -3918,50 +3896,38 @@ class Eynollah: con_ind = con_ind.astype(np.int32) - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] - + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) + for ind in range(len(con_scaled[:,0, 1])) ] results = np.array(results) - #print(results,'results') - results[results==0] = 1 - diff_result = np.diff(results) - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - if results[0]==1: con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] #indices_2 = indices_2[1:] indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - for ii in range(len(indices_2)): con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons - - def dilate_textline_contours(self,all_found_textline_polygons): + def dilate_textline_contours(self, all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for ij in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][ij] area = cv2.contourArea(con_ind) @@ -3991,7 +3957,6 @@ class Eynollah: dilation_m1 = round(area / (x_max-x_min) * 0.35) else: dilation_m1 = round(area / (y_max-y_min) * 0.35) - if dilation_m1>12: dilation_m1 = 12 @@ -4017,7 +3982,6 @@ class Eynollah: else: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -4030,16 +3994,13 @@ class Eynollah: con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - con_ind = con_ind.astype(np.int32) - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] - + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) + for ind in range(len(con_scaled[:,0, 1])) ] results = np.array(results) - results[results==0] = 1 - diff_result = np.diff(results) indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] @@ -4050,13 +4011,10 @@ class Eynollah: con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] indices_2 = indices_2[:-1] - for ii in range(len(indices_2)): con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] @@ -4071,12 +4029,11 @@ class Eynollah: areas = [cv2.contourArea(contours[j]) for j in range(len(contours))] area_tot = image.shape[0]*image.shape[1] - M_main = [cv2.moments(contours[j]) for j in range(len(contours))] + M_main = [cv2.moments(contours[j]) + for j in range(len(contours))] cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - - areas_ratio = np.array(areas)/ area_tot contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] @@ -4084,9 +4041,11 @@ class Eynollah: #contours_> = [contours[ind] for ind in contours_index_big] indexes_to_be_removed = [] for ind_small in contours_index_small: - results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big ] + results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) + for ind in contours_index_big] if marginal_cnts: - results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in range(len(marginal_cnts)) ] + results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) + for ind in range(len(marginal_cnts))] results_marginal = np.array(results_marginal) if np.any(results_marginal==1): @@ -4096,7 +4055,6 @@ class Eynollah: if np.any(results==1): indexes_to_be_removed.append(ind_small) - if len(indexes_to_be_removed)>0: indexes_to_be_removed = np.unique(indexes_to_be_removed) @@ -4105,8 +4063,7 @@ class Eynollah: contours.pop(ind) return contours - - + else: contours_txtline_of_all_textregions = [] indexes_of_textline_tot = [] @@ -4115,32 +4072,23 @@ class Eynollah: for jj in range(len(contours)): contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] - ind_ins = np.zeros( len(contours[jj]) ) + jj - list_ind_ins = list(ind_ins) - - ind_textline_inside_tr = np.array (range(len(contours[jj])) ) + ind_textline_inside_tr = list(range(len(contours[jj]))) + index_textline_inside_textregion = index_textline_inside_textregion + ind_textline_inside_tr + ind_ins = [0] * len(contours[jj]) + jj + indexes_of_textline_tot = indexes_of_textline_tot + ind_ins - list_ind_textline_inside_tr = list(ind_textline_inside_tr) - - index_textline_inside_textregion = index_textline_inside_textregion + list_ind_textline_inside_tr - - indexes_of_textline_tot = indexes_of_textline_tot + list_ind_ins - - - M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] + M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) + for j in range(len(contours_txtline_of_all_textregions))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] area_tot_tot = image.shape[0]*image.shape[1] textregion_index_to_del = [] textline_in_textregion_index_to_del = [] for ij in range(len(contours_txtline_of_all_textregions)): - args_all = list(np.array(range(len(contours_txtline_of_all_textregions)))) - args_all.pop(ij) areas_without = np.array(areas_tot)[args_all] @@ -4149,38 +4097,38 @@ class Eynollah: args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest] if len(args_with_bigger_area)>0: - results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) for ind in args_with_bigger_area ] + results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) + for ind in args_with_bigger_area ] results = np.array(results) if np.any(results==1): #print(indexes_of_textline_tot[ij], index_textline_inside_textregion[ij]) textregion_index_to_del.append(int(indexes_of_textline_tot[ij])) textline_in_textregion_index_to_del.append(int(index_textline_inside_textregion[ij])) #contours[int(indexes_of_textline_tot[ij])].pop(int(index_textline_inside_textregion[ij])) - - uniqe_args_trs = np.unique(textregion_index_to_del) - - for ind_u_a_trs in uniqe_args_trs: - textline_in_textregion_index_to_del_ind = np.array(textline_in_textregion_index_to_del)[np.array(textregion_index_to_del)==ind_u_a_trs] + + textregion_index_to_del = np.array(textregion_index_to_del) + textline_in_textregion_index_to_del = np.array(textline_in_textregion_index_to_del) + for ind_u_a_trs in np.unique(textregion_index_to_del): + textline_in_textregion_index_to_del_ind = textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs] textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1] - for ittrd in textline_in_textregion_index_to_del_ind: contours[ind_u_a_trs].pop(ittrd) return contours - - - def filter_contours_without_textline_inside(self,contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered): - + def filter_contours_without_textline_inside( + self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered): + ###contours_txtline_of_all_textregions = [] - ###for jj in range(len(contours_textline)): ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] - ###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] - ###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32)) for j in range(len(M_main_textline))] - ###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32)) for j in range(len(M_main_textline))] - + ###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j]) + ### for j in range(len(contours_txtline_of_all_textregions))] + ###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32)) + ### for j in range(len(M_main_textline))] + ###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32)) + ### for j in range(len(M_main_textline))] ###M_main = [cv2.moments(contours[j]) for j in range(len(contours))] ###cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] @@ -4188,8 +4136,8 @@ class Eynollah: ###contours_with_textline = [] ###for ind_tr, con_tr in enumerate(contours): - ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False) for index_textline_con in range(len(contours_txtline_of_all_textregions)) ] - + ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False) + ### for index_textline_con in range(len(contours_txtline_of_all_textregions)) ] ###results = np.array(results) ###if np.any(results==1): ###contours_with_textline.append(con_tr) @@ -4202,7 +4150,6 @@ class Eynollah: uniqe_args_trs = np.unique(textregion_index_to_del) uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1] - for ind_u_a_trs in uniqe_args_trs_sorted: contours.pop(ind_u_a_trs) contours_textline.pop(ind_u_a_trs) @@ -4211,11 +4158,10 @@ class Eynollah: return contours, text_con_org, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) - def dilate_textlines(self,all_found_textline_polygons): + def dilate_textlines(self, all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for i in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][i] - con_ind = con_ind.astype(np.float) x_differential = np.diff( con_ind[:,0,0]) @@ -4227,11 +4173,8 @@ class Eynollah: x_max = float(np.max( con_ind[:,0,0] )) y_max = float(np.max( con_ind[:,0,1] )) - if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: - x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) - mult = x_biger_than_x*x_differential arg_min_mult = np.argmin(mult) @@ -4239,33 +4182,25 @@ class Eynollah: if y_differential[0]==0: y_differential[0] = 0.1 - if y_differential[-1]==0: y_differential[-1]= 0.1 - - - - y_differential = [y_differential[ind] if y_differential[ind]!=0 else (y_differential[ind-1] + y_differential[ind+1])/2. for ind in range(len(y_differential)) ] - + y_differential = [y_differential[ind] if y_differential[ind] != 0 + else 0.5 * (y_differential[ind-1] + y_differential[ind+1]) + for ind in range(len(y_differential))] if y_differential[0]==0.1: y_differential[0] = y_differential[1] if y_differential[-1]==0.1: y_differential[-1] = y_differential[-2] - y_differential.append(y_differential[0]) - y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] - + y_differential = [-1 if y_differential[ind] < 0 else 1 + for ind in range(len(y_differential))] y_differential = self.return_it_in_two_groups(y_differential) - y_differential = np.array(y_differential) - con_scaled = con_ind*1 - con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential - con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 @@ -4284,10 +4219,8 @@ class Eynollah: except: pass - else: y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) - mult = y_biger_than_x*y_differential arg_min_mult = np.argmin(mult) @@ -4295,32 +4228,25 @@ class Eynollah: if x_differential[0]==0: x_differential[0] = 0.1 - if x_differential[-1]==0: x_differential[-1]= 0.1 - - - - x_differential = [x_differential[ind] if x_differential[ind]!=0 else (x_differential[ind-1] + x_differential[ind+1])/2. for ind in range(len(x_differential)) ] - + x_differential = [x_differential[ind] if x_differential[ind] != 0 + else 0.5 * (x_differential[ind-1] + x_differential[ind+1]) + for ind in range(len(x_differential))] if x_differential[0]==0.1: x_differential[0] = x_differential[1] if x_differential[-1]==0.1: x_differential[-1] = x_differential[-2] - x_differential.append(x_differential[0]) - x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] - + x_differential = [-1 if x_differential[ind] < 0 else 1 + for ind in range(len(x_differential))] x_differential = self.return_it_in_two_groups(x_differential) x_differential = np.array(x_differential) - con_scaled = con_ind*1 - con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential - con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 @@ -4338,17 +4264,19 @@ class Eynollah: con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 except: pass - - + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] - + return all_found_textline_polygons - def delete_regions_without_textlines(self,slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con): + def delete_regions_without_textlines( + self, slopes, all_found_textline_polygons, boxes_text, txt_con_org, + contours_only_text_parent, index_by_text_par_con): + slopes_rem = [] all_found_textline_polygons_rem = [] boxes_text_rem = [] @@ -4368,9 +4296,11 @@ class Eynollah: index_sort = np.argsort(index_by_text_par_con_rem) indexes_new = np.array(range(len(index_by_text_par_con_rem))) - index_by_text_par_con_rem_sort = [indexes_new[index_sort==j][0] for j in range(len(index_by_text_par_con_rem))] + index_by_text_par_con_rem_sort = [indexes_new[index_sort==j][0] + for j in range(len(index_by_text_par_con_rem))] - return slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, contours_only_text_parent_rem, index_by_text_par_con_rem_sort + return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, + contours_only_text_parent_rem, index_by_text_par_con_rem_sort) def run(self): """ @@ -4400,10 +4330,13 @@ class Eynollah: img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) if self.extract_only_images: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml,polygons_of_images,image_page, page_coord, cont_page = self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) + text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ + self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - + pcgts = self.writer.build_pagexml_no_full_layout( + [], page_coord, [], [], [], [], + polygons_of_images, [], [], [], [], [], + cont_page, [], [], ocr_all_textlines) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) @@ -4414,21 +4347,26 @@ class Eynollah: return pcgts if self.skip_layout_and_reading_order: - _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, - skip_layout_and_reading_order=self.skip_layout_and_reading_order) + _ ,_, _, textline_mask_tot_ea, img_bin_light = \ + self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, + skip_layout_and_reading_order=self.skip_layout_and_reading_order) - page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = \ + self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) - all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + all_found_textline_polygons = filter_contours_area_of_image( + textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") + all_found_textline_polygons = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( + all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") order_text_new = [0] @@ -4443,10 +4381,11 @@ class Eynollah: polygons_lines_xml = [] contours_tables = [] ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + pcgts = self.writer.build_pagexml_no_full_layout( + cont_page, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) if self.dir_in: self.writer.write_pagexml(pcgts) continue @@ -4456,17 +4395,16 @@ class Eynollah: #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = \ + self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: + else: img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1] textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) @@ -4475,18 +4413,23 @@ class Eynollah: slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ + text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, + num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) #print("text region early -4 in %.1fs", time.time() - t0) else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \ + self.get_regions_from_xy_2models(img_res, is_image_enhanced, + num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ + text_regions_p_1, cont_page, table_prediction = \ self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) self.logger.info("Graphics detection took %.1fs ", time.time() - t1) #self.logger.info('cont_page %s', cont_page) @@ -4496,7 +4439,9 @@ class Eynollah: if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + pcgts = self.writer.build_pagexml_no_full_layout( + [], page_coord, [], [], [], [], [], [], [], [], [], [], + cont_page, [], [], ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t1) if self.dir_in: self.writer.write_pagexml(pcgts) @@ -4517,11 +4462,9 @@ class Eynollah: org_w_l_m = textline_mask_tot_ea.shape[1] if num_col_classifier == 1: img_w_new = 2000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: + else: img_w_new = 2400 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1] image_page = resize_image(image_page,img_h_new, img_w_new ) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) @@ -4530,7 +4473,9 @@ class Eynollah: text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + textline_mask_tot, text_regions_p, image_page_rotated = \ + self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, + num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) if self.light_version and num_col_classifier in (1,2): image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) @@ -4546,12 +4491,17 @@ class Eynollah: ## birdan sora chock chakir t1 = time.time() if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ - self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ + boxes, boxes_d, polygons_of_marginals, contours_tables = \ + self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, + num_col_classifier, table_prediction, erosion_hurts) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) else: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ - self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ + regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ + self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, + num_col_classifier, img_only_regions, table_prediction, erosion_hurts, + img_bin_light if self.light_version else None) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.light_version: drop_label_in_full_layout = 4 @@ -4572,18 +4522,23 @@ class Eynollah: areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) #self.logger.info('areas_cnt_text %s', areas_cnt_text) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) + if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + contours_only_text_parent = self.return_list_of_contours_with_desired_order( + contours_only_text_parent, index_con_parents) ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##contours_only_text_parent = \ + ##list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##contours_only_text_parent = \ + ##list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order( + areas_cnt_text_parent, index_con_parents) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) @@ -4598,14 +4553,17 @@ class Eynollah: if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order( + contours_only_text_parent_d, index_con_parents_d) #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #contours_only_text_parent_d = \ + #list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - + #contours_only_text_parent_d = \ + #list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order( + areas_cnt_text_d, index_con_parents_d) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) @@ -4613,12 +4571,16 @@ class Eynollah: if len(cx_bigest_d) >= 5: cx_bigest_d_last5 = cx_bigest_d[-5:] cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) + for j in range(len(cy_biggest_d_last5))] ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) else: cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) + for j in range(len(cy_biggest_d_last5))] ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) cx_bigest_d_big[0] = cx_bigest_d[ind_largest] @@ -4639,7 +4601,9 @@ class Eynollah: p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) p[0] = p[0] - x_diff[0] p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + + (p[1] - cy_biggest_d[j]) ** 2) + for j in range(len(cx_bigest_d))] contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) @@ -4659,9 +4623,17 @@ class Eynollah: # stop early empty_marginals = [[]] * len(polygons_of_marginals) if self.full_layout: - pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) + pcgts = self.writer.build_pagexml_full_layout( + [], [], page_coord, [], [], [], [], [], [], + polygons_of_images, contours_tables, [], + polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], + cont_page, polygons_lines_xml, []) else: - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) + pcgts = self.writer.build_pagexml_no_full_layout( + [], page_coord, [], [], [], [], + polygons_of_images, + polygons_of_marginals, empty_marginals, empty_marginals, [], [], + cont_page, polygons_lines_xml, contours_tables, []) self.logger.info("Job done in %.1fs", time.time() - t0) if self.dir_in: self.writer.write_pagexml(pcgts) @@ -4671,14 +4643,18 @@ class Eynollah: #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + contours_only_text_parent = self.dilate_textregions_contours( + contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one( + contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map) + txt_con_org = get_textregion_contours_in_org_image_light( + contours_only_text_parent, self.image, slope_first, map=self.executor.map) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + txt_con_org = get_textregion_contours_in_org_image( + contours_only_text_parent, self.image, slope_first) #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) @@ -4687,59 +4663,84 @@ class Eynollah: if not self.curved_line: if self.light_version: if self.textline_light: - #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_light2(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ + all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2( + txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, + image_page_rotated, boxes_text, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ + all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2( + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, + image_page_rotated, boxes_marginals, slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ - # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - + # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, + # boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ - # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) + # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, + # boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) - - contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, index_by_text_par_con = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered) - + all_found_textline_polygons = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( + all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons_marginals) + contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, \ + index_by_text_par_con = self.filter_contours_without_textline_inside( + contours_only_text_parent, txt_con_org, all_found_textline_polygons, + contours_only_text_parent_d_ordered) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - - #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \ + index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light( + txt_con_org, contours_only_text_parent, textline_mask_tot_ea, + image_page_rotated, boxes_text, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ + all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light( + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, + image_page_rotated, boxes_marginals, slope_deskew) + #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( + # all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ + all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new( + txt_con_org, contours_only_text_parent, textline_mask_tot_ea, + image_page_rotated, boxes_text, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ + all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new( + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, + image_page_rotated, boxes_marginals, slope_deskew) else: scale_param = 1 textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ + all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved( + txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, + image_page_rotated, boxes_text, text_only, + num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2( + all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ + all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved( + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, + image_page_rotated, boxes_marginals, text_only, + num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( + all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order( + contours_only_text_parent_d_ordered, index_by_text_par_con) #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) else: #takes long timee contours_only_text_parent_d_ordered = None @@ -4749,8 +4750,9 @@ class Eynollah: fun = check_any_text_region_in_model_one_is_main_or_header text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ - contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \ - fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = fun( + text_regions_p, regions_fully, contours_only_text_parent, + all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) if self.plotter: self.plotter.save_plot_of_layout(text_regions_p, image_page) @@ -4758,60 +4760,76 @@ class Eynollah: pixel_img = 4 polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, - all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, - kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) - pixel_lines = 6 + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline( + text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, + all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, + kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) if not self.reading_order_machine_based: + pixel_seps = 6 if not self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h_d_ordered) elif self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_seps) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_seps) if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: regions_without_separators = regions_without_separators.astype(np.uint8) regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - else: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( + splitter_y_new, regions_without_separators, matrix_of_lines_ch, + num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( + splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, + num_col_classifier, erosion_hurts, self.tables, self.right2left) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) t_order = time.time() if self.full_layout: - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( + contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + order_text_new, id_of_texts_tot = self.do_order_of_regions( + contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + order_text_new, id_of_texts_tot = self.do_order_of_regions( + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) if self.ocr: ocr_all_textlines = [] else: ocr_all_textlines = None - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, - cont_page, polygons_lines_xml, ocr_all_textlines) + pcgts = self.writer.build_pagexml_full_layout( + contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, + polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, + cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) #print("Job done in %.1fs", time.time() - t0) if self.dir_in: @@ -4823,21 +4841,25 @@ class Eynollah: else: contours_only_text_parent_h = None if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( + contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + order_text_new, id_of_texts_tot = self.do_order_of_regions( + contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order( + contours_only_text_parent_d_ordered, index_by_text_par_con) #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions( + contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) if self.ocr: - device = cuda.get_current_device() device.reset() gc.collect() @@ -4849,7 +4871,6 @@ class Eynollah: ind_tot = 0 #cv2.imwrite('./img_out.png', image_page) - ocr_all_textlines = [] for indexing, ind_poly_first in enumerate(all_found_textline_polygons): ocr_textline_in_textregion = [] @@ -4871,7 +4892,6 @@ class Eynollah: img_poly_on_img = np.copy(image_page) else: img_poly_on_img = np.copy(img_bin_light) - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) if self.textline_light: @@ -4883,10 +4903,7 @@ class Eynollah: img_croped = img_poly_on_img[y:y+h, x:x+w, :] #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) - ocr_textline_in_textregion.append(text_ocr) - - ind_tot = ind_tot +1 ocr_all_textlines.append(ocr_textline_in_textregion) @@ -4894,9 +4911,11 @@ class Eynollah: ocr_all_textlines = None #print(ocr_all_textlines) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + pcgts = self.writer.build_pagexml_no_full_layout( + txt_con_org, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) #print("Job done in %.1fs" % (time.time() - t0)) self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index d7f9ccd..feab341 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -14,9 +14,9 @@ from .contour import (contours_in_same_horizon, return_contours_of_image, return_parent_contours) -def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x_max_hor_some,cy_hor_some,peak_points,cy_hor_diff): - - +def return_x_start_end_mothers_childs_and_type_of_reading_order( + x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff): + x_start=[] x_end=[] kind=[]#if covers 2 and more than 2 columns set it to 1 otherwise 0 @@ -30,15 +30,12 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x starting=x_min_hor_some[i]-peak_points starting=starting[starting>=0] min_start=np.argmin(starting) - - ending=peak_points-x_max_hor_some[i] len_ending_neg=len(ending[ending<=0]) ending=ending[ending>0] max_end=np.argmin(ending)+len_ending_neg - if (max_end-min_start)>=2: if (max_end-min_start)==(len(peak_points)-1): new_main_sep_y.append(indexer) @@ -57,18 +54,13 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x kind.append(1) indexer+=1 + + x_start_returned = np.array(x_start, dtype=int) + x_end_returned = np.array(x_end, dtype=int) + y_sep_returned = np.array(y_sep, dtype=int) + y_diff_returned = np.array(y_diff, dtype=int) - - x_start_returned=np.copy(x_start) - x_end_returned=np.copy(x_end) - y_sep_returned=np.copy(y_sep) - y_diff_returned=np.copy(y_diff) - - - - - all_args_uniq=contours_in_same_horizon(y_sep_returned) - + all_args_uniq = contours_in_same_horizon(y_sep_returned) args_to_be_unified=[] y_unified=[] y_diff_unified=[] @@ -84,7 +76,10 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x y_sep_same_hor=np.array(y_sep_returned)[all_args_uniq[dd]] y_diff_same_hor=np.array(y_diff_returned)[all_args_uniq[dd]] #print('burda2') - if x_s_same_hor[0]==(x_e_same_hor[1]-1) or x_s_same_hor[1]==(x_e_same_hor[0]-1) and x_s_same_hor[0]!=x_s_same_hor[1] and x_e_same_hor[0]!=x_e_same_hor[1]: + if (x_s_same_hor[0]==x_e_same_hor[1]-1 or + x_s_same_hor[1]==x_e_same_hor[0]-1 and + x_s_same_hor[0]!=x_s_same_hor[1] and + x_e_same_hor[0]!=x_e_same_hor[1]): #print('burda3') for arg_in in all_args_uniq[dd]: #print(arg_in,'arg_in') @@ -98,19 +93,14 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x x_e_unified.append(x_e_selected) y_unified.append(y_selected) y_diff_unified.append(y_diff_selected) - - - #print(x_s_same_hor,'x_s_same_hor') #print(x_e_same_hor[:]-1,'x_e_same_hor') #print('#############################') - #print(x_s_unified,'y_selected') #print(x_e_unified,'x_s_selected') #print(y_unified,'x_e_same_hor') - + args_lines_not_unified=list( set(range(len(y_sep_returned)))-set(args_to_be_unified) ) - #print(args_lines_not_unified,'args_lines_not_unified') x_start_returned_not_unified=list( np.array(x_start_returned)[args_lines_not_unified] ) @@ -128,11 +118,10 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x #print(x_start_returned,'x_start_returned') #print(x_end_returned,'x_end_returned') - x_start_returned=np.copy(x_start_returned_not_unified) - x_end_returned=np.copy(x_end_returned_not_unified) - y_sep_returned=np.copy(y_sep_returned_not_unified) - y_diff_returned=np.copy(y_diff_returned_not_unified) - + x_start_returned = np.array(x_start_returned_not_unified, dtype=int) + x_end_returned = np.array(x_end_returned_not_unified, dtype=int) + y_sep_returned = np.array(y_sep_returned_not_unified, dtype=int) + y_diff_returned = np.array(y_diff_returned_not_unified, dtype=int) #print(y_sep_returned,'y_sep_returned2') #print(x_start_returned,'x_start_returned2') @@ -165,19 +154,19 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x #print(y_min_new,'y_min_new') #print(y_max_new,'y_max_new') - - #print(y_sep[new_main_sep_y[0]],y_sep,'yseps') x_start=np.array(x_start) x_end=np.array(x_end) kind=np.array(kind) y_sep=np.array(y_sep) - if (y_min_new in y_mains_sep_ohne_grenzen) and (y_max_new in y_mains_sep_ohne_grenzen): + if (y_min_new in y_mains_sep_ohne_grenzen and + y_max_new in y_mains_sep_ohne_grenzen): x_start=x_start[(y_sep>y_min_new) & (y_sepy_min_new) & (y_sepy_min_new) & (y_sepy_min_new) & (y_sepy_min_new) & (y_sep<=y_max_new)] #print('burda1') @@ -185,7 +174,8 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x #print('burda2') kind=kind[(y_sep>y_min_new) & (y_sep<=y_max_new)] y_sep=y_sep[(y_sep>y_min_new) & (y_sep<=y_max_new)] - elif (y_min_new not in y_mains_sep_ohne_grenzen) and (y_max_new in y_mains_sep_ohne_grenzen): + elif (y_min_new not in y_mains_sep_ohne_grenzen and + y_max_new in y_mains_sep_ohne_grenzen): x_start=x_start[(y_sep>=y_min_new) & (y_sep=y_min_new) & (y_sep=y_min_new) & (y_sep1: #print(np.array(remained_sep_indexes),'np.array(remained_sep_indexes)') #print(np.array(mother),'mother') - remained_sep_indexes_without_mother=np.array(list(remained_sep_indexes))[np.array(mother)==0] - remained_sep_indexes_with_child_without_mother=np.array(list(remained_sep_indexes))[(np.array(mother)==0) & (np.array(child)==1)] + remained_sep_indexes_without_mother = remained_sep_indexes[mother==0] + remained_sep_indexes_with_child_without_mother = remained_sep_indexes[mother==0 & child==1] #print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother') - - - x_end_with_child_without_mother=np.array(x_end)[np.array(remained_sep_indexes_with_child_without_mother)] - - x_start_with_child_without_mother=np.array(x_start)[np.array(remained_sep_indexes_with_child_without_mother)] - - y_lines_with_child_without_mother=np.array(y_sep)[np.array(remained_sep_indexes_with_child_without_mother)] - - + x_end_with_child_without_mother = x_end[remained_sep_indexes_with_child_without_mother] + x_start_with_child_without_mother = x_start[remained_sep_indexes_with_child_without_mother] + y_lines_with_child_without_mother = y_sep[remained_sep_indexes_with_child_without_mother] + reading_orther_type=0 - - - x_end_without_mother=np.array(x_end)[np.array(remained_sep_indexes_without_mother)] - x_start_without_mother=np.array(x_start)[np.array(remained_sep_indexes_without_mother)] - y_lines_without_mother=np.array(y_sep)[np.array(remained_sep_indexes_without_mother)] + x_end_without_mother = x_end[remained_sep_indexes_without_mother] + x_start_without_mother = x_start[remained_sep_indexes_without_mother] + y_lines_without_mother = y_sep[remained_sep_indexes_without_mother] if len(remained_sep_indexes_without_mother)>=2: for i in range(len(remained_sep_indexes_without_mother)-1): - ##nodes_i=set(range(x_start[remained_sep_indexes_without_mother[i]],x_end[remained_sep_indexes_without_mother[i]]+1)) - nodes_i=set(range(x_start[remained_sep_indexes_without_mother[i]],x_end[remained_sep_indexes_without_mother[i]])) + nodes_i=set(range(x_start[remained_sep_indexes_without_mother[i]], + x_end[remained_sep_indexes_without_mother[i]] + # + 1 + )) for j in range(i+1,len(remained_sep_indexes_without_mother)): - #nodes_j=set(range(x_start[remained_sep_indexes_without_mother[j]],x_end[remained_sep_indexes_without_mother[j]]+1)) - nodes_j=set(range(x_start[remained_sep_indexes_without_mother[j]],x_end[remained_sep_indexes_without_mother[j]])) - - set_diff=nodes_i-nodes_j - - if set_diff!=nodes_i: - reading_orther_type=1 + nodes_j=set(range(x_start[remained_sep_indexes_without_mother[j]], + x_end[remained_sep_indexes_without_mother[j]] + # + 1 + )) + set_diff = nodes_i - nodes_j + if set_diff != nodes_i: + reading_orther_type = 1 else: - reading_orther_type=0 + reading_orther_type = 0 #print(reading_orther_type,'javab') - #print(y_lines_with_child_without_mother,'y_lines_with_child_without_mother') #print(x_start_with_child_without_mother,'x_start_with_child_without_mother') #print(x_end_with_child_without_mother,'x_end_with_hild_without_mother') - len_sep_with_child=len(np.array(child)[np.array(child)==1]) + len_sep_with_child = len(child[child==1]) #print(len_sep_with_child,'len_sep_with_child') - there_is_sep_with_child=0 - - if len_sep_with_child>=1: - there_is_sep_with_child=1 - + there_is_sep_with_child = 0 + if len_sep_with_child >= 1: + there_is_sep_with_child = 1 #print(all_args_uniq,'all_args_uniq') #print(args_to_be_unified,'args_to_be_unified') - - return reading_orther_type,x_start_returned, x_end_returned ,y_sep_returned,y_diff_returned,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother,new_main_sep_y + return (reading_orther_type, + x_start_returned, + x_end_returned, + y_sep_returned, + y_diff_returned, + y_lines_without_mother, + x_start_without_mother, + x_end_without_mother, + there_is_sep_with_child, + y_lines_with_child_without_mother, + x_start_with_child_without_mother, + x_end_with_child_without_mother, + new_main_sep_y) + def crop_image_inside_box(box, img_org_copy): image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]] return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] @@ -304,7 +303,6 @@ def otsu_copy_binary(img): img1 = img[:, :, 0] retval1, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) - img_r[:, :, 0] = threshold1 img_r[:, :, 1] = threshold1 img_r[:, :, 2] = threshold1 @@ -312,9 +310,7 @@ def otsu_copy_binary(img): img_r = img_r / float(np.max(img_r)) * 255 return img_r - def find_features_of_lines(contours_main): - areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] @@ -326,7 +322,6 @@ def find_features_of_lines(contours_main): y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) slope_lines = [] - for kk in range(len(contours_main)): [vx, vy, x, y] = cv2.fitLine(contours_main[kk], cv2.DIST_L2, 0, 0.01, 0.01) slope_lines.append(((vy / vx) / np.pi * 180)[0]) @@ -339,29 +334,42 @@ def find_features_of_lines(contours_main): slope_lines[(slope_lines != 0) & (slope_lines != 1)] = 2 dis_x = np.abs(x_max_main - x_min_main) - return slope_lines, dis_x, x_min_main, x_max_main, np.array(cy_main), np.array(slope_lines_org), y_min_main, y_max_main, np.array(cx_main) + return (slope_lines, + dis_x, + x_min_main, + x_max_main, + np.array(cy_main), + np.array(slope_lines_org), + y_min_main, + y_max_main, + np.array(cx_main)) def boosting_headers_by_longshot_region_segmentation(textregion_pre_p, textregion_pre_np, img_only_text): textregion_pre_p_org = np.copy(textregion_pre_p) # 4 is drop capitals - headers_in_longshot = (textregion_pre_np[:, :, 0] == 2) * 1 - # headers_in_longshot= ( (textregion_pre_np[:,:,0]==2) | (textregion_pre_np[:,:,0]==1) )*1 - textregion_pre_p[:, :, 0][(headers_in_longshot[:, :] == 1) & (textregion_pre_p[:, :, 0] != 4)] = 2 + headers_in_longshot = textregion_pre_np[:, :, 0] == 2 + #headers_in_longshot = ((textregion_pre_np[:,:,0]==2) | + # (textregion_pre_np[:,:,0]==1)) + textregion_pre_p[:, :, 0][headers_in_longshot & + (textregion_pre_p[:, :, 0] != 4)] = 2 textregion_pre_p[:, :, 0][textregion_pre_p[:, :, 0] == 1] = 0 # earlier it was so, but by this manner the drop capitals are also deleted - # textregion_pre_p[:,:,0][( img_only_text[:,:]==1) & (textregion_pre_p[:,:,0]!=7) & (textregion_pre_p[:,:,0]!=2)]=1 - textregion_pre_p[:, :, 0][(img_only_text[:, :] == 1) & (textregion_pre_p[:, :, 0] != 7) & (textregion_pre_p[:, :, 0] != 4) & (textregion_pre_p[:, :, 0] != 2)] = 1 + # textregion_pre_p[:,:,0][(img_only_text[:,:]==1) & + # (textregion_pre_p[:,:,0]!=7) & + # (textregion_pre_p[:,:,0]!=2)] = 1 + textregion_pre_p[:, :, 0][(img_only_text[:, :] == 1) & + (textregion_pre_p[:, :, 0] != 7) & + (textregion_pre_p[:, :, 0] != 4) & + (textregion_pre_p[:, :, 0] != 2)] = 1 return textregion_pre_p - def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8): - regions_without_separators_0 = regions_without_separators[:,:].sum(axis=1) + regions_without_separators_0 = regions_without_separators.sum(axis=1) z = gaussian_filter1d(regions_without_separators_0, sigma_) return np.std(z) - def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8): - regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0) + regions_without_separators_0 = regions_without_separators.sum(axis=0) ##plt.plot(regions_without_separators_0) ##plt.show() sigma_ = 35 # 70#35 @@ -372,7 +380,7 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl y = regions_without_separators_0 # [first_nonzero:last_nonzero] y_help = np.zeros(len(y) + 20) y_help[10 : len(y) + 10] = y - x = np.array(range(len(y))) + x = np.arange(len(y)) zneg_rev = -y_help + np.max(y_help) zneg = np.zeros(len(zneg_rev) + 20) zneg[10 : len(zneg_rev) + 10] = zneg_rev @@ -386,9 +394,12 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl last_nonzero = last_nonzero - 100 first_nonzero = first_nonzero + 200 - peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)] - peaks = peaks[(peaks > 0.06 * regions_without_separators.shape[1]) & (peaks < 0.94 * regions_without_separators.shape[1])] - peaks_neg = peaks_neg[(peaks_neg > 370) & (peaks_neg < (regions_without_separators.shape[1] - 370))] + peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & + (peaks_neg < last_nonzero)] + peaks = peaks[(peaks > 0.06 * regions_without_separators.shape[1]) & + (peaks < 0.94 * regions_without_separators.shape[1])] + peaks_neg = peaks_neg[(peaks_neg > 370) & + (peaks_neg < (regions_without_separators.shape[1] - 370))] interest_pos = z[peaks] interest_pos = interest_pos[interest_pos > 10] # plt.plot(z) @@ -405,7 +416,8 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl # print(np.min(interest_pos),np.max(interest_pos),np.max(interest_pos)/np.min(interest_pos),'minmax') dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier - grenze = min_peaks_pos - dis_talaei # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 + grenze = min_peaks_pos - dis_talaei + # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 # print(interest_neg,'interest_neg') # print(grenze,'grenze') @@ -441,19 +453,26 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl p_g_u = len(y) - int(len(y) / 4.0) if num_col == 3: - if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or ((peaks_neg_fin[0] + 200) < p_m and peaks_neg_fin[1] < p_m) or ((peaks_neg_fin[0] - 200) > p_m and peaks_neg_fin[1] > p_m): + if ((peaks_neg_fin[0] > p_g_u and + peaks_neg_fin[1] > p_g_u) or + (peaks_neg_fin[0] < p_g_l and + peaks_neg_fin[1] < p_g_l) or + (peaks_neg_fin[0] + 200 < p_m and + peaks_neg_fin[1] < p_m) or + (peaks_neg_fin[0] - 200 > p_m and + peaks_neg_fin[1] > p_m)): num_col = 1 peaks_neg_fin = [] if num_col == 2: - if (peaks_neg_fin[0] > p_g_u) or (peaks_neg_fin[0] < p_g_l): + if (peaks_neg_fin[0] > p_g_u or + peaks_neg_fin[0] < p_g_l): num_col = 1 peaks_neg_fin = [] ##print(len(peaks_neg_fin)) diff_peaks = np.abs(np.diff(peaks_neg_fin)) - cut_off = 400 peaks_neg_true = [] forest = [] @@ -489,23 +508,35 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl ##print(num_col,'early') if num_col == 3: - if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or (peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or (peaks_neg_true[0] < p_m and (peaks_neg_true[1] + 200) < p_m) or ((peaks_neg_true[0] - 200) > p_m and peaks_neg_true[1] > p_m): + if ((peaks_neg_true[0] > p_g_u and + peaks_neg_true[1] > p_g_u) or + (peaks_neg_true[0] < p_g_l and + peaks_neg_true[1] < p_g_l) or + (peaks_neg_true[0] < p_m and + peaks_neg_true[1] + 200 < p_m) or + (peaks_neg_true[0] - 200 > p_m and + peaks_neg_true[1] > p_m)): num_col = 1 peaks_neg_true = [] - elif (peaks_neg_true[0] < p_g_u and peaks_neg_true[0] > p_g_l) and (peaks_neg_true[1] > p_u_quarter): + elif (peaks_neg_true[0] < p_g_u and + peaks_neg_true[0] > p_g_l and + peaks_neg_true[1] > p_u_quarter): peaks_neg_true = [peaks_neg_true[0]] - elif (peaks_neg_true[1] < p_g_u and peaks_neg_true[1] > p_g_l) and (peaks_neg_true[0] < p_quarter): + elif (peaks_neg_true[1] < p_g_u and + peaks_neg_true[1] > p_g_l and + peaks_neg_true[0] < p_quarter): peaks_neg_true = [peaks_neg_true[1]] if num_col == 2: - if (peaks_neg_true[0] > p_g_u) or (peaks_neg_true[0] < p_g_l): + if (peaks_neg_true[0] > p_g_u or + peaks_neg_true[0] < p_g_l): num_col = 1 peaks_neg_true = [] diff_peaks_abnormal = diff_peaks[diff_peaks < 360] if len(diff_peaks_abnormal) > 0: - arg_help = np.array(range(len(diff_peaks))) + arg_help = np.arange(len(diff_peaks)) arg_help_ann = arg_help[diff_peaks < 360] peaks_neg_fin_new = [] @@ -527,7 +558,6 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl # plt.plot(peaks_neg_true,z[peaks_neg_true],'*') # plt.plot([0,len(y)], [grenze,grenze]) # plt.show() - ##print(len(peaks_neg_true)) return len(peaks_neg_true), peaks_neg_true @@ -536,7 +566,6 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): ##plt.plot(regions_without_separators_0) ##plt.show() - sigma_ = 15 meda_n_updown = regions_without_separators_0[len(regions_without_separators_0) :: -1] @@ -547,32 +576,24 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): last_nonzero = len(regions_without_separators_0) - last_nonzero y = regions_without_separators_0 # [first_nonzero:last_nonzero] - y_help = np.zeros(len(y) + 20) - y_help[10 : len(y) + 10] = y - - x = np.array(range(len(y))) + x = np.arange(len(y)) zneg_rev = -y_help + np.max(y_help) - zneg = np.zeros(len(zneg_rev) + 20) - zneg[10 : len(zneg_rev) + 10] = zneg_rev - z = gaussian_filter1d(y, sigma_) zneg = gaussian_filter1d(zneg, sigma_) peaks_neg, _ = find_peaks(zneg, height=0) peaks, _ = find_peaks(z, height=0) - peaks_neg = peaks_neg - 10 - 10 - peaks_neg_org = np.copy(peaks_neg) - - peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)] - - peaks = peaks[(peaks > 0.09 * regions_without_separators.shape[1]) & (peaks < 0.91 * regions_without_separators.shape[1])] + peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & + (peaks_neg < last_nonzero)] + peaks = peaks[(peaks > 0.09 * regions_without_separators.shape[1]) & + (peaks < 0.91 * regions_without_separators.shape[1])] peaks_neg = peaks_neg[(peaks_neg > 500) & (peaks_neg < (regions_without_separators.shape[1] - 500))] # print(peaks) @@ -587,7 +608,8 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): # $print(min_peaks_pos) dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier # print(interest_pos) - grenze = min_peaks_pos - dis_talaei # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 + grenze = min_peaks_pos - dis_talaei + # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 interest_neg_fin = interest_neg[(interest_neg < grenze)] peaks_neg_fin = peaks_neg[(interest_neg < grenze)] @@ -601,13 +623,21 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): p_g_u = len(y) - int(len(y) / 3.0) if num_col == 3: - if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or (peaks_neg_fin[0] < p_m and peaks_neg_fin[1] < p_m) or (peaks_neg_fin[0] > p_m and peaks_neg_fin[1] > p_m): + if ((peaks_neg_fin[0] > p_g_u and + peaks_neg_fin[1] > p_g_u) or + (peaks_neg_fin[0] < p_g_l and + peaks_neg_fin[1] < p_g_l) or + (peaks_neg_fin[0] < p_m and + peaks_neg_fin[1] < p_m) or + (peaks_neg_fin[0] > p_m and + peaks_neg_fin[1] > p_m)): num_col = 1 else: pass if num_col == 2: - if (peaks_neg_fin[0] > p_g_u) or (peaks_neg_fin[0] < p_g_l): + if (peaks_neg_fin[0] > p_g_u or + peaks_neg_fin[0] < p_g_l): num_col = 1 else: pass @@ -646,23 +676,36 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): p_u_quarter = len(y) - p_quarter if num_col == 3: - if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or (peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or (peaks_neg_true[0] < p_m and peaks_neg_true[1] < p_m) or (peaks_neg_true[0] > p_m and peaks_neg_true[1] > p_m): + if ((peaks_neg_true[0] > p_g_u and + peaks_neg_true[1] > p_g_u) or + (peaks_neg_true[0] < p_g_l and + peaks_neg_true[1] < p_g_l) or + (peaks_neg_true[0] < p_m and + peaks_neg_true[1] < p_m) or + (peaks_neg_true[0] > p_m and + peaks_neg_true[1] > p_m)): num_col = 1 peaks_neg_true = [] - elif (peaks_neg_true[0] < p_g_u and peaks_neg_true[0] > p_g_l) and (peaks_neg_true[1] > p_u_quarter): + elif (peaks_neg_true[0] < p_g_u and + peaks_neg_true[0] > p_g_l and + peaks_neg_true[1] > p_u_quarter): peaks_neg_true = [peaks_neg_true[0]] - elif (peaks_neg_true[1] < p_g_u and peaks_neg_true[1] > p_g_l) and (peaks_neg_true[0] < p_quarter): + elif (peaks_neg_true[1] < p_g_u and + peaks_neg_true[1] > p_g_l and + peaks_neg_true[0] < p_quarter): peaks_neg_true = [peaks_neg_true[1]] else: pass if num_col == 2: - if (peaks_neg_true[0] > p_g_u) or (peaks_neg_true[0] < p_g_l): + if (peaks_neg_true[0] > p_g_u or + peaks_neg_true[0] < p_g_l): num_col = 1 peaks_neg_true = [] if num_col == 4: - if len(np.array(peaks_neg_true)[np.array(peaks_neg_true) < p_g_l]) == 2 or len(np.array(peaks_neg_true)[np.array(peaks_neg_true) > (len(y) - p_g_l)]) == 2: + if (len(np.array(peaks_neg_true)[np.array(peaks_neg_true) < p_g_l]) == 2 or + len(np.array(peaks_neg_true)[np.array(peaks_neg_true) > (len(y) - p_g_l)]) == 2): num_col = 1 peaks_neg_true = [] else: @@ -674,7 +717,10 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): for i in range(len(peaks_neg_true)): hill_main = peaks_neg_true[i] # deep_depth=z[peaks_neg] - hills_around = peaks_neg_org[((peaks_neg_org > hill_main) & (peaks_neg_org <= hill_main + 400)) | ((peaks_neg_org < hill_main) & (peaks_neg_org >= hill_main - 400))] + hills_around = peaks_neg_org[((peaks_neg_org > hill_main) & + (peaks_neg_org <= hill_main + 400)) | + ((peaks_neg_org < hill_main) & + (peaks_neg_org >= hill_main - 400))] deep_depth_around = z[hills_around] # print(hill_main,z[hill_main],hills_around,deep_depth_around,'manoooo') @@ -687,13 +733,11 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): pass diff_peaks_annormal = diff_peaks[diff_peaks < 360] - if len(diff_peaks_annormal) > 0: - arg_help = np.array(range(len(diff_peaks))) + arg_help = np.arange(len(diff_peaks)) arg_help_ann = arg_help[diff_peaks < 360] peaks_neg_fin_new = [] - for ii in range(len(peaks_neg_fin)): if ii in arg_help_ann: arg_min = np.argmin([interest_neg_fin[ii], interest_neg_fin[ii + 1]]) @@ -701,7 +745,6 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): peaks_neg_fin_new.append(peaks_neg_fin[ii]) else: peaks_neg_fin_new.append(peaks_neg_fin[ii + 1]) - elif (ii - 1) in arg_help_ann: pass else: @@ -711,7 +754,6 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): # sometime pages with one columns gives also some negative peaks. delete those peaks param = z[peaks_neg_true] / float(min_peaks_pos) * 100 - if len(param[param <= 41]) == 0: peaks_neg_true = [] @@ -722,11 +764,9 @@ def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8): ##plt.plot(regions_without_separators_0) ##plt.show() - sigma_ = 35 # 70#35 z = gaussian_filter1d(regions_without_separators_0, sigma_) - peaks, _ = find_peaks(z, height=0) # print(peaks,'peaksnew') @@ -734,39 +774,43 @@ def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8): def return_regions_without_separators(regions_pre): kernel = np.ones((5, 5), np.uint8) - regions_without_separators = ((regions_pre[:, :] != 6) & (regions_pre[:, :] != 0)) * 1 - # regions_without_separators=( (image_regions_eraly_p[:,:,:]!=6) & (image_regions_eraly_p[:,:,:]!=0) & (image_regions_eraly_p[:,:,:]!=5) & (image_regions_eraly_p[:,:,:]!=8) & (image_regions_eraly_p[:,:,:]!=7))*1 + regions_without_separators = ((regions_pre[:, :] != 6) & + (regions_pre[:, :] != 0)) + # regions_without_separators=( (image_regions_eraly_p[:,:,:]!=6) & + # (image_regions_eraly_p[:,:,:]!=0) & + # (image_regions_eraly_p[:,:,:]!=5) & + # (image_regions_eraly_p[:,:,:]!=8) & + # (image_regions_eraly_p[:,:,:]!=7)) - regions_without_separators = regions_without_separators.astype(np.uint8) - - regions_without_separators = cv2.erode(regions_without_separators, kernel, iterations=6) + regions_without_separators = cv2.erode(regions_without_separators.astype(np.uint8), kernel, iterations=6) return regions_without_separators - def put_drop_out_from_only_drop_model(layout_no_patch, layout1): - drop_only = (layout_no_patch[:, :, 0] == 4) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) - areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j]) for j in range(len(contours_drop_parent))]) + areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j]) + for j in range(len(contours_drop_parent))]) areas_cnt_text = areas_cnt_text / float(drop_only.shape[0] * drop_only.shape[1]) - - contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 0.00001] - - areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.00001] + contours_drop_parent = [contours_drop_parent[jz] + for jz in range(len(contours_drop_parent)) + if areas_cnt_text[jz] > 0.00001] + areas_cnt_text = [areas_cnt_text[jz] + for jz in range(len(areas_cnt_text)) + if areas_cnt_text[jz] > 0.00001] contours_drop_parent_final = [] - for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) # boxes.append([int(x), int(y), int(w), int(h)]) map_of_drop_contour_bb = np.zeros((layout1.shape[0], layout1.shape[1])) map_of_drop_contour_bb[y : y + h, x : x + w] = layout1[y : y + h, x : x + w] - - if (((map_of_drop_contour_bb == 1) * 1).sum() / float(((map_of_drop_contour_bb == 5) * 1).sum()) * 100) >= 15: + if (100. * + (map_of_drop_contour_bb == 1).sum() / + (map_of_drop_contour_bb == 5).sum()) >= 15: contours_drop_parent_final.append(contours_drop_parent[jj]) layout_no_patch[:, :, 0][layout_no_patch[:, :, 0] == 4] = 0 @@ -780,49 +824,53 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) - areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j]) for j in range(len(contours_drop_parent))]) + areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j]) + for j in range(len(contours_drop_parent))]) areas_cnt_text = areas_cnt_text / float(drop_only.shape[0] * drop_only.shape[1]) - - contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 0.00001] - - areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.00001] + contours_drop_parent = [contours_drop_parent[jz] + for jz in range(len(contours_drop_parent)) + if areas_cnt_text[jz] > 0.00001] + areas_cnt_text = [areas_cnt_text[jz] + for jz in range(len(areas_cnt_text)) + if areas_cnt_text[jz] > 0.00001] contours_drop_parent_final = [] - for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) + box = slice(y, y + h), slice(x, x + w) + box0 = box + (0,) mask_of_drop_cpaital_in_early_layout = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1])) + mask_of_drop_cpaital_in_early_layout[box] = text_regions_p[box] - mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w] = text_regions_p[y : y + h, x : x + w] - - all_drop_capital_pixels_which_is_text_in_early_lo = np.sum( mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w]==1 ) - - mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w]=1 - all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1 ) + all_drop_capital_pixels_which_is_text_in_early_lo = np.sum(mask_of_drop_cpaital_in_early_layout[box]==1) + mask_of_drop_cpaital_in_early_layout[box] = 1 + all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1) percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels) - - - if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.6 and percent_text_to_all_in_drop>=0.3: - - layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label + if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.6 and + percent_text_to_all_in_drop >= 0.3): + layout_in_patch[box0] = drop_capital_label else: - layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = drop_capital_label - layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == 0] = drop_capital_label - layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == 4] = drop_capital_label# images - #layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = 1#drop_capital_label + layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = drop_capital_label + layout_in_patch[box0][layout_in_patch[box0] == 0] = drop_capital_label + layout_in_patch[box0][layout_in_patch[box0] == 4] = drop_capital_label# images + #layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = 1#drop_capital_label return layout_in_patch -def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_textline_polygons,slopes,contours_only_text_parent_d_ordered): - - cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent) +def check_any_text_region_in_model_one_is_main_or_header( + regions_model_1, regions_model_full, + contours_only_text_parent, + all_box_coord, all_found_textline_polygons, + slopes, + contours_only_text_parent_d_ordered): + + cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \ + find_new_features_of_contours(contours_only_text_parent) length_con=x_max_main-x_min_main height_con=y_max_main-y_min_main - - all_found_textline_polygons_main=[] all_found_textline_polygons_head=[] @@ -843,14 +891,10 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3)) img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) - - all_pixels=((img[:,:,0]==255)*1).sum() - pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum() pixels_main=all_pixels-pixels_header - if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 contours_only_text_parent_head.append(con) @@ -870,28 +914,44 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions #print(all_pixels,pixels_main,pixels_header) - return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_textline_polygons_main,all_found_textline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d - + return (regions_model_1, + contours_only_text_parent_main, + contours_only_text_parent_head, + all_box_coord_main, + all_box_coord_head, + all_found_textline_polygons_main, + all_found_textline_polygons_head, + slopes_main, + slopes_head, + contours_only_text_parent_main_d, + contours_only_text_parent_head_d) + +def check_any_text_region_in_model_one_is_main_or_header_light( + regions_model_1, regions_model_full, + contours_only_text_parent, + all_box_coord, all_found_textline_polygons, + slopes, + contours_only_text_parent_d_ordered): -def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_textline_polygons,slopes,contours_only_text_parent_d_ordered): - ### to make it faster h_o = regions_model_1.shape[0] w_o = regions_model_1.shape[1] - - regions_model_1 = cv2.resize(regions_model_1, (int(regions_model_1.shape[1]/3.), int(regions_model_1.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) - regions_model_full = cv2.resize(regions_model_full, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) - contours_only_text_parent = [ (i/3.).astype(np.int32) for i in contours_only_text_parent] + zoom = 3 + regions_model_1 = cv2.resize(regions_model_1, (regions_model_1.shape[1] // zoom, + regions_model_1.shape[0] // zoom), + interpolation=cv2.INTER_NEAREST) + regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom, + regions_model_full.shape[0] // zoom), + interpolation=cv2.INTER_NEAREST) + contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent] ### - - cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent) + cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \ + find_new_features_of_contours(contours_only_text_parent) length_con=x_max_main-x_min_main height_con=y_max_main-y_min_main - - all_found_textline_polygons_main=[] all_found_textline_polygons_head=[] @@ -909,16 +969,13 @@ def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,r for ii in range(len(contours_only_text_parent)): con=contours_only_text_parent[ii] - img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3)) + img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) - - - all_pixels=((img[:,:,0]==255)*1).sum() - - pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum() - pixels_main=all_pixels-pixels_header - + all_pixels = (img[:,:,0]==255).sum() + pixels_header=((img[:,:,0]==255) & + (regions_model_full[:,:,0]==2)).sum() + pixels_main = all_pixels - pixels_header if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 @@ -939,22 +996,30 @@ def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,r #print(all_pixels,pixels_main,pixels_header) - - ### to make it faster - regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST) - #regions_model_full = cv2.resize(img, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) - contours_only_text_parent_head = [ (i*3.).astype(np.int32) for i in contours_only_text_parent_head] - contours_only_text_parent_main = [ (i*3.).astype(np.int32) for i in contours_only_text_parent_main] + # regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom, + # regions_model_full.shape[0] // zoom), + # interpolation=cv2.INTER_NEAREST) + contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head] + contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main] ### - return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_textline_polygons_main,all_found_textline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d + return (regions_model_1, + contours_only_text_parent_main, + contours_only_text_parent_head, + all_box_coord_main, + all_box_coord_head, + all_found_textline_polygons_main, + all_found_textline_polygons_head, + slopes_main, + slopes_head, + contours_only_text_parent_main_d, + contours_only_text_parent_head_d) def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col): # print(textlines_con) # textlines_con=textlines_con.astype(np.uint32) - textlines_con_changed = [] for m1 in range(len(textlines_con)): @@ -973,9 +1038,10 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) ##plt.imshow(img_text_all) ##plt.show() - areas_cnt_text = np.array([cv2.contourArea(textlines_tot[j]) for j in range(len(textlines_tot))]) + areas_cnt_text = np.array([cv2.contourArea(textlines_tot[j]) + for j in range(len(textlines_tot))]) areas_cnt_text = areas_cnt_text / float(textline_iamge.shape[0] * textline_iamge.shape[1]) - indexes_textlines = np.array(range(len(textlines_tot))) + indexes_textlines = np.arange(len(textlines_tot)) # print(areas_cnt_text,np.min(areas_cnt_text),np.max(areas_cnt_text)) if num_col == 0: @@ -1010,9 +1076,7 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) sum_small_big_all2 = (sum_small_big_all[:, :] == 2) * 1 sum_intersection_sb = sum_small_big_all2.sum(axis=1).sum() - if sum_intersection_sb > 0: - dis_small_from_bigs_tot = [] for z1 in range(len(textlines_small)): # print(len(textlines_small),'small') @@ -1028,27 +1092,22 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) sum_small_big_2 = (sum_small_big[:, :] == 2) * 1 sum_intersection = sum_small_big_2.sum(axis=1).sum() - # print(sum_intersection) - intersections.append(sum_intersection) if len(np.array(intersections)[np.array(intersections) > 0]) == 0: intersections = [] - try: dis_small_from_bigs_tot.append(np.argmax(intersections)) except: dis_small_from_bigs_tot.append(-1) smalls_list = np.array(dis_small_from_bigs_tot)[np.array(dis_small_from_bigs_tot) >= 0] - # index_small_textlines_rest=list( set(indexes_textlines_small)-set(smalls_list) ) textlines_big_with_change = [] textlines_big_with_change_con = [] textlines_small_with_change = [] - for z in list(set(smalls_list)): index_small_textlines = list(np.where(np.array(dis_small_from_bigs_tot) == z)[0]) # print(z,index_small_textlines) @@ -1068,7 +1127,6 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) cont, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # print(cont[0],type(cont)) - textlines_big_with_change_con.append(cont) textlines_big_org_form[z] = cont[0] @@ -1079,13 +1137,11 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) # print(textlines_small_with_change,'textlines_small_with_change') # print(textlines_big) textlines_con_changed.append(textlines_big_org_form) - else: textlines_con_changed.append(textlines_big_org_form) return textlines_con_changed def order_of_regions(textline_mask, contours_main, contours_header, y_ref): - ##plt.imshow(textline_mask) ##plt.show() """ @@ -1095,59 +1151,47 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): y_help=np.zeros(len(y)+40) y_help[20:len(y)+20]=y - x=np.array( range(len(y)) ) - + x=np.arange(len(y)) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) - ##plt.imshow(textline_mask[:,:]) ##plt.show() - sigma_gaus=8 - z= gaussian_filter1d(y_help, sigma_gaus) zneg_rev=-y_help+np.max(y_help) - zneg=np.zeros(len(zneg_rev)+40) zneg[20:len(zneg_rev)+20]=zneg_rev zneg= gaussian_filter1d(zneg, sigma_gaus) - peaks, _ = find_peaks(z, height=0) peaks_neg, _ = find_peaks(zneg, height=0) - peaks_neg=peaks_neg-20-20 peaks=peaks-20 """ - textline_sum_along_width = textline_mask.sum(axis=1) y = textline_sum_along_width[:] y_padded = np.zeros(len(y) + 40) y_padded[20 : len(y) + 20] = y - x = np.array(range(len(y))) + x = np.arange(len(y)) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) sigma_gaus = 8 - z = gaussian_filter1d(y_padded, sigma_gaus) zneg_rev = -y_padded + np.max(y_padded) - zneg = np.zeros(len(zneg_rev) + 40) zneg[20 : len(zneg_rev) + 20] = zneg_rev zneg = gaussian_filter1d(zneg, sigma_gaus) peaks, _ = find_peaks(z, height=0) peaks_neg, _ = find_peaks(zneg, height=0) - peaks_neg = peaks_neg - 20 - 20 peaks = peaks - 20 ##plt.plot(z) ##plt.show() - if contours_main != None: areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] @@ -1173,42 +1217,32 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): # print(cy_main,'mainy') peaks_neg_new = [] - peaks_neg_new.append(0 + y_ref) for iii in range(len(peaks_neg)): peaks_neg_new.append(peaks_neg[iii] + y_ref) - peaks_neg_new.append(textline_mask.shape[0] + y_ref) if len(cy_main) > 0 and np.max(cy_main) > np.max(peaks_neg_new): cy_main = np.array(cy_main) * (np.max(peaks_neg_new) / np.max(cy_main)) - 10 - if contours_main != None: - indexer_main = np.array(range(len(contours_main))) - + indexer_main = np.arange(len(contours_main)) if contours_main != None: len_main = len(contours_main) else: len_main = 0 matrix_of_orders = np.zeros((len(contours_main) + len(contours_header), 5)) - - matrix_of_orders[:, 0] = np.array(range(len(contours_main) + len(contours_header))) - + matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_header)) matrix_of_orders[: len(contours_main), 1] = 1 matrix_of_orders[len(contours_main) :, 1] = 2 - matrix_of_orders[: len(contours_main), 2] = cx_main matrix_of_orders[len(contours_main) :, 2] = cx_header - matrix_of_orders[: len(contours_main), 3] = cy_main matrix_of_orders[len(contours_main) :, 3] = cy_header - - matrix_of_orders[: len(contours_main), 4] = np.array(range(len(contours_main))) - matrix_of_orders[len(contours_main) :, 4] = np.array(range(len(contours_header))) + matrix_of_orders[: len(contours_main), 4] = np.arange(len(contours_main)) + matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_header)) # print(peaks_neg_new,'peaks_neg_new') - # print(matrix_of_orders,'matrix_of_orders') # print(peaks_neg_new,np.max(peaks_neg_new)) final_indexers_sorted = [] @@ -1217,19 +1251,20 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): for i in range(len(peaks_neg_new) - 1): top = peaks_neg_new[i] down = peaks_neg_new[i + 1] - - indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] - cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] - cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] - types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] - index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] - + indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & + ((matrix_of_orders[:, 3] < down))] + cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) & + ((matrix_of_orders[:, 3] < down))] + cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) & + ((matrix_of_orders[:, 3] < down))] + types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) & + (matrix_of_orders[:, 3] < down)] + index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) & + (matrix_of_orders[:, 3] < down)] sorted_inside = np.argsort(cxs_in) - ind_in_int = indexes_in[sorted_inside] ind_in_type = types_of_text[sorted_inside] ind_ind_type = index_types_of_text[sorted_inside] - for j in range(len(ind_in_int)): final_indexers_sorted.append(int(ind_in_int[j])) final_types.append(int(ind_in_type[j])) @@ -1237,20 +1272,22 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): ##matrix_of_orders[:len_main,4]=final_indexers_sorted[:] - # This fix is applied if the sum of the lengths of contours and contours_h does not match final_indexers_sorted. However, this is not the optimal solution.. - if (len(cy_main)+len(cy_header) ) == len(final_index_type): + # This fix is applied if the sum of the lengths of contours and contours_h + # does not match final_indexers_sorted. However, this is not the optimal solution.. + if len(cy_main) + len(cy_header) == len(final_index_type): pass else: - indexes_missed = set(list( np.array( range((len(cy_main)+len(cy_header) ) )) )) - set(final_indexers_sorted) + indexes_missed = set(np.arange(len(cy_main) + len(cy_header))) - set(final_indexers_sorted) for ind_missed in indexes_missed: final_indexers_sorted.append(ind_missed) final_types.append(1) final_index_type.append(ind_missed) - - + return final_indexers_sorted, matrix_of_orders, final_types, final_index_type -def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(img_p_in_ver, img_in_hor,num_col_classifier): +def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( + img_p_in_ver, img_in_hor,num_col_classifier): + #img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2) img_p_in_ver=img_p_in_ver.astype(np.uint8) img_p_in_ver=np.repeat(img_p_in_ver[:, :, np.newaxis], 3, axis=2) @@ -1258,33 +1295,33 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(im ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_lines_ver,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - - slope_lines_ver,dist_x_ver, x_min_main_ver ,x_max_main_ver ,cy_main_ver,slope_lines_org_ver,y_min_main_ver, y_max_main_ver, cx_main_ver=find_features_of_lines(contours_lines_ver) - + slope_lines_ver, _, x_min_main_ver, _, _, _, y_min_main_ver, y_max_main_ver, cx_main_ver = \ + find_features_of_lines(contours_lines_ver) for i in range(len(x_min_main_ver)): - img_p_in_ver[int(y_min_main_ver[i]):int(y_min_main_ver[i])+30,int(cx_main_ver[i])-25:int(cx_main_ver[i])+25,0]=0 - img_p_in_ver[int(y_max_main_ver[i])-30:int(y_max_main_ver[i]),int(cx_main_ver[i])-25:int(cx_main_ver[i])+25,0]=0 - - + img_p_in_ver[int(y_min_main_ver[i]): + int(y_min_main_ver[i])+30, + int(cx_main_ver[i])-25: + int(cx_main_ver[i])+25, 0] = 0 + img_p_in_ver[int(y_max_main_ver[i])-30: + int(y_max_main_ver[i]), + int(cx_main_ver[i])-25: + int(cx_main_ver[i])+25, 0] = 0 + img_in_hor=img_in_hor.astype(np.uint8) img_in_hor=np.repeat(img_in_hor[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(img_in_hor, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_lines_hor,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - slope_lines_hor,dist_x_hor, x_min_main_hor ,x_max_main_hor ,cy_main_hor,slope_lines_org_hor,y_min_main_hor, y_max_main_hor, cx_main_hor=find_features_of_lines(contours_lines_hor) - - + slope_lines_hor, dist_x_hor, x_min_main_hor, x_max_main_hor, cy_main_hor, _, _, _, _ = \ + find_features_of_lines(contours_lines_hor) x_width_smaller_than_acolumn_width=img_in_hor.shape[1]/float(num_col_classifier+1.) len_lines_bigger_than_x_width_smaller_than_acolumn_width=len( dist_x_hor[dist_x_hor>=x_width_smaller_than_acolumn_width] ) - - len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column=int( len_lines_bigger_than_x_width_smaller_than_acolumn_width/float(num_col_classifier) ) - - - if len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column<10: - args_hor=np.array( range(len(slope_lines_hor) )) + len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column=int(len_lines_bigger_than_x_width_smaller_than_acolumn_width / + float(num_col_classifier)) + if len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column < 10: + args_hor=np.arange(len(slope_lines_hor)) all_args_uniq=contours_in_same_horizon(cy_main_hor) #print(all_args_uniq,'all_args_uniq') if len(all_args_uniq)>0: @@ -1302,51 +1339,50 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(im #print(img_p_in_ver.shape[1],some_x_max-some_x_min,'xdiff') diff_x_some=some_x_max-some_x_min for jv in range(len(some_args)): - - img_p_in=cv2.fillPoly(img_in_hor, pts =[contours_lines_hor[some_args[jv]]], color=(1,1,1)) - + img_p_in=cv2.fillPoly(img_in_hor, pts=[contours_lines_hor[some_args[jv]]], color=(1,1,1)) if any(i_diff>(img_p_in_ver.shape[1]/float(3.3)) for i_diff in diff_x_some): - img_p_in[int(np.mean(some_cy))-5:int(np.mean(some_cy))+5, int(np.min(some_x_min)):int(np.max(some_x_max)) ]=1 - + img_p_in[int(np.mean(some_cy))-5: + int(np.mean(some_cy))+5, + int(np.min(some_x_min)): + int(np.max(some_x_max)) ]=1 sum_dis=dist_x_hor[some_args].sum() diff_max_min_uniques=np.max(x_max_main_hor[some_args])-np.min(x_min_main_hor[some_args]) - - if diff_max_min_uniques>sum_dis and ( (sum_dis/float(diff_max_min_uniques) ) >0.85 ) and ( (diff_max_min_uniques/float(img_p_in_ver.shape[1]))>0.85 ) and np.std( dist_x_hor[some_args] )<(0.55*np.mean( dist_x_hor[some_args] )): - #print(dist_x_hor[some_args],dist_x_hor[some_args].sum(),np.min(x_min_main_hor[some_args]) ,np.max(x_max_main_hor[some_args]),'jalibdi') - #print(np.mean( dist_x_hor[some_args] ),np.std( dist_x_hor[some_args] ),np.var( dist_x_hor[some_args] ),'jalibdiha') + if (diff_max_min_uniques > sum_dis and + sum_dis / float(diff_max_min_uniques) > 0.85 and + diff_max_min_uniques / float(img_p_in_ver.shape[1]) > 0.85 and + np.std(dist_x_hor[some_args]) < 0.55 * np.mean(dist_x_hor[some_args])): + # print(dist_x_hor[some_args], + # dist_x_hor[some_args].sum(), + # np.min(x_min_main_hor[some_args]), + # np.max(x_max_main_hor[some_args]),'jalibdi') + # print(np.mean( dist_x_hor[some_args] ), + # np.std( dist_x_hor[some_args] ), + # np.var( dist_x_hor[some_args] ),'jalibdiha') special_separators.append(np.mean(cy_main_hor[some_args])) - else: img_p_in=img_in_hor special_separators=[] else: img_p_in=img_in_hor special_separators=[] - img_p_in_ver[:,:,0][img_p_in_ver[:,:,0]==255]=1 sep_ver_hor=img_p_in+img_p_in_ver - - sep_ver_hor_cross=(sep_ver_hor[:,:,0]==2)*1 - sep_ver_hor_cross=np.repeat(sep_ver_hor_cross[:, :, np.newaxis], 3, axis=2) sep_ver_hor_cross=sep_ver_hor_cross.astype(np.uint8) imgray = cv2.cvtColor(sep_ver_hor_cross, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_cross,_=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - cx_cross,cy_cross ,_ , _, _ ,_,_=find_new_features_of_contours(contours_cross) - for ii in range(len(cx_cross)): img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])+5:int(cx_cross[ii])+40,0]=0 img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])-40:int(cx_cross[ii])-4,0]=0 - else: img_p_in=np.copy(img_in_hor) special_separators=[] - return img_p_in[:,:,0],special_separators + return img_p_in[:,:,0], special_separators def return_points_with_boundies(peaks_neg_fin, first_point, last_point): peaks_neg_tot = [] @@ -1359,62 +1395,49 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point): def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None): t_ins_c0 = time.time() separators_closeup=( (region_pre_p[:,:,:]==pixel_lines))*1 - separators_closeup[0:110,:,:]=0 separators_closeup[separators_closeup.shape[0]-150:,:,:]=0 kernel = np.ones((5,5),np.uint8) - separators_closeup=separators_closeup.astype(np.uint8) separators_closeup = cv2.dilate(separators_closeup,kernel,iterations = 1) separators_closeup = cv2.erode(separators_closeup,kernel,iterations = 1) - separators_closeup_new=np.zeros((separators_closeup.shape[0] ,separators_closeup.shape[1] )) separators_closeup_n=np.copy(separators_closeup) - separators_closeup_n=separators_closeup_n.astype(np.uint8) - + separators_closeup_n_binary=np.zeros(( separators_closeup_n.shape[0],separators_closeup_n.shape[1]) ) separators_closeup_n_binary[:,:]=separators_closeup_n[:,:,0] - separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]!=0]=1 gray_early=np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2) gray_early=gray_early.astype(np.uint8) - imgray_e = cv2.cvtColor(gray_early, cv2.COLOR_BGR2GRAY) ret_e, thresh_e = cv2.threshold(imgray_e, 0, 255, 0) contours_line_e,hierarchy_e=cv2.findContours(thresh_e,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - - slope_linese,dist_xe, x_min_maine ,x_max_maine ,cy_maine,slope_lines_orge,y_min_maine, y_max_maine, cx_maine=find_features_of_lines(contours_line_e) - - dist_ye=y_max_maine-y_min_maine - - - args_e=np.array(range(len(contours_line_e))) - args_hor_e=args_e[(dist_ye<=50) & (dist_xe>=3*dist_ye)] - - + _, dist_xe, _, _, _, _, y_min_main, y_max_main, _ = \ + find_features_of_lines(contours_line_e) + dist_ye = y_max_main - y_min_main + args_e=np.arange(len(contours_line_e)) + args_hor_e=args_e[(dist_ye<=50) & + (dist_xe>=3*dist_ye)] cnts_hor_e=[] for ce in args_hor_e: cnts_hor_e.append(contours_line_e[ce]) - figs_e=np.zeros(thresh_e.shape) figs_e=cv2.fillPoly(figs_e,pts=cnts_hor_e,color=(1,1,1)) - separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary,pts=cnts_hor_e,color=(0,0,0)) - + separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=(0,0,0)) gray = cv2.bitwise_not(separators_closeup_n_binary) gray=gray.astype(np.uint8) bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, \ - cv2.THRESH_BINARY, 15, -2) - + cv2.THRESH_BINARY, 15, -2) horizontal = np.copy(bw) vertical = np.copy(bw) - + cols = horizontal.shape[1] horizontal_size = cols // 30 # Create structure element for extracting horizontal lines through morphology operations @@ -1424,12 +1447,9 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, horizontal = cv2.dilate(horizontal, horizontalStructure) kernel = np.ones((5,5),np.uint8) - - horizontal = cv2.dilate(horizontal,kernel,iterations = 2) horizontal = cv2.erode(horizontal,kernel,iterations = 2) - - horizontal=cv2.fillPoly(horizontal,pts=cnts_hor_e,color=(255,255,255)) + horizontal = cv2.fillPoly(horizontal, pts=cnts_hor_e, color=(255,255,255)) rows = vertical.shape[0] verticalsize = rows // 30 @@ -1438,10 +1458,11 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, # Apply morphology operations vertical = cv2.erode(vertical, verticalStructure) vertical = cv2.dilate(vertical, verticalStructure) - vertical = cv2.dilate(vertical,kernel,iterations = 1) - horizontal,special_separators=combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(vertical,horizontal,num_col_classifier) + horizontal, special_separators = \ + combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( + vertical, horizontal, num_col_classifier) separators_closeup_new[:,:][vertical[:,:]!=0]=1 separators_closeup_new[:,:][horizontal[:,:]!=0]=1 @@ -1453,9 +1474,10 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_line_vers,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - slope_lines,dist_x, x_min_main ,x_max_main ,cy_main,slope_lines_org,y_min_main, y_max_main, cx_main=find_features_of_lines(contours_line_vers) + slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ + find_features_of_lines(contours_line_vers) - args=np.array( range(len(slope_lines) )) + args=np.arange(len(slope_lines)) args_ver=args[slope_lines==1] dist_x_ver=dist_x[slope_lines==1] y_min_main_ver=y_min_main[slope_lines==1] @@ -1466,19 +1488,17 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, dist_y_ver=y_max_main_ver-y_min_main_ver len_y=separators_closeup.shape[0]/3.0 - horizontal=np.repeat(horizontal[:, :, np.newaxis], 3, axis=2) horizontal=horizontal.astype(np.uint8) imgray = cv2.cvtColor(horizontal, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_line_hors,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - slope_lines,dist_x, x_min_main ,x_max_main ,cy_main,slope_lines_org,y_min_main, y_max_main, cx_main=find_features_of_lines(contours_line_hors) + slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ + find_features_of_lines(contours_line_hors) slope_lines_org_hor=slope_lines_org[slope_lines==0] - args=np.array( range(len(slope_lines) )) + args=np.arange(len(slope_lines)) len_x=separators_closeup.shape[1]/5.0 - dist_y=np.abs(y_max_main-y_min_main) args_hor=args[slope_lines==0] @@ -1497,109 +1517,84 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, y_min_main_hor=y_min_main_hor[dist_x_hor>=len_x/2.0] y_max_main_hor=y_max_main_hor[dist_x_hor>=len_x/2.0] dist_y_hor=dist_y_hor[dist_x_hor>=len_x/2.0] - slope_lines_org_hor=slope_lines_org_hor[dist_x_hor>=len_x/2.0] dist_x_hor=dist_x_hor[dist_x_hor>=len_x/2.0] - matrix_of_lines_ch=np.zeros((len(cy_main_hor)+len(cx_main_ver),10)) - matrix_of_lines_ch[:len(cy_main_hor),0]=args_hor matrix_of_lines_ch[len(cy_main_hor):,0]=args_ver - - matrix_of_lines_ch[len(cy_main_hor):,1]=cx_main_ver - matrix_of_lines_ch[:len(cy_main_hor),2]=x_min_main_hor+50#x_min_main_hor+150 matrix_of_lines_ch[len(cy_main_hor):,2]=x_min_main_ver - matrix_of_lines_ch[:len(cy_main_hor),3]=x_max_main_hor-50#x_max_main_hor-150 matrix_of_lines_ch[len(cy_main_hor):,3]=x_max_main_ver - matrix_of_lines_ch[:len(cy_main_hor),4]=dist_x_hor matrix_of_lines_ch[len(cy_main_hor):,4]=dist_x_ver - matrix_of_lines_ch[:len(cy_main_hor),5]=cy_main_hor - - matrix_of_lines_ch[:len(cy_main_hor),6]=y_min_main_hor matrix_of_lines_ch[len(cy_main_hor):,6]=y_min_main_ver - matrix_of_lines_ch[:len(cy_main_hor),7]=y_max_main_hor matrix_of_lines_ch[len(cy_main_hor):,7]=y_max_main_ver - matrix_of_lines_ch[:len(cy_main_hor),8]=dist_y_hor matrix_of_lines_ch[len(cy_main_hor):,8]=dist_y_ver - - matrix_of_lines_ch[len(cy_main_hor):,9]=1 if contours_h is not None: - slope_lines_head,dist_x_head, x_min_main_head ,x_max_main_head ,cy_main_head,slope_lines_org_head,y_min_main_head, y_max_main_head, cx_main_head=find_features_of_lines(contours_h) + _, dist_x_head, x_min_main_head, x_max_main_head, cy_main_head, _, y_min_main_head, y_max_main_head, _ = \ + find_features_of_lines(contours_h) matrix_l_n=np.zeros((matrix_of_lines_ch.shape[0]+len(cy_main_head),matrix_of_lines_ch.shape[1])) matrix_l_n[:matrix_of_lines_ch.shape[0],:]=np.copy(matrix_of_lines_ch[:,:]) - args_head=np.array(range(len(cy_main_head)))+len(cy_main_hor) + args_head=np.arange(len(cy_main_head)) + len(cy_main_hor) matrix_l_n[matrix_of_lines_ch.shape[0]:,0]=args_head matrix_l_n[matrix_of_lines_ch.shape[0]:,2]=x_min_main_head+30 matrix_l_n[matrix_of_lines_ch.shape[0]:,3]=x_max_main_head-30 - matrix_l_n[matrix_of_lines_ch.shape[0]:,4]=dist_x_head - matrix_l_n[matrix_of_lines_ch.shape[0]:,5]=y_min_main_head-3-8 matrix_l_n[matrix_of_lines_ch.shape[0]:,6]=y_min_main_head-5-8 matrix_l_n[matrix_of_lines_ch.shape[0]:,7]=y_max_main_head#y_min_main_head+1-8 matrix_l_n[matrix_of_lines_ch.shape[0]:,8]=4 - matrix_of_lines_ch=np.copy(matrix_l_n) - - - cy_main_splitters=cy_main_hor[ (x_min_main_hor<=.16*region_pre_p.shape[1]) & (x_max_main_hor>=.84*region_pre_p.shape[1] )] + cy_main_splitters=cy_main_hor[(x_min_main_hor<=.16*region_pre_p.shape[1]) & + (x_max_main_hor>=.84*region_pre_p.shape[1])] cy_main_splitters=np.array( list(cy_main_splitters)+list(special_separators)) - if contours_h is not None: try: - cy_main_splitters_head=cy_main_head[ (x_min_main_head<=.16*region_pre_p.shape[1]) & (x_max_main_head>=.84*region_pre_p.shape[1] )] + cy_main_splitters_head=cy_main_head[(x_min_main_head<=.16*region_pre_p.shape[1]) & + (x_max_main_head>=.84*region_pre_p.shape[1])] cy_main_splitters=np.array( list(cy_main_splitters)+list(cy_main_splitters_head)) except: pass args_cy_splitter=np.argsort(cy_main_splitters) - cy_main_splitters_sort=cy_main_splitters[args_cy_splitter] splitter_y_new=[] splitter_y_new.append(0) for i in range(len(cy_main_splitters_sort)): splitter_y_new.append( cy_main_splitters_sort[i] ) - splitter_y_new.append(region_pre_p.shape[0]) - splitter_y_new_diff=np.diff(splitter_y_new)/float(region_pre_p.shape[0])*100 - args_big_parts=np.array(range(len(splitter_y_new_diff))) [ splitter_y_new_diff>22 ] - + args_big_parts=np.arange(len(splitter_y_new_diff))[ splitter_y_new_diff>22 ] + regions_without_separators=return_regions_without_separators(region_pre_p) - - length_y_threshold=regions_without_separators.shape[0]/4.0 num_col_fin=0 peaks_neg_fin_fin=[] - for itiles in args_big_parts: - regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]):int(splitter_y_new[itiles+1]),:,0] - + regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]): + int(splitter_y_new[itiles+1]),:,0] try: - num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0) + num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, + num_col_classifier, tables, multiplier=7.0) except: num_col = 0 peaks_neg_fin = [] - if num_col>num_col_fin: num_col_fin=num_col peaks_neg_fin_fin=peaks_neg_fin - if len(args_big_parts)==1 and (len(peaks_neg_fin_fin)+1) splitter_y_new[i] ) & (matrix_of_lines_ch[:,7]< splitter_y_new[i+1] ) ] + matrix_new = matrix_of_lines_ch[:,:][(matrix_of_lines_ch[:,6]> splitter_y_new[i] ) & + (matrix_of_lines_ch[:,7]< splitter_y_new[i+1] )] #print(len( matrix_new[:,9][matrix_new[:,9]==1] )) - #print(matrix_new[:,8][matrix_new[:,9]==1],'gaddaaa') - # check to see is there any vertical separator to find holes. - if 1>0:#len( matrix_new[:,9][matrix_new[:,9]==1] )>0 and np.max(matrix_new[:,8][matrix_new[:,9]==1])>=0.1*(np.abs(splitter_y_new[i+1]-splitter_y_new[i] )): - + #if (len(matrix_new[:,9][matrix_new[:,9]==1]) > 0 and + # np.max(matrix_new[:,8][matrix_new[:,9]==1]) >= + # 0.1 * (np.abs(splitter_y_new[i+1]-splitter_y_new[i]))): + if True: try: if erosion_hurts: - num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], num_col_classifier, tables, multiplier=6.) + num_col, peaks_neg_fin = find_num_col( + regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], + num_col_classifier, tables, multiplier=6.) else: - num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],num_col_classifier, tables, multiplier=7.) + num_col, peaks_neg_fin = find_num_col( + regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], + num_col_classifier, tables, multiplier=7.) except: peaks_neg_fin=[] num_col = 0 - - try: peaks_neg_fin_org=np.copy(peaks_neg_fin) if (len(peaks_neg_fin)+1)=len(peaks_neg_fin2): peaks_neg_fin=list(np.copy(peaks_neg_fin1)) else: peaks_neg_fin=list(np.copy(peaks_neg_fin2)) - - - peaks_neg_fin=list(np.array(peaks_neg_fin)+peaks_neg_fin_early[i_n]) if i_n!=(len(peaks_neg_fin_early)-2): @@ -1682,10 +1686,6 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho #print(peaks_neg_fin,'peaks_neg_fin') peaks_neg_fin_rev=peaks_neg_fin_rev+peaks_neg_fin - - - - if len(peaks_neg_fin_rev)>=len(peaks_neg_fin_org): peaks_neg_fin=list(np.sort(peaks_neg_fin_rev)) num_col=len(peaks_neg_fin) @@ -1696,7 +1696,9 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho #print(peaks_neg_fin,'peaks_neg_fin') except: pass - #num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=7.0) + #num_col, peaks_neg_fin = find_num_col( + # regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], + # multiplier=7.0) x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ] x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ] cy_hor_some=matrix_new[:,5][ (matrix_new[:,9]==0) ] @@ -1706,197 +1708,160 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho if right2left_readingorder: x_max_hor_some_new = regions_without_separators.shape[1] - x_min_hor_some x_min_hor_some_new = regions_without_separators.shape[1] - x_max_hor_some - x_min_hor_some =list(np.copy(x_min_hor_some_new)) x_max_hor_some =list(np.copy(x_max_hor_some_new)) - - - - peaks_neg_tot=return_points_with_boundies(peaks_neg_fin,0, regions_without_separators[:,:].shape[1]) - peaks_neg_tot_tables.append(peaks_neg_tot) - reading_order_type,x_starting,x_ending,y_type_2,y_diff_type_2,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother,new_main_sep_y=return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x_max_hor_some,cy_hor_some,peaks_neg_tot,cy_hor_diff) - - - if (reading_order_type==1) or (reading_order_type==0 and (len(y_lines_without_mother)>=2 or there_is_sep_with_child==1)): - - + reading_order_type, x_starting, x_ending, y_type_2, y_diff_type_2, \ + y_lines_without_mother, x_start_without_mother, x_end_without_mother, there_is_sep_with_child, \ + y_lines_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \ + new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order( + x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, cy_hor_diff) + x_starting = np.array(x_starting) + x_ending = np.array(x_ending) + y_type_2 = np.array(y_type_2) + y_diff_type_2 = np.array(y_diff_type_2) + + if ((reading_order_type==1) or + (reading_order_type==0 and + (len(y_lines_without_mother)>=2 or there_is_sep_with_child==1))): try: y_grenze=int(splitter_y_new[i])+300 - - - #check if there is a big separator in this y_mains_sep_ohne_grenzen - args_early_ys=np.array(range(len(y_type_2))) - + args_early_ys=np.arange(len(y_type_2)) #print(args_early_ys,'args_early_ys') #print(int(splitter_y_new[i]),int(splitter_y_new[i+1])) - - y_type_2_up=np.array(y_type_2)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - x_starting_up=np.array(x_starting)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - x_ending_up=np.array(x_ending)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - y_diff_type_2_up=np.array(y_diff_type_2)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - args_up=args_early_ys[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - - - - if len(y_type_2_up)>0: - y_main_separator_up=y_type_2_up[(x_starting_up==0) & (x_ending_up==(len(peaks_neg_tot)-1) )] - y_diff_main_separator_up=y_diff_type_2_up[(x_starting_up==0) & (x_ending_up==(len(peaks_neg_tot)-1) )] - args_main_to_deleted=args_up[(x_starting_up==0) & (x_ending_up==(len(peaks_neg_tot)-1) )] + + x_starting_up = x_starting[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + x_ending_up = x_ending[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + y_type_2_up = y_type_2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + y_diff_type_2_up = y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + args_up = args_early_ys[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + if len(y_type_2_up) > 0: + y_main_separator_up = y_type_2_up [(x_starting_up==0) & + (x_ending_up==(len(peaks_neg_tot)-1) )] + y_diff_main_separator_up = y_diff_type_2_up[(x_starting_up==0) & + (x_ending_up==(len(peaks_neg_tot)-1) )] + args_main_to_deleted = args_up[(x_starting_up==0) & + (x_ending_up==(len(peaks_neg_tot)-1) )] #print(y_main_separator_up,y_diff_main_separator_up,args_main_to_deleted,'fffffjammmm') - - if len(y_diff_main_separator_up)>0: - args_to_be_kept=np.array( list( set(args_early_ys)-set(args_main_to_deleted) ) ) + if len(y_diff_main_separator_up) > 0: + args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) )) #print(args_to_be_kept,'args_to_be_kept') - boxes.append([0,peaks_neg_tot[len(peaks_neg_tot)-1],int(splitter_y_new[i]),int( np.max(y_diff_main_separator_up))]) + boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1], + int(splitter_y_new[i]), int( np.max(y_diff_main_separator_up))]) splitter_y_new[i]=[ np.max(y_diff_main_separator_up) ][0] #print(splitter_y_new[i],'splitter_y_new[i]') - y_type_2=np.array(y_type_2)[args_to_be_kept] - x_starting=np.array(x_starting)[args_to_be_kept] - x_ending=np.array(x_ending)[args_to_be_kept] - y_diff_type_2=np.array(y_diff_type_2)[args_to_be_kept] + y_type_2 = y_type_2[args_to_be_kept] + x_starting = x_starting[args_to_be_kept] + x_ending = x_ending[args_to_be_kept] + y_diff_type_2 = y_diff_type_2[args_to_be_kept] #print('galdiha') y_grenze=int(splitter_y_new[i])+200 - - - args_early_ys2=np.array(range(len(y_type_2))) - y_type_2_up=np.array(y_type_2)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - x_starting_up=np.array(x_starting)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - x_ending_up=np.array(x_ending)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - y_diff_type_2_up=np.array(y_diff_type_2)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - args_up2=args_early_ys2[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - - + args_early_ys2=np.arange(len(y_type_2)) + y_type_2_up=y_type_2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + x_starting_up=x_starting[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + x_ending_up=x_ending[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + y_diff_type_2_up=y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + args_up2=args_early_ys2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] #print(y_type_2_up,x_starting_up,x_ending_up,'didid') - - nodes_in=[] + nodes_in = [] for ij in range(len(x_starting_up)): - nodes_in=nodes_in+list(np.array(range(x_starting_up[ij],x_ending_up[ij]))) - - #print(np.unique(nodes_in),'nodes_in') + nodes_in = nodes_in + list(range(x_starting_up[ij], + x_ending_up[ij])) + nodes_in = np.unique(nodes_in) + #print(nodes_in,'nodes_in') - if set(np.unique(nodes_in))==set(np.array(range(len(peaks_neg_tot)-1)) ): + if set(nodes_in)==set(range(len(peaks_neg_tot)-1)): pass - elif set( np.unique(nodes_in) )==set( np.array(range(1,len(peaks_neg_tot)-1)) ): + elif set(nodes_in)==set(range(1, len(peaks_neg_tot)-1)): pass else: #print('burdaydikh') - args_to_be_kept2=np.array( list( set(args_early_ys2)-set(args_up2) ) ) + args_to_be_kept2=np.array(list( set(args_early_ys2)-set(args_up2) )) if len(args_to_be_kept2)>0: - y_type_2=np.array(y_type_2)[args_to_be_kept2] - x_starting=np.array(x_starting)[args_to_be_kept2] - x_ending=np.array(x_ending)[args_to_be_kept2] - y_diff_type_2=np.array(y_diff_type_2)[args_to_be_kept2] + y_type_2 = y_type_2[args_to_be_kept2] + x_starting = x_starting[args_to_be_kept2] + x_ending = x_ending[args_to_be_kept2] + y_diff_type_2 = y_diff_type_2[args_to_be_kept2] else: pass - #print('burdaydikh2') - - - elif len(y_diff_main_separator_up)==0: - nodes_in=[] + nodes_in = [] for ij in range(len(x_starting_up)): - nodes_in=nodes_in+list(np.array(range(x_starting_up[ij],x_ending_up[ij]))) - - #print(np.unique(nodes_in),'nodes_in2') + nodes_in = nodes_in + list(range(x_starting_up[ij], + x_ending_up[ij])) + nodes_in = np.unique(nodes_in) + #print(nodes_in,'nodes_in2') #print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))') - - - if set(np.unique(nodes_in))==set(np.array(range(len(peaks_neg_tot)-1)) ): + if set(nodes_in)==set(range(len(peaks_neg_tot)-1)): pass - elif set(np.unique(nodes_in) )==set( np.array(range(1,len(peaks_neg_tot)-1)) ): + elif set(nodes_in)==set(range(1,len(peaks_neg_tot)-1)): pass else: #print('burdaydikh') #print(args_early_ys,'args_early_ys') #print(args_up,'args_up') - args_to_be_kept2=np.array( list( set(args_early_ys)-set(args_up) ) ) + args_to_be_kept2=np.array(list( set(args_early_ys) - set(args_up) )) #print(args_to_be_kept2,'args_to_be_kept2') - #print(len(y_type_2),len(x_starting),len(x_ending),len(y_diff_type_2)) - if len(args_to_be_kept2)>0: - y_type_2=np.array(y_type_2)[args_to_be_kept2] - x_starting=np.array(x_starting)[args_to_be_kept2] - x_ending=np.array(x_ending)[args_to_be_kept2] - y_diff_type_2=np.array(y_diff_type_2)[args_to_be_kept2] + y_type_2 = y_type_2[args_to_be_kept2] + x_starting = x_starting[args_to_be_kept2] + x_ending = x_ending[args_to_be_kept2] + y_diff_type_2 = y_diff_type_2[args_to_be_kept2] else: pass - #print('burdaydikh2') - - - - - - - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) - y_type_2=np.array(y_type_2) - y_diff_type_2_up=np.array(y_diff_type_2_up) #int(splitter_y_new[i]) - y_lines_by_order=[] x_start_by_order=[] x_end_by_order=[] - if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1: - - if reading_order_type==1: y_lines_by_order.append(int(splitter_y_new[i])) x_start_by_order.append(0) x_end_by_order.append(len(peaks_neg_tot)-2) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - - columns_covered_by_mothers=[] - + columns_covered_by_mothers = [] for dj in range(len(x_start_without_mother)): - columns_covered_by_mothers=columns_covered_by_mothers+list(np.array(range(x_start_without_mother[dj],x_end_without_mother[dj])) ) - columns_covered_by_mothers=list(set(columns_covered_by_mothers)) - - all_columns=np.array(range(len(peaks_neg_tot)-1)) - - columns_not_covered=list( set(all_columns)-set(columns_covered_by_mothers) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - - for lj in columns_not_covered: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) - for lk in range(len(x_start_without_mother)): - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(x_start_without_mother[lk]) - x_ending.append(x_end_without_mother[lk]) - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) - - - - - ind_args=np.array(range(len(y_type_2))) + columns_covered_by_mothers = columns_covered_by_mothers + \ + list(range(x_start_without_mother[dj], + x_end_without_mother[dj])) + columns_covered_by_mothers = list(set(columns_covered_by_mothers)) + + all_columns=np.arange(len(peaks_neg_tot)-1) + columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) + y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) + ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) + x_starting = np.append(x_starting, columns_not_covered) + x_starting = np.append(x_starting, x_start_without_mother) + x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_ending = np.append(x_ending, x_end_without_mother) + + ind_args=np.arange(len(y_type_2)) #ind_args=np.array(ind_args) #print(ind_args,'ind_args') for column in range(len(peaks_neg_tot)-1): @@ -1920,159 +1885,115 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) x_end_by_order.append(x_end_column_sort[ii]-1) - else: - #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - - columns_covered_by_mothers=[] - + columns_covered_by_mothers = [] for dj in range(len(x_start_without_mother)): - columns_covered_by_mothers=columns_covered_by_mothers+list(np.array(range(x_start_without_mother[dj],x_end_without_mother[dj])) ) - columns_covered_by_mothers=list(set(columns_covered_by_mothers)) - - all_columns=np.array(range(len(peaks_neg_tot)-1)) + columns_covered_by_mothers = columns_covered_by_mothers + \ + list(range(x_start_without_mother[dj], + x_end_without_mother[dj])) + columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - columns_not_covered=list( set(all_columns)-set(columns_covered_by_mothers) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - - for lj in columns_not_covered: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) - for lk in range(len(x_start_without_mother)): - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(x_start_without_mother[lk]) - x_ending.append(x_end_without_mother[lk]) - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) - - columns_covered_by_with_child_no_mothers=[] + all_columns=np.arange(len(peaks_neg_tot)-1) + columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) + y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) + ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) + x_starting = np.append(x_starting, columns_not_covered) + x_starting = np.append(x_starting, x_start_without_mother) + x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_ending = np.append(x_ending, x_end_without_mother) + columns_covered_by_with_child_no_mothers = [] for dj in range(len(x_end_with_child_without_mother)): - columns_covered_by_with_child_no_mothers=columns_covered_by_with_child_no_mothers+list(np.array(range(x_start_with_child_without_mother[dj],x_end_with_child_without_mother[dj])) ) - columns_covered_by_with_child_no_mothers=list(set(columns_covered_by_with_child_no_mothers)) + columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \ + list(range(x_start_with_child_without_mother[dj], + x_end_with_child_without_mother[dj])) + columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers)) - all_columns=np.array(range(len(peaks_neg_tot)-1)) - - columns_not_covered_child_no_mother=list( set(all_columns)-set(columns_covered_by_with_child_no_mothers) ) + all_columns = np.arange(len(peaks_neg_tot)-1) + columns_not_covered_child_no_mother = list(set(all_columns) - set(columns_covered_by_with_child_no_mothers)) #indexes_to_be_spanned=[] - for i_s in range( len(x_end_with_child_without_mother) ): + for i_s in range(len(x_end_with_child_without_mother)): columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s]) - - - - columns_not_covered_child_no_mother=np.sort(columns_not_covered_child_no_mother) - - - - ind_args=np.array(range(len(y_type_2))) - - - + columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother) + ind_args = np.arange(len(y_type_2)) + x_end_with_child_without_mother = np.array(x_end_with_child_without_mother) + x_start_with_child_without_mother = np.array(x_start_with_child_without_mother) for i_s_nc in columns_not_covered_child_no_mother: if i_s_nc in x_start_with_child_without_mother: - x_end_biggest_column=np.array(x_end_with_child_without_mother)[np.array(x_start_with_child_without_mother)==i_s_nc][0] - args_all_biggest_lines=ind_args[(x_starting==i_s_nc) & (x_ending==x_end_biggest_column)] - - args_all_biggest_lines=np.array(args_all_biggest_lines) - y_column_nc=y_type_2[args_all_biggest_lines] - x_start_column_nc=x_starting[args_all_biggest_lines] - x_end_column_nc=x_ending[args_all_biggest_lines] - - y_column_nc=np.sort(y_column_nc) - + x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0] + args_all_biggest_lines = ind_args[(x_starting==i_s_nc) & + (x_ending==x_end_biggest_column)] + y_column_nc = y_type_2[args_all_biggest_lines] + x_start_column_nc = x_starting[args_all_biggest_lines] + x_end_column_nc = x_ending[args_all_biggest_lines] + y_column_nc = np.sort(y_column_nc) for i_c in range(len(y_column_nc)): if i_c==(len(y_column_nc)-1): - ind_all_lines_betweeen_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & (y_type_2=i_s_nc) & (x_ending<=x_end_biggest_column)] + ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & + (y_type_2=i_s_nc) & + (x_ending<=x_end_biggest_column)] else: - ind_all_lines_betweeen_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & (y_type_2=i_s_nc) & (x_ending<=x_end_biggest_column)] - - y_all_between_nm_wc=y_type_2[ind_all_lines_betweeen_nm_wc] - x_starting_all_between_nm_wc=x_starting[ind_all_lines_betweeen_nm_wc] - x_ending_all_between_nm_wc=x_ending[ind_all_lines_betweeen_nm_wc] - - x_diff_all_between_nm_wc=x_ending_all_between_nm_wc-x_starting_all_between_nm_wc - - + ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & + (y_type_2=i_s_nc) & + (x_ending<=x_end_biggest_column)] + y_all_between_nm_wc = y_type_2[ind_all_lines_between_nm_wc] + x_starting_all_between_nm_wc = x_starting[ind_all_lines_between_nm_wc] + x_ending_all_between_nm_wc = x_ending[ind_all_lines_between_nm_wc] + + x_diff_all_between_nm_wc = x_ending_all_between_nm_wc - x_starting_all_between_nm_wc if len(x_diff_all_between_nm_wc)>0: biggest=np.argmax(x_diff_all_between_nm_wc) - - columns_covered_by_mothers=[] - + columns_covered_by_mothers = [] for dj in range(len(x_starting_all_between_nm_wc)): - columns_covered_by_mothers=columns_covered_by_mothers+list(np.array(range(x_starting_all_between_nm_wc[dj],x_ending_all_between_nm_wc[dj])) ) - columns_covered_by_mothers=list(set(columns_covered_by_mothers)) - - - all_columns=np.array(range(i_s_nc,x_end_biggest_column)) + columns_covered_by_mothers = columns_covered_by_mothers + \ + list(range(x_starting_all_between_nm_wc[dj], + x_ending_all_between_nm_wc[dj])) + columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - columns_not_covered=list( set(all_columns)-set(columns_covered_by_mothers) ) + all_columns=np.arange(i_s_nc, x_end_biggest_column) + columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers)) should_longest_line_be_extended=0 - if len(x_diff_all_between_nm_wc)>0 and set( list( np.array(range(x_starting_all_between_nm_wc[biggest],x_ending_all_between_nm_wc[biggest])) )+list(columns_not_covered) ) !=set(all_columns): + if (len(x_diff_all_between_nm_wc) > 0 and + set(list(range(x_starting_all_between_nm_wc[biggest], + x_ending_all_between_nm_wc[biggest])) + + list(columns_not_covered)) != set(all_columns)): should_longest_line_be_extended=1 - - index_lines_so_close_to_top_separator=np.array(range(len(y_all_between_nm_wc)))[(y_all_between_nm_wc>y_column_nc[i_c]) & (y_all_between_nm_wc<=(y_column_nc[i_c]+500))] - - - if len(index_lines_so_close_to_top_separator)>0: - indexes_remained_after_deleting_closed_lines= np.array( list ( set( list( np.array(range(len(y_all_between_nm_wc))) ) ) -set(list( index_lines_so_close_to_top_separator) ) ) ) - - if len(indexes_remained_after_deleting_closed_lines)>0: - y_all_between_nm_wc=y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_starting_all_between_nm_wc=x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_ending_all_between_nm_wc=x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - - - y_all_between_nm_wc=list(y_all_between_nm_wc) - x_starting_all_between_nm_wc=list(x_starting_all_between_nm_wc) - x_ending_all_between_nm_wc=list(x_ending_all_between_nm_wc) - - - y_all_between_nm_wc.append(y_column_nc[i_c] ) - x_starting_all_between_nm_wc.append(i_s_nc) - x_ending_all_between_nm_wc.append(x_end_biggest_column) - - - + index_lines_so_close_to_top_separator = \ + np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) & + (y_all_between_nm_wc<=(y_column_nc[i_c]+500))] + if len(index_lines_so_close_to_top_separator) > 0: + indexes_remained_after_deleting_closed_lines= \ + np.array(list(set(list(range(len(y_all_between_nm_wc)))) - + set(list(index_lines_so_close_to_top_separator)))) + if len(indexes_remained_after_deleting_closed_lines) > 0: + y_all_between_nm_wc = y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - y_all_between_nm_wc=list(y_all_between_nm_wc) - x_starting_all_between_nm_wc=list(x_starting_all_between_nm_wc) - x_ending_all_between_nm_wc=list(x_ending_all_between_nm_wc) + y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c]) + x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc) + x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_end_biggest_column) - if len(x_diff_all_between_nm_wc)>0: + if len(x_diff_all_between_nm_wc) > 0: try: - x_starting_all_between_nm_wc.append(x_starting_all_between_nm_wc[biggest]) - x_ending_all_between_nm_wc.append(x_ending_all_between_nm_wc[biggest]) - y_all_between_nm_wc.append(y_column_nc[i_c]) + y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c]) + x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest]) + x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest]) except: pass - - - for c_n_c in columns_not_covered: - y_all_between_nm_wc.append(y_column_nc[i_c]) - x_starting_all_between_nm_wc.append(c_n_c) - x_ending_all_between_nm_wc.append(c_n_c+1) - - y_all_between_nm_wc=np.array(y_all_between_nm_wc) - x_starting_all_between_nm_wc=np.array(x_starting_all_between_nm_wc) - x_ending_all_between_nm_wc=np.array(x_ending_all_between_nm_wc) + y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered)) + x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, columns_not_covered) + x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered) + 1) - ind_args_between=np.array(range(len(x_ending_all_between_nm_wc))) - - for column in range(i_s_nc,x_end_biggest_column): + ind_args_between=np.arange(len(x_ending_all_between_nm_wc)) + for column in range(i_s_nc, x_end_biggest_column): ind_args_in_col=ind_args_between[x_starting_all_between_nm_wc==column] #print('babali2') #print(ind_args_in_col,'ind_args_in_col') @@ -2092,14 +2013,7 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) x_end_by_order.append(x_end_column_sort[ii]-1) - - - - - - else: - #print(column,'column') ind_args_in_col=ind_args[x_starting==i_s_nc] #print('babali2') @@ -2119,15 +2033,11 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) x_end_by_order.append(x_end_column_sort[ii]-1) - - for il in range(len(y_lines_by_order)): - - - y_copy=list( np.copy(y_lines_by_order) ) - x_start_copy=list( np.copy(x_start_by_order) ) - x_end_copy=list ( np.copy(x_end_by_order) ) + y_copy = list(y_lines_by_order) + x_start_copy = list(x_start_by_order) + x_end_copy = list(x_end_by_order) #print(y_copy,'y_copy') y_itself=y_copy.pop(il) @@ -2135,13 +2045,14 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho x_end_itself=x_end_copy.pop(il) #print(y_copy,'y_copy2') - - for column in range(x_start_itself,x_end_itself+1): + for column in range(x_start_itself, x_end_itself+1): #print(column,'cols') y_in_cols=[] for yic in range(len(y_copy)): #print('burda') - if y_copy[yic]>y_itself and column>=x_start_copy[yic] and column<=x_end_copy[yic]: + if (y_copy[yic]>y_itself and + column>=x_start_copy[yic] and + column<=x_end_copy[yic]): y_in_cols.append(y_copy[yic]) #print('burda2') #print(y_in_cols,'y_in_cols') @@ -2150,81 +2061,48 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho else: y_down=[int(splitter_y_new[i+1])][0] #print(y_itself,'y_itself') - boxes.append([peaks_neg_tot[column],peaks_neg_tot[column+1],y_itself,y_down]) + boxes.append([peaks_neg_tot[column], + peaks_neg_tot[column+1], + y_itself, + y_down]) except: - boxes.append([0,peaks_neg_tot[len(peaks_neg_tot)-1],int(splitter_y_new[i]),int(splitter_y_new[i+1])]) - - - + boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1], + int(splitter_y_new[i]), int(splitter_y_new[i+1])]) else: y_lines_by_order=[] x_start_by_order=[] x_end_by_order=[] if len(x_starting)>0: - all_columns = np.array(range(len(peaks_neg_tot)-1)) - columns_covered_by_lines_covered_more_than_2col=[] - + all_columns = np.arange(len(peaks_neg_tot)-1) + columns_covered_by_lines_covered_more_than_2col = [] for dj in range(len(x_starting)): - if set( list(np.array(range(x_starting[dj],x_ending[dj])) ) ) == set(all_columns): + if set(list(range(x_starting[dj],x_ending[dj]))) == set(all_columns): pass else: - columns_covered_by_lines_covered_more_than_2col=columns_covered_by_lines_covered_more_than_2col+list(np.array(range(x_starting[dj],x_ending[dj])) ) - columns_covered_by_lines_covered_more_than_2col=list(set(columns_covered_by_lines_covered_more_than_2col)) - - - - columns_not_covered=list( set(all_columns)-set(columns_covered_by_lines_covered_more_than_2col) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - - for lj in columns_not_covered: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) - - #y_type_2.append(int(splitter_y_new[i])) - #x_starting.append(x_starting[0]) - #x_ending.append(x_ending[0]) - - if len(new_main_sep_y)>0: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(0) - x_ending.append(len(peaks_neg_tot)-1) + columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \ + list(range(x_starting[dj],x_ending[dj])) + columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col)) + columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col)) + + y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1)) + ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) + x_starting = np.append(x_starting, columns_not_covered) + x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + if len(new_main_sep_y) > 0: + x_starting = np.append(x_starting, 0) + x_ending = np.append(x_ending, len(peaks_neg_tot)-1) else: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(x_starting[0]) - x_ending.append(x_ending[0]) - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) + x_starting = np.append(x_starting, x_starting[0]) + x_ending = np.append(x_ending, x_ending[0]) else: - all_columns=np.array(range(len(peaks_neg_tot)-1)) - columns_not_covered=list( set(all_columns) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - - for lj in columns_not_covered: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) - - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) + all_columns = np.arange(len(peaks_neg_tot)-1) + columns_not_covered = list(set(all_columns)) + y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered)) + ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) + x_starting = np.append(x_starting, columns_not_covered) + x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) ind_args=np.array(range(len(y_type_2))) #ind_args=np.array(ind_args) @@ -2248,13 +2126,10 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho x_start_by_order.append(x_start_column_sort[ii]) x_end_by_order.append(x_end_column_sort[ii]-1) - for il in range(len(y_lines_by_order)): - - - y_copy=list( np.copy(y_lines_by_order) ) - x_start_copy=list( np.copy(x_start_by_order) ) - x_end_copy=list ( np.copy(x_end_by_order) ) + y_copy = list(y_lines_by_order) + x_start_copy = list(x_start_by_order) + x_end_copy = list(x_end_by_order) #print(y_copy,'y_copy') y_itself=y_copy.pop(il) @@ -2262,13 +2137,14 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho x_end_itself=x_end_copy.pop(il) #print(y_copy,'y_copy2') - - for column in range(x_start_itself,x_end_itself+1): + for column in range(x_start_itself, x_end_itself+1): #print(column,'cols') y_in_cols=[] for yic in range(len(y_copy)): #print('burda') - if y_copy[yic]>y_itself and column>=x_start_copy[yic] and column<=x_end_copy[yic]: + if (y_copy[yic]>y_itself and + column>=x_start_copy[yic] and + column<=x_end_copy[yic]): y_in_cols.append(y_copy[yic]) #print('burda2') #print(y_in_cols,'y_in_cols') @@ -2277,10 +2153,10 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho else: y_down=[int(splitter_y_new[i+1])][0] #print(y_itself,'y_itself') - boxes.append([peaks_neg_tot[column],peaks_neg_tot[column+1],y_itself,y_down]) - - - + boxes.append([peaks_neg_tot[column], + peaks_neg_tot[column+1], + y_itself, + y_down]) #else: #boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]]) @@ -2291,7 +2167,6 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho peaks_neg_tot_tables_ind = regions_without_separators.shape[1] - np.array(peaks_tab_ind) peaks_neg_tot_tables_ind = list(peaks_neg_tot_tables_ind[::-1]) peaks_neg_tot_tables_new.append(peaks_neg_tot_tables_ind) - for i in range(len(boxes)): x_start_new = regions_without_separators.shape[1] - boxes[i][1] diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index e47c5e7..be00db0 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -27,35 +27,33 @@ def find_contours_mean_y_diff(contours_main): cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] return np.mean(np.diff(np.sort(np.array(cy_main)))) - def get_text_region_boxes_by_given_contours(contours): - kernel = np.ones((5, 5), np.uint8) boxes = [] contours_new = [] for jj in range(len(contours)): - x, y, w, h = cv2.boundingRect(contours[jj]) - - boxes.append([x, y, w, h]) + box = cv2.boundingRect(contours[jj]) + boxes.append(box) contours_new.append(contours[jj]) return boxes, contours_new def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): - found_polygons_early = list() - + found_polygons_early = [] for jv,c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue polygon = geometry.Polygon([point[0] for point in c]) area = polygon.area - if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1: # and hierarchy[0][jv][3]==-1 : - found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint)) + if (area >= min_area * np.prod(image.shape[:2]) and + area <= max_area * np.prod(image.shape[:2]) and + hierarchy[0][jv][3] == -1): + found_polygons_early.append(np.array([[point] + for point in polygon.exterior.coords], dtype=np.uint)) return found_polygons_early def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): - found_polygons_early = list() - + found_polygons_early = [] for jv,c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue @@ -66,48 +64,59 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m ##print(np.prod(thresh.shape[:2])) # Check that polygon has area greater than minimal area # print(hierarchy[0][jv][3],hierarchy ) - if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]): # and hierarchy[0][jv][3]==-1 : + if (area >= min_area * np.prod(image.shape[:2]) and + area <= max_area * np.prod(image.shape[:2]) and + # hierarchy[0][jv][3]==-1 + True): # print(c[0][0][1]) - found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32)) + found_polygons_early.append(np.array([[point] + for point in polygon.exterior.coords], dtype=np.int32)) return found_polygons_early def find_new_features_of_contours(contours_main): - - areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) - M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + areas_main = np.array([cv2.contourArea(contours_main[j]) + for j in range(len(contours_main))]) + M_main = [cv2.moments(contours_main[j]) + for j in range(len(contours_main))] + cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) + for j in range(len(M_main))] + cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) + for j in range(len(M_main))] try: - x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) - - argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) - - x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0] for j in range(len(contours_main))]) - y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 1] for j in range(len(contours_main))]) - - x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) - - y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) - y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) + x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) + for j in range(len(contours_main))]) + argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0]) + for j in range(len(contours_main))]) + x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0] + for j in range(len(contours_main))]) + y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 1] + for j in range(len(contours_main))]) + x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) + for j in range(len(contours_main))]) + y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) + for j in range(len(contours_main))]) + y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) + for j in range(len(contours_main))]) except: - x_min_main = np.array([np.min(contours_main[j][:, 0]) for j in range(len(contours_main))]) - - argmin_x_main = np.array([np.argmin(contours_main[j][:, 0]) for j in range(len(contours_main))]) - - x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0] for j in range(len(contours_main))]) - y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1] for j in range(len(contours_main))]) - - x_max_main = np.array([np.max(contours_main[j][:, 0]) for j in range(len(contours_main))]) - - y_min_main = np.array([np.min(contours_main[j][:, 1]) for j in range(len(contours_main))]) - y_max_main = np.array([np.max(contours_main[j][:, 1]) for j in range(len(contours_main))]) - + x_min_main = np.array([np.min(contours_main[j][:, 0]) + for j in range(len(contours_main))]) + argmin_x_main = np.array([np.argmin(contours_main[j][:, 0]) + for j in range(len(contours_main))]) + x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0] + for j in range(len(contours_main))]) + y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1] + for j in range(len(contours_main))]) + x_max_main = np.array([np.max(contours_main[j][:, 0]) + for j in range(len(contours_main))]) + y_min_main = np.array([np.min(contours_main[j][:, 1]) + for j in range(len(contours_main))]) + y_max_main = np.array([np.max(contours_main[j][:, 1]) + for j in range(len(contours_main))]) # dis_x=np.abs(x_max_main-x_min_main) return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin -def find_features_of_contours(contours_main): - +def find_features_of_contours(contours_main): areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))] cx_main=[(M_main[j]['m10']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] @@ -118,14 +127,15 @@ def find_features_of_contours(contours_main): y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))]) y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))]) - return y_min_main, y_max_main + def return_parent_contours(contours, hierarchy): - contours_parent = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3] == -1] + contours_parent = [contours[i] + for i in range(len(contours)) + if hierarchy[0][i][3] == -1] return contours_parent def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): - # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 @@ -137,10 +147,9 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=1, min_area=min_area) - + contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, + max_area=1, min_area=min_area) return contours_imgs def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): @@ -148,7 +157,6 @@ def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): img_copy = cv2.fillPoly(img_copy, pts=[contour], color=(1, 1, 1)) img_copy = rotation_image_new(img_copy, -slope_first) - img_copy = img_copy.astype(np.uint8) imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) @@ -158,7 +166,6 @@ def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - return cont_int[0], index_r_con def get_textregion_contours_in_org_image_multi(cnts, img, slope_first, map=map): @@ -172,7 +179,6 @@ def get_textregion_contours_in_org_image_multi(cnts, img, slope_first, map=map): return tuple(zip(*results)) def get_textregion_contours_in_org_image(cnts, img, slope_first): - cnts_org = [] # print(cnts,'cnts') for i in range(len(cnts)): @@ -193,7 +199,6 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): ret, thresh = cv2.threshold(imgray, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) # print(np.shape(cont_int[0])) @@ -202,32 +207,23 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): return cnts_org def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): - - h_o = img.shape[0] - w_o = img.shape[1] - - img = cv2.resize(img, (int(img.shape[1]/3.), int(img.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) - ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) - #cnts = cnts/2 - cnts = [(i/ 3).astype(np.int32) for i in cnts] + zoom = 3 + img = cv2.resize(img, (img.shape[1] // zoom, + img.shape[0] // zoom), + interpolation=cv2.INTER_NEAREST) cnts_org = [] - #print(cnts,'cnts') - for i in range(len(cnts)): + for cnt in cnts: img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1)) - - img_copy = rotation_image_new(img_copy, -slope_first) + img_copy = cv2.fillPoly(img_copy, pts=[(cnt / zoom).astype(int)], color=(1, 1, 1)) - img_copy = img_copy.astype(np.uint8) + img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8) imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - # print(np.shape(cont_int[0])) - cnts_org.append(cont_int[0]*3) + cnts_org.append(cont_int[0] * zoom) return cnts_org @@ -235,14 +231,11 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first img_copy = np.zeros(img.shape) img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=(1, 1, 1)) - img_copy = rotation_image_new(img_copy, -slope_first) - - img_copy = img_copy.astype(np.uint8) + img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8) imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) # print(np.shape(cont_int[0])) @@ -264,7 +257,6 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map): return [i*6 for i in contours] def return_contours_of_interested_textline(region_pre_p, pixel): - # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 @@ -277,11 +269,11 @@ def return_contours_of_interested_textline(region_pre_p, pixel): contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=1, min_area=0.000000003) + contours_imgs = filter_contours_area_of_image_tables( + thresh, contours_imgs, hierarchy, max_area=1, min_area=0.000000003) return contours_imgs def return_contours_of_image(image): - if len(image.shape) == 2: image = np.repeat(image[:, :, np.newaxis], 3, axis=2) image = image.astype(np.uint8) @@ -293,7 +285,6 @@ def return_contours_of_image(image): return contours, hierarchy def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003): - # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 @@ -305,14 +296,13 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=1, min_area=min_size) + contours_imgs = filter_contours_area_of_image_tables( + thresh, contours_imgs, hierarchy, max_area=1, min_area=min_size) return contours_imgs def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area): - # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 @@ -325,9 +315,11 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area) + contours_imgs = filter_contours_area_of_image_tables( + thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area) img_ret = np.zeros((region_pre_p.shape[0], region_pre_p.shape[1], 3)) img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=(1, 1, 1)) + return img_ret[:, :, 0] diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index f037a9f..7e77afe 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -41,9 +41,7 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): y_max_cont = img_patch.shape[0] xv = np.linspace(x_min_cont, x_max_cont, 1000) - textline_patch_sum_along_width = img_patch.sum(axis=axis) - first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) y = textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero] @@ -52,11 +50,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) - if 1 > 0: - try: - y_padded_smoothed_e = gaussian_filter1d(y_padded, 2) y_padded_up_to_down_e = -y_padded + np.max(y_padded) y_padded_up_to_down_padded_e = np.zeros(len(y_padded_up_to_down_e) + 40) @@ -67,7 +62,7 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.array(range(len(peaks_neg_e)))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -78,12 +73,11 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): clusters_to_be_deleted = [] if len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + arg_diff_cluster[i + 1] + 1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) - if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -93,7 +87,6 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1] - 1]] peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1]]] - peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg_e[clusters_to_be_deleted[m][m1]]] peaks_new_tot = [] for i1 in peaks_new: @@ -106,9 +99,10 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): peaks_new_tot = peaks_e[:] textline_con, hierarchy = return_contours_of_image(img_patch) - textline_con_fil = filter_contours_area_of_image(img_patch, textline_con, hierarchy, max_area=1, min_area=0.0008) + textline_con_fil = filter_contours_area_of_image(img_patch, + textline_con, hierarchy, + max_area=1, min_area=0.0008) y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil) - sigma_gaus = int(y_diff_mean * (7.0 / 40.0)) # print(sigma_gaus,'sigma_gaus') except: @@ -126,10 +120,18 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): peaks, _ = find_peaks(y_padded_smoothed, height=0) peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0) - return x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix + return (x, y, + x_d, y_d, + xv, + x_min_cont, y_min_cont, + x_max_cont, y_max_cont, + first_nonzero, + y_padded_up_to_down_padded, + y_padded_smoothed, + peaks, peaks_neg, + rotation_matrix) def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): - (h, w) = img_patch.shape[:2] center = (w // 2, h // 2) M = cv2.getRotationMatrix2D(center, -thetha, 1.0) @@ -151,9 +153,7 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): y_max_cont = img_patch.shape[0] xv = np.linspace(x_min_cont, x_max_cont, 1000) - textline_patch_sum_along_width = img_patch.sum(axis=1) - first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) y = textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero] @@ -162,11 +162,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) - if 1>0: - try: - y_padded_smoothed_e= gaussian_filter1d(y_padded, 2) y_padded_up_to_down_e=-y_padded+np.max(y_padded) y_padded_up_to_down_padded_e=np.zeros(len(y_padded_up_to_down_e)+40) @@ -178,27 +175,22 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted= np.array(range(len(peaks_neg_e)))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3 ] + arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) - - arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1] - peaks_new=peaks_e[:] peaks_neg_new=peaks_neg_e[:] clusters_to_be_deleted=[] if len(arg_diff_cluster)>0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1]) for i in range(len(arg_diff_cluster)-1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:arg_diff_cluster[i+1]+1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1: + arg_diff_cluster[i+1]+1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:]) - - if len(clusters_to_be_deleted)>0: peaks_new_extra=[] for m in range(len(clusters_to_be_deleted)): @@ -208,7 +200,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]-1]] peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]]] - peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg_e[clusters_to_be_deleted[m][m1]]] peaks_new_tot=[] for i1 in peaks_new: @@ -216,16 +207,14 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): for i1 in peaks_new_extra: peaks_new_tot.append(i1) peaks_new_tot=np.sort(peaks_new_tot) - - else: peaks_new_tot=peaks_e[:] - textline_con,hierarchy=return_contours_of_image(img_patch) - textline_con_fil=filter_contours_area_of_image(img_patch,textline_con,hierarchy,max_area=1,min_area=0.0008) + textline_con_fil=filter_contours_area_of_image(img_patch, + textline_con, hierarchy, + max_area=1, min_area=0.0008) y_diff_mean=np.mean(np.diff(peaks_new_tot))#self.find_contours_mean_y_diff(textline_con_fil) - sigma_gaus=int( y_diff_mean * (7./40.0) ) #print(sigma_gaus,'sigma_gaus') except: @@ -234,60 +223,41 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): sigma_gaus=3 #print(sigma_gaus,'sigma') - y_padded_smoothed= gaussian_filter1d(y_padded, sigma_gaus) y_padded_up_to_down=-y_padded+np.max(y_padded) y_padded_up_to_down_padded=np.zeros(len(y_padded_up_to_down)+40) y_padded_up_to_down_padded[20:len(y_padded_up_to_down)+20]=y_padded_up_to_down y_padded_up_to_down_padded= gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus) - peaks, _ = find_peaks(y_padded_smoothed, height=0) peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0) - - - - try: neg_peaks_max=np.max(y_padded_smoothed[peaks]) - - - arg_neg_must_be_deleted= np.array(range(len(peaks_neg)))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42 ] - - + arg_neg_must_be_deleted= np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42] diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) - - arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1] except: arg_neg_must_be_deleted=[] arg_diff_cluster=[] - - try: peaks_new=peaks[:] peaks_neg_new=peaks_neg[:] clusters_to_be_deleted=[] - if len(arg_diff_cluster)>=2 and len(arg_diff_cluster)>0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1]) for i in range(len(arg_diff_cluster)-1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:arg_diff_cluster[i+1]+1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1: + arg_diff_cluster[i+1]+1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:]) elif len(arg_neg_must_be_deleted)>=2 and len(arg_diff_cluster)==0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[:]) - - if len(arg_neg_must_be_deleted)==1: clusters_to_be_deleted.append(arg_neg_must_be_deleted) - - if len(clusters_to_be_deleted)>0: peaks_new_extra=[] for m in range(len(clusters_to_be_deleted)): @@ -297,7 +267,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new=peaks_new[peaks_new!=peaks[clusters_to_be_deleted[m][m1]-1]] peaks_new=peaks_new[peaks_new!=peaks[clusters_to_be_deleted[m][m1]]] - peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg[clusters_to_be_deleted[m][m1]]] peaks_new_tot=[] for i1 in peaks_new: @@ -321,36 +290,27 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): ##plt.plot(y_padded_smoothed) ##plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*') ##plt.show() - peaks=peaks_new_tot[:] peaks_neg=peaks_neg_new[:] - - else: peaks_new_tot=peaks[:] peaks=peaks_new_tot[:] peaks_neg=peaks_neg_new[:] except: pass - mean_value_of_peaks=np.mean(y_padded_smoothed[peaks]) std_value_of_peaks=np.std(y_padded_smoothed[peaks]) peaks_values=y_padded_smoothed[peaks] - peaks_neg = peaks_neg - 20 - 20 peaks = peaks - 20 - for jj in range(len(peaks_neg)): if peaks_neg[jj] > len(x) - 1: peaks_neg[jj] = len(x) - 1 - for jj in range(len(peaks)): if peaks[jj] > len(x) - 1: peaks[jj] = len(x) - 1 - - textline_boxes = [] textline_boxes_rot = [] @@ -386,7 +346,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) - if point_down_narrow >= img_patch.shape[0]: point_down_narrow = img_patch.shape[0] - 2 @@ -423,8 +382,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): if point_up_rot2<0: point_up_rot2=0 - - x_min_rot1=x_min_rot1-x_help x_max_rot2=x_max_rot2-x_help x_max_rot3=x_max_rot3-x_help @@ -435,29 +392,24 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_down_rot3=point_down_rot3-y_help point_down_rot4=point_down_rot4-y_help - - - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) - elif len(peaks) < 1: pass elif len(peaks) == 1: - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[0] + first_nonzero])), True) - for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[0] + first_nonzero])), True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -480,7 +432,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d - if x_min_rot1<0: x_min_rot1=0 if x_min_rot4<0: @@ -489,7 +440,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up_rot1=0 if point_up_rot2<0: point_up_rot2=0 - x_min_rot1=x_min_rot1-x_help x_max_rot2=x_max_rot2-x_help @@ -500,22 +450,15 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up_rot2=point_up_rot2-y_help point_down_rot3=point_down_rot3-y_help point_down_rot4=point_down_rot4-y_help - - - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - textline_boxes.append(np.array([[int(x_min), int(y_min)], [int(x_max), int(y_min)], [int(x_max), int(y_max)], [int(x_min), int(y_max)]])) - - - elif len(peaks) == 2: dis_to_next = np.abs(peaks[1] - peaks[0]) for jj in range(len(peaks)): @@ -533,12 +476,12 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): except: point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) - for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -556,8 +499,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d - - if x_min_rot1<0: x_min_rot1=0 if x_min_rot4<0: @@ -577,21 +518,16 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_down_rot3=point_down_rot3-y_help point_down_rot4=point_down_rot4-y_help - - - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) else: for jj in range(len(peaks)): - if jj == 0: dis_to_next = peaks[jj + 1] - peaks[jj] # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) @@ -615,12 +551,12 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next_up) point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down) - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) - for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -646,7 +582,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up_rot1=0 if point_up_rot2<0: point_up_rot2=0 - x_min_rot1=x_min_rot1-x_help x_max_rot2=x_max_rot2-x_help @@ -657,29 +592,24 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up_rot2=point_up_rot2-y_help point_down_rot3=point_down_rot3-y_help point_down_rot4=point_down_rot4-y_help - - - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) - return peaks, textline_boxes_rot def separate_lines_vertical(img_patch, contour_text_interest, thetha): - thetha = thetha + 90 contour_text_interest_copy = contour_text_interest.copy() - x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix = dedup_separate_lines(img_patch, contour_text_interest, thetha, 0) - + x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, \ + first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, \ + peaks, peaks_neg, rotation_matrix = dedup_separate_lines(img_patch, contour_text_interest, thetha, 0) # plt.plot(y_padded_up_to_down_padded) # plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*') @@ -693,8 +623,7 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg]) - arg_neg_must_be_deleted = np.array(range(len(peaks_neg)))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] - + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -705,17 +634,15 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): clusters_to_be_deleted = [] if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + arg_diff_cluster[i + 1] + 1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[:]) - if len(arg_neg_must_be_deleted) == 1: clusters_to_be_deleted.append(arg_neg_must_be_deleted) - if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -725,7 +652,6 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new = peaks_new[peaks_new != peaks[clusters_to_be_deleted[m][m1] - 1]] peaks_new = peaks_new[peaks_new != peaks[clusters_to_be_deleted[m][m1]]] - peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg[clusters_to_be_deleted[m][m1]]] peaks_new_tot = [] for i1 in peaks_new: @@ -796,7 +722,6 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -823,13 +748,16 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): if point_up_rot2 < 0: point_up_rot2 = 0 - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - - textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) - + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) elif len(peaks) < 1: pass - elif len(peaks) == 1: x_min = x_min_cont x_max = x_max_cont @@ -856,10 +784,14 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): if point_up_rot2 < 0: point_up_rot2 = 0 - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - - textline_boxes.append(np.array([[int(x_min), int(y_min)], [int(x_max), int(y_min)], [int(x_max), int(y_max)], [int(x_min), int(y_max)]])) - + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + textline_boxes.append(np.array([[int(x_min), int(y_min)], + [int(x_max), int(y_min)], + [int(x_max), int(y_max)], + [int(x_min), int(y_max)]])) elif len(peaks) == 2: dis_to_next = np.abs(peaks[1] - peaks[0]) for jj in range(len(peaks)): @@ -874,11 +806,12 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_down = img_patch.shape[0] - 2 point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next) - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -905,12 +838,16 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): if point_up_rot2 < 0: point_up_rot2 = 0 - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - - textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) else: for jj in range(len(peaks)): - if jj == 0: dis_to_next = peaks[jj + 1] - peaks[jj] # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) @@ -934,11 +871,12 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_up = peaks[jj] + first_nonzero - int(1.0 / 1.9 * dis_to_next_up) point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down) - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -965,14 +903,17 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): if point_up_rot2 < 0: point_up_rot2 = 0 - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - - textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) - + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) return peaks, textline_boxes_rot def separate_lines_new_inside_tiles2(img_patch, thetha): - (h, w) = img_patch.shape[:2] center = (w // 2, h // 2) M = cv2.getRotationMatrix2D(center, -thetha, 1.0) @@ -994,9 +935,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): y_max_cont = img_patch.shape[0] xv = np.linspace(x_min_cont, x_max_cont, 1000) - textline_patch_sum_along_width = img_patch.sum(axis=1) - first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) y = textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero] @@ -1006,9 +945,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) if 1 > 0: - try: - y_padded_smoothed_e = gaussian_filter1d(y_padded, 2) y_padded_up_to_down_e = -y_padded + np.max(y_padded) y_padded_up_to_down_padded_e = np.zeros(len(y_padded_up_to_down_e) + 40) @@ -1019,7 +956,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.array(range(len(peaks_neg_e)))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -1030,12 +967,10 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): clusters_to_be_deleted = [] if len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) - if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -1045,7 +980,6 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1] - 1]] peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1]]] - peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg_e[clusters_to_be_deleted[m][m1]]] peaks_new_tot = [] for i1 in peaks_new: @@ -1053,12 +987,13 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): for i1 in peaks_new_extra: peaks_new_tot.append(i1) peaks_new_tot = np.sort(peaks_new_tot) - else: peaks_new_tot = peaks_e[:] textline_con, hierarchy = return_contours_of_image(img_patch) - textline_con_fil = filter_contours_area_of_image(img_patch, textline_con, hierarchy, max_area=1, min_area=0.0008) + textline_con_fil = filter_contours_area_of_image(img_patch, + textline_con, hierarchy, + max_area=1, min_area=0.0008) y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil) sigma_gaus = int(y_diff_mean * (7.0 / 40.0)) @@ -1084,27 +1019,23 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): try: neg_peaks_max = np.max(y_padded_smoothed[peaks]) - arg_neg_must_be_deleted = np.array(range(len(peaks_neg)))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] - + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1] clusters_to_be_deleted = [] - if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + arg_diff_cluster[i + 1] + 1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[:]) - if len(arg_neg_must_be_deleted) == 1: clusters_to_be_deleted.append(arg_neg_must_be_deleted) - if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -1114,7 +1045,6 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new = peaks_new[peaks_new != peaks[clusters_to_be_deleted[m][m1] - 1]] peaks_new = peaks_new[peaks_new != peaks[clusters_to_be_deleted[m][m1]]] - peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg[clusters_to_be_deleted[m][m1]]] peaks_new_tot = [] for i1 in peaks_new: @@ -1138,7 +1068,6 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): # plt.plot(y_padded_smoothed) # plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*') # plt.show() - peaks = peaks_new_tot[:] peaks_neg = peaks_neg_new[:] except: @@ -1166,7 +1095,6 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): # print(peaks_neg_true) for i in range(len(peaks_neg_true)): img_patch[peaks_neg_true[i] - 6 : peaks_neg_true[i] + 6, :] = 0 - else: pass @@ -1346,14 +1274,14 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area) - + contours_imgs = filter_contours_area_of_image_tables(thresh, + contours_imgs, hierarchy, + max_area=max_area, min_area=min_area) cont_final = [] ###print(add_boxes_coor_into_textlines,'ikki') for i in range(len(contours_imgs)): img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3)) img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255)) - img_contour = img_contour.astype(np.uint8) img_contour = cv2.dilate(img_contour, kernel, iterations=4) @@ -1373,9 +1301,7 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i ##print(cont_final,'nadizzzz') return None, cont_final - def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False): - textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 textline_mask = textline_mask.astype(np.uint8) kernel = np.ones((5, 5), np.uint8) @@ -1400,8 +1326,10 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest x_help = 30 y_help = 2 - textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help), textline_mask.shape[1] + int(2 * x_help), 3)) - textline_mask_help[y_help : y_help + textline_mask.shape[0], x_help : x_help + textline_mask.shape[1], :] = np.copy(textline_mask[:, :, :]) + textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help), + textline_mask.shape[1] + int(2 * x_help), 3)) + textline_mask_help[y_help : y_help + textline_mask.shape[0], + x_help : x_help + textline_mask.shape[1], :] = np.copy(textline_mask[:, :, :]) dst = rotate_image(textline_mask_help, slope) dst = dst[:, :, 0] @@ -1412,7 +1340,6 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest # plt.show() contour_text_copy = contour_text_interest.copy() - contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[0] contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1] @@ -1423,12 +1350,12 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest # plt.imshow(img_contour) # plt.show() - img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help), img_contour.shape[1] + int(2 * x_help), 3)) - - img_contour_help[y_help : y_help + img_contour.shape[0], x_help : x_help + img_contour.shape[1], :] = np.copy(img_contour[:, :, :]) + img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help), + img_contour.shape[1] + int(2 * x_help), 3)) + img_contour_help[y_help : y_help + img_contour.shape[0], + x_help : x_help + img_contour.shape[1], :] = np.copy(img_contour[:, :, :]) img_contour_rot = rotate_image(img_contour_help, slope) - # plt.imshow(img_contour_rot_help) # plt.show() @@ -1454,12 +1381,13 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest # print('juzaa') if abs(slope) > 45: # print(add_boxes_coor_into_textlines,'avval') - _, contours_rotated_clean = separate_lines_vertical_cont(textline_mask, contours_text_rot[ind_big_con], box_ind, slope, add_boxes_coor_into_textlines=add_boxes_coor_into_textlines) + _, contours_rotated_clean = separate_lines_vertical_cont( + textline_mask, contours_text_rot[ind_big_con], box_ind, slope, + add_boxes_coor_into_textlines=add_boxes_coor_into_textlines) else: - _, contours_rotated_clean = separate_lines(dst, contours_text_rot[ind_big_con], slope, x_help, y_help) - + _, contours_rotated_clean = separate_lines( + dst, contours_text_rot[ind_big_con], slope, x_help, y_help) except: - contours_rotated_clean = [] return contours_rotated_clean @@ -1487,11 +1415,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl # print(margin,'margin') # if margin<=4: # margin = int(0.08 * length_x) - # margin=0 width_mid = length_x - 2 * margin - nxf = img_path.shape[1] / float(width_mid) if nxf > int(nxf): @@ -1553,8 +1479,8 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] img_resized = np.zeros((int(img_int.shape[0] * (1.2)), int(img_int.shape[1] * (3)))) - - img_resized[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]] = img_int[:, :] + img_resized[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], + int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] = img_int[:, :] # plt.imshow(img_xline) # plt.show() img_line_rotated = rotate_image(img_resized, slopes_tile_wise[i]) @@ -1565,7 +1491,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl img_patch_separated_returned = rotate_image(img_patch_separated, -slopes_tile_wise[i]) img_patch_separated_returned[:, :][img_patch_separated_returned[:, :] != 0] = 1 - img_patch_separated_returned_true_size = img_patch_separated_returned[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]] + img_patch_separated_returned_true_size = img_patch_separated_returned[ + int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], + int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin] img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size @@ -1594,27 +1522,19 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1])) img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0] - - max_shape=np.max(img_int.shape) img_resized=np.zeros((int( max_shape*(1.1) ) , int( max_shape*(1.1) ) )) - onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.) onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) - #img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) )) - - - #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] #print(img_resized.shape,'img_resizedshape') #plt.imshow(img_resized) #plt.show() - if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: #plt.imshow(img_resized) #plt.show() @@ -1623,7 +1543,6 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) - elif main_page: #plt.imshow(img_resized) #plt.show() @@ -1637,7 +1556,6 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, else: angles = np.linspace(90, 12, n_tot_angles) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) - else: angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) @@ -1695,7 +1613,9 @@ def do_work_of_slopes_new( else: try: textline_con, hierarchy = return_contours_of_image(img_int_p) - textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.00008) + textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, + hierarchy, + max_area=1, min_area=0.00008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if np.isnan(y_diff_mean): slope_for_all = MAX_SLOPE @@ -1733,7 +1653,6 @@ def do_work_of_slopes_new( return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope - def do_work_of_slopes_new_curved( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, @@ -1759,7 +1678,9 @@ def do_work_of_slopes_new_curved( else: try: textline_con, hierarchy = return_contours_of_image(img_int_p) - textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.0008) + textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, + hierarchy, + max_area=1, min_area=0.0008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if np.isnan(y_diff_mean): slope_for_all = MAX_SLOPE @@ -1788,7 +1709,8 @@ def do_work_of_slopes_new_curved( textline_biggest_region = mask_biggest * textline_mask_tot_ea # print(slope_for_all,'slope_for_all') - textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y+h, x: x+w], 0, num_col, slope_for_all, + textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y+h, x: x+w], 0, + num_col, slope_for_all, logger=logger, plotter=plotter) # new line added From 25116a2c79440ea16d8a5e28c6f1b7f08da5c6b6 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 19 Feb 2025 00:35:48 +0100 Subject: [PATCH 35/36] resolved 2 errors --- src/eynollah/eynollah.py | 6 ++++-- src/eynollah/utils/__init__.py | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 25d5ec4..9158168 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4074,8 +4074,10 @@ class Eynollah: ind_textline_inside_tr = list(range(len(contours[jj]))) index_textline_inside_textregion = index_textline_inside_textregion + ind_textline_inside_tr - ind_ins = [0] * len(contours[jj]) + jj - indexes_of_textline_tot = indexes_of_textline_tot + ind_ins + #ind_ins = [0] * len(contours[jj]) + jj + ind_ins = np.zeros( len(contours[jj]) ) + jj + list_ind_ins = list(ind_ins) + indexes_of_textline_tot = indexes_of_textline_tot + list_ind_ins M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index feab341..a67fc38 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -237,8 +237,11 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order( if len(remained_sep_indexes)>1: #print(np.array(remained_sep_indexes),'np.array(remained_sep_indexes)') #print(np.array(mother),'mother') - remained_sep_indexes_without_mother = remained_sep_indexes[mother==0] - remained_sep_indexes_with_child_without_mother = remained_sep_indexes[mother==0 & child==1] + ##remained_sep_indexes_without_mother = remained_sep_indexes[mother==0] + ##remained_sep_indexes_with_child_without_mother = remained_sep_indexes[mother==0 & child==1] + remained_sep_indexes_without_mother=np.array(list(remained_sep_indexes))[np.array(mother)==0] + remained_sep_indexes_with_child_without_mother=np.array(list(remained_sep_indexes))[(np.array(mother)==0) & (np.array(child)==1)] + #print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother') #print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother') x_end_with_child_without_mother = x_end[remained_sep_indexes_with_child_without_mother] From 7110bd971f719bd3d86457bd3a1b6375ca952921 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 27 Feb 2025 19:11:15 +0100 Subject: [PATCH 36/36] resolved an error for light version in the case that slope_deskew is smaller than slope_threshold --- src/eynollah/eynollah.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 9158168..6802e47 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4120,7 +4120,7 @@ class Eynollah: def filter_contours_without_textline_inside( self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered): - + ###contours_txtline_of_all_textregions = [] ###for jj in range(len(contours_textline)): ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] @@ -4156,7 +4156,8 @@ class Eynollah: contours.pop(ind_u_a_trs) contours_textline.pop(ind_u_a_trs) text_con_org.pop(ind_u_a_trs) - contours_only_text_parent_d_ordered.pop(ind_u_a_trs) + if len(contours_only_text_parent_d_ordered) > 0: + contours_only_text_parent_d_ordered.pop(ind_u_a_trs) return contours, text_con_org, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) @@ -4518,7 +4519,6 @@ class Eynollah: ###min_con_area = 0.000005 contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - if len(contours_only_text_parent) > 0: areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) @@ -4619,8 +4619,7 @@ class Eynollah: else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] - contours_only_text_parent = [] - + #contours_only_text_parent = [] if not len(contours_only_text_parent): # stop early empty_marginals = [[]] * len(polygons_of_marginals) @@ -4690,8 +4689,7 @@ class Eynollah: all_found_textline_polygons_marginals) contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, \ index_by_text_par_con = self.filter_contours_without_textline_inside( - contours_only_text_parent, txt_con_org, all_found_textline_polygons, - contours_only_text_parent_d_ordered) + contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \