From 5fdc6d4fa48d25309d1a774b04e79debbf797e75 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 14 Oct 2023 09:05:05 +0200 Subject: [PATCH 001/107] integration of machine based reading order detection --- qurator/eynollah/eynollah.py | 222 +++++++++++++++++++++++++++--- qurator/eynollah/utils/contour.py | 11 +- 2 files changed, 209 insertions(+), 24 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 4b1b5e9..b83db98 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -78,6 +78,7 @@ from .utils.xml import order_and_id_of_texts from .plot import EynollahPlotter from .writer import EynollahXmlWriter +MIN_AREA_REGION = 0.0005 SLOPE_THRESHOLD = 0.13 RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: DPI_THRESHOLD = 298 @@ -225,6 +226,7 @@ class Eynollah: self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" + self.model_reading_order_machine_dir = dir_models + "/model_6_reading_order_machine_based" if self.textline_light: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: @@ -246,6 +248,7 @@ class Eynollah: self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) + self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) self.ls_imgs = os.listdir(self.dir_in) @@ -264,6 +267,7 @@ class Eynollah: self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) + self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) self.ls_imgs = os.listdir(self.dir_in) @@ -1647,9 +1651,39 @@ class Eynollah: mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + + #for jv in range(1): + #print(jv, hir_lines_xml[0][232][3]) + #test_khat = np.zeros(prediction_regions_org.shape) + + #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + + + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + + + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + #sys.exit() + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -1785,7 +1819,7 @@ class Eynollah: polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, 
polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) @@ -1853,7 +1887,7 @@ class Eynollah: mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) @@ -2821,13 +2855,157 @@ class Eynollah: model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model + + def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + + #print(text_regions_p.shape) + y_len = text_regions_p.shape[0] + x_len = text_regions_p.shape[1] + + img_poly = np.zeros((y_len,x_len), dtype='uint8') + + unique_pix = np.unique(text_regions_p) + #print(unique_pix, 'unique_pix') + + #for pix in unique_pix: + #print(pix) + #plt.imshow((text_regions_p[:,:]==pix)*1 ) + #plt.show() + + img_poly[text_regions_p[:,:]==1] = 1 + img_poly[text_regions_p[:,:]==2] = 2 + img_poly[text_regions_p[:,:]==3] = 4 + img_poly[text_regions_p[:,:]==6] = 5 + + #plt.imshow(text_regions_p) + #plt.show() + + + #plt.imshow(img_poly) + #plt.show() + model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + + height1 =672#448 + width1 = 448#224 + + height2 =672#448 + width2= 448#224 + + height3 =672#448 + width3 = 448#224 + + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + + img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') + + for j in range(len(cy_main)): + #print(j, int(y_max_main[j]), x_min_main[j], x_max_main[j] ) + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + #plt.imshow(img_header_and_sep[:,:]) + #plt.show() + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + #id_all_text = id_paragraph + id_header + + #texts_corr_order_index = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ] + #texts_corr_order_index_int = [int(x) for x in texts_corr_order_index] + + #co_text_all, texts_corr_order_index_int = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, min_area) + + labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') + for i in range(len(co_text_all)): + img_label = np.zeros((y_len,x_len,3),dtype='uint8') + img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1)) + labels_con[:,:,i] = img_label[:,:,0] + + + img3= np.copy(img_poly) + + labels_con = resize_image(labels_con, height1, width1) + + img_header_and_sep = resize_image(img_header_and_sep, height1, width1) + + img3= resize_image (img3, height3, width3) + + img3 = img3.astype(np.uint16) + + + #plt.imshow(img3) + #plt.show() + + order_matrix = np.zeros((labels_con.shape[2], labels_con.shape[2]))-1 + + for i in 
range(labels_con.shape[2]): + for j in range(labels_con.shape[2]): + if j>i: + img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) + img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) + #img1 = img1.astype(np.uint16) + #img2 = img2.astype(np.uint16) + + img2[:,:,0][img3[:,:]==5] = 2 + img2[:,:,0][img_header_and_sep[:,:]==1] = 3 + + + + img1[:,:,0][img3[:,:]==5] = 2 + img1[:,:,0][img_header_and_sep[:,:]==1] = 3 + + + #plt.imshow(labels_con[:,:,i]) + #plt.show() + + #plt.imshow(img2[:,:,0]) + #plt.show() + + + #plt.imshow(img1[:,:,0]) + #plt.show() + + #sys.exit() + input_1= np.zeros( (height1, width1,3)) + + input_1[:,:,0] = img1[:,:,0]/3. + input_1[:,:,2] = img2[:,:,0]/3. + input_1[:,:,1] = img3[:,:]/5. + + #y_pr=model.predict([img1.reshape(1,height1,width1,3) , img2.reshape(1,height2,width2,3),img3.reshape(1,height3,width3,3) ], verbose=2) + y_pr=model_ro_machine.predict(input_1.reshape(1,height1,width1,3) , verbose=0) + #print(y_pr) + + if y_pr>=0.5: + order_class = 1 + else: + order_class = 0 + + order_matrix[i,j] = y_pr#order_class + order_matrix[j,i] = 1-y_pr#int( 1 - order_class) + + + sum_mat = np.sum(order_matrix, axis=1) + index_sort = np.argsort(sum_mat) + index_sort = index_sort[::-1] + + print(index_sort) + REGION_ID_TEMPLATE = 'region_%04d' + order_of_texts = [] + id_of_texts = [] + for order, id_text in enumerate(index_sort): + order_of_texts.append(id_text) + id_of_texts.append( REGION_ID_TEMPLATE % order ) + + + return order_of_texts, id_of_texts def run(self): """ Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - + + self.reading_order_machine_based = True#True t0_tot = time.time() @@ -2896,7 +3074,7 @@ class Eynollah: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - min_con_area = 0.000005 + ###min_con_area = 0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) @@ -2906,8 +3084,8 @@ class Eynollah: areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) #self.logger.info('areas_cnt_text %s', areas_cnt_text) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) @@ -2983,8 +3161,8 @@ class Eynollah: areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] 
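The hunk above replaces the hard-coded min_con_area threshold with the new MIN_AREA_REGION constant when discarding tiny text-region contours. As a self-contained sketch of that filtering step (helper and variable names here are illustrative, not eynollah's own):

    import cv2
    import numpy as np

    MIN_AREA_REGION = 0.0005  # minimum region area, as a fraction of the page area

    def keep_significant_regions(contours, page_shape, min_rel_area=MIN_AREA_REGION):
        # relative area = contour area / page area, as in the hunk above
        page_area = float(page_shape[0] * page_shape[1])
        rel_areas = [cv2.contourArea(c) / page_area for c in contours]
        kept = [(c, a) for c, a in zip(contours, rel_areas) if a > min_rel_area]
        kept.sort(key=lambda ca: ca[1])  # mirrors the np.argsort step that follows
        return [c for c, _ in kept]
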
index_con_parents = np.argsort(areas_cnt_text_parent) contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) @@ -3086,21 +3264,33 @@ class Eynollah: self.plotter.write_images_into_directory(polygons_of_images, image_page) t_order = time.time() if self.full_layout: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts + + print(id_of_texts_tot,'id_of_texts_tot') + print(order_text_new,'order_text_new') + else: contours_only_text_parent_h = None - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts diff --git a/qurator/eynollah/utils/contour.py b/qurator/eynollah/utils/contour.py index bac8235..53b39b5 100644 --- 
a/qurator/eynollah/utils/contour.py +++ b/qurator/eynollah/utils/contour.py @@ -44,8 +44,8 @@ def get_text_region_boxes_by_given_contours(contours): def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): found_polygons_early = list() - jv = 0 - for c in contours: + + for jv,c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue @@ -53,14 +53,12 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area area = polygon.area if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1: # and hierarchy[0][jv][3]==-1 : found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint)) - jv += 1 return found_polygons_early def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): found_polygons_early = list() - jv = 0 - for c in contours: + for jv,c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue @@ -73,7 +71,6 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]): # and hierarchy[0][jv][3]==-1 : # print(c[0][0][1]) found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32)) - jv += 1 return found_polygons_early def find_new_features_of_contours(contours_main): @@ -234,8 +231,6 @@ def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first): with Pool(cpu_count()) as p: cnts_org = p.starmap(loop_contour_image, [(index_l,cnts, img,slope_first) for index_l in range(len(cnts))]) - print(len(cnts_org),'lendiha') - return cnts_org def get_textregion_contours_in_org_image(cnts, img, slope_first): From 49c93149a49b103b6434fee79ef28517fa4b13f9 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Oct 2023 10:01:28 +0200 Subject: [PATCH 002/107] machine based reading order inference with a variable batch size --- qurator/eynollah/eynollah.py | 96 +++++++++++++++--------------------- 1 file changed, 41 insertions(+), 55 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b83db98..35992c9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2857,32 +2857,20 @@ class Eynollah: return model def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): - - #print(text_regions_p.shape) y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] img_poly = np.zeros((y_len,x_len), dtype='uint8') unique_pix = np.unique(text_regions_p) - #print(unique_pix, 'unique_pix') - - #for pix in unique_pix: - #print(pix) - #plt.imshow((text_regions_p[:,:]==pix)*1 ) - #plt.show() + img_poly[text_regions_p[:,:]==1] = 1 img_poly[text_regions_p[:,:]==2] = 2 img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - #plt.imshow(text_regions_p) - #plt.show() - - - #plt.imshow(img_poly) - #plt.show() + model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) height1 =672#448 @@ -2900,19 +2888,11 @@ class Eynollah: img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') for j in range(len(cy_main)): - #print(j, int(y_max_main[j]), x_min_main[j], x_max_main[j] ) img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - #plt.imshow(img_header_and_sep[:,:]) - 
#plt.show() co_text_all = contours_only_text_parent + contours_only_text_parent_h - #id_all_text = id_paragraph + id_header - - #texts_corr_order_index = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ] - #texts_corr_order_index_int = [int(x) for x in texts_corr_order_index] - #co_text_all, texts_corr_order_index_int = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, min_area) labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') for i in range(len(co_text_all)): @@ -2932,63 +2912,69 @@ class Eynollah: img3 = img3.astype(np.uint16) - #plt.imshow(img3) - #plt.show() - order_matrix = np.zeros((labels_con.shape[2], labels_con.shape[2]))-1 + inference_bs = 6 + tot_counter = 1 + batch_counter = 0 + i_indexer = [] + j_indexer =[] + + input_1= np.zeros( (inference_bs, height1, width1,3)) + tot_iteration = int( ( labels_con.shape[2]*(labels_con.shape[2]-1) )/2. ) + full_bs_ite= tot_iteration//inference_bs + last_bs = tot_iteration % inference_bs + + #print(labels_con.shape[2],"number of regions for reading order") for i in range(labels_con.shape[2]): for j in range(labels_con.shape[2]): if j>i: img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) - #img1 = img1.astype(np.uint16) - #img2 = img2.astype(np.uint16) img2[:,:,0][img3[:,:]==5] = 2 img2[:,:,0][img_header_and_sep[:,:]==1] = 3 - - img1[:,:,0][img3[:,:]==5] = 2 img1[:,:,0][img_header_and_sep[:,:]==1] = 3 - #plt.imshow(labels_con[:,:,i]) - #plt.show() - - #plt.imshow(img2[:,:,0]) - #plt.show() - + i_indexer.append(i) + j_indexer.append(j) - #plt.imshow(img1[:,:,0]) - #plt.show() + input_1[batch_counter,:,:,0] = img1[:,:,0]/3. + input_1[batch_counter,:,:,2] = img2[:,:,0]/3. + input_1[batch_counter,:,:,1] = img3[:,:]/5. - #sys.exit() - input_1= np.zeros( (height1, width1,3)) + batch_counter = batch_counter+1 - input_1[:,:,0] = img1[:,:,0]/3. - input_1[:,:,2] = img2[:,:,0]/3. - input_1[:,:,1] = img3[:,:]/5. 
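The change above batches the pairwise comparisons instead of calling predict() once per pair: full batches of inference_bs samples are flushed, and the tot_counter bookkeeping flushes the final partial batch. The same accumulate-and-flush pattern, reduced to a self-contained sketch (model and encode_pair are stand-ins, not eynollah APIs):

    import numpy as np

    def score_pairs_batched(model, pairs, encode_pair, batch_size=6):
        scores = {}
        batch, members = [], []
        for n, (i, j) in enumerate(pairs, start=1):
            batch.append(encode_pair(i, j))
            members.append((i, j))
            # flush a full batch, or the final partial one
            if len(batch) == batch_size or n == len(pairs):
                y = model.predict(np.stack(batch), verbose=0)
                for (a, b), p in zip(members, y[:, 0]):
                    scores[(a, b)] = float(p)  # pairwise order score, as in the hunk
                batch, members = [], []
        return scores
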
- - #y_pr=model.predict([img1.reshape(1,height1,width1,3) , img2.reshape(1,height2,width2,3),img3.reshape(1,height3,width3,3) ], verbose=2) - y_pr=model_ro_machine.predict(input_1.reshape(1,height1,width1,3) , verbose=0) - #print(y_pr) + if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): + y_pr=model_ro_machine.predict(input_1 , verbose=0) - if y_pr>=0.5: - order_class = 1 - else: - order_class = 0 + if batch_counter==inference_bs: + iteration_batches = inference_bs + else: + iteration_batches = last_bs + for jb in range(iteration_batches): + if y_pr[jb][0]>=0.5: + order_class = 1 + else: + order_class = 0 + + order_matrix[i_indexer[jb],j_indexer[jb]] = y_pr[jb][0]#order_class + order_matrix[j_indexer[jb],i_indexer[jb]] = 1-y_pr[jb][0]#int( 1 - order_class) + + batch_counter = 0 - order_matrix[i,j] = y_pr#order_class - order_matrix[j,i] = 1-y_pr#int( 1 - order_class) + i_indexer = [] + j_indexer = [] + tot_counter = tot_counter+1 sum_mat = np.sum(order_matrix, axis=1) index_sort = np.argsort(sum_mat) index_sort = index_sort[::-1] - print(index_sort) REGION_ID_TEMPLATE = 'region_%04d' order_of_texts = [] id_of_texts = [] @@ -3272,13 +3258,12 @@ class Eynollah: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts - print(id_of_texts_tot,'id_of_texts_tot') - print(order_text_new,'order_text_new') else: contours_only_text_parent_h = None @@ -3291,6 +3276,7 @@ class Eynollah: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts From 59c0d90e5af7ed3f1d3d8d7a78ecdcc17eb2fb59 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Oct 2023 10:17:46 +0200 Subject: [PATCH 003/107] machine based reading order inference & optimized algorithm --- qurator/eynollah/eynollah.py | 153 ++++++++++++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 3 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 35992c9..63e71cb 100644 --- 
a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2855,7 +2855,6 @@ class Eynollah: model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model - def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] @@ -2983,6 +2982,154 @@ class Eynollah: id_of_texts.append( REGION_ID_TEMPLATE % order ) + return order_of_texts, id_of_texts + + def update_list_and_return_first_biger_than_one_length(self,index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp): + list_inp.pop(index_element_to_be_updated) + if len(pr_list)>0: + list_inp.insert(index_element_to_be_updated, pr_list) + else: + index_element_to_be_updated = index_element_to_be_updated -1 + + list_inp.insert(index_element_to_be_updated+1, [innner_index_pr_pos]) + if len(pos_list)>0: + list_inp.insert(index_element_to_be_updated+2, pos_list) + + len_all_elements = [len(i) for i in list_inp] + list_len_bigger_1 = np.where(np.array(len_all_elements)>1) + list_len_bigger_1 = list_len_bigger_1[0] + + if len(list_len_bigger_1)>0: + early_list_bigger_than_one = list_len_bigger_1[0] + else: + early_list_bigger_than_one = -20 + return list_inp, early_list_bigger_than_one + def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + y_len = text_regions_p.shape[0] + x_len = text_regions_p.shape[1] + + img_poly = np.zeros((y_len,x_len), dtype='uint8') + + unique_pix = np.unique(text_regions_p) + + + img_poly[text_regions_p[:,:]==1] = 1 + img_poly[text_regions_p[:,:]==2] = 2 + img_poly[text_regions_p[:,:]==3] = 4 + img_poly[text_regions_p[:,:]==6] = 5 + + + model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + + height1 =672#448 + width1 = 448#224 + + height2 =672#448 + width2= 448#224 + + height3 =672#448 + width3 = 448#224 + + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + + img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') + + for j in range(len(cy_main)): + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + + + labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') + for i in range(len(co_text_all)): + img_label = np.zeros((y_len,x_len,3),dtype='uint8') + img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1)) + labels_con[:,:,i] = img_label[:,:,0] + + + img3= np.copy(img_poly) + + labels_con = resize_image(labels_con, height1, width1) + + img_header_and_sep = resize_image(img_header_and_sep, height1, width1) + + img3= resize_image (img3, height3, width3) + + img3 = img3.astype(np.uint16) + + inference_bs = 4 + input_1= np.zeros( (inference_bs, height1, width1,3)) + starting_list_of_regions = [] + starting_list_of_regions.append( list(range(labels_con.shape[2])) ) + index_update = 0 + index_selected = starting_list_of_regions[0] + #print(labels_con.shape[2],"number of regions for reading order") + while index_update>=0: + ij_list = starting_list_of_regions[index_update] + i = ij_list[0] + ij_list.pop(0) + + pr_list = [] + post_list = [] + + batch_counter = 0 + tot_counter = 1 + + tot_iteration = len(ij_list) + full_bs_ite= tot_iteration//inference_bs + last_bs = 
tot_iteration % inference_bs + + jbatch_indexer =[] + for j in ij_list: + img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) + img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) + + img2[:,:,0][img3[:,:]==5] = 2 + img2[:,:,0][img_header_and_sep[:,:]==1] = 3 + + img1[:,:,0][img3[:,:]==5] = 2 + img1[:,:,0][img_header_and_sep[:,:]==1] = 3 + + jbatch_indexer.append(j) + + input_1[batch_counter,:,:,0] = img1[:,:,0]/3. + input_1[batch_counter,:,:,2] = img2[:,:,0]/3. + input_1[batch_counter,:,:,1] = img3[:,:]/5. + + batch_counter = batch_counter+1 + + if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): + y_pr=model_ro_machine.predict(input_1 , verbose=0) + + if batch_counter==inference_bs: + iteration_batches = inference_bs + else: + iteration_batches = last_bs + for jb in range(iteration_batches): + if y_pr[jb][0]>=0.5: + post_list.append(jbatch_indexer[jb]) + else: + pr_list.append(jbatch_indexer[jb]) + + batch_counter = 0 + jbatch_indexer = [] + + tot_counter = tot_counter+1 + + starting_list_of_regions, index_update = self.update_list_and_return_first_biger_than_one_length(index_update, i, pr_list, post_list,starting_list_of_regions) + + index_sort = [i[0] for i in starting_list_of_regions ] + + REGION_ID_TEMPLATE = 'region_%04d' + order_of_texts = [] + id_of_texts = [] + for order, id_text in enumerate(index_sort): + order_of_texts.append(id_text) + id_of_texts.append( REGION_ID_TEMPLATE % order ) + + return order_of_texts, id_of_texts def run(self): @@ -3252,7 +3399,7 @@ class Eynollah: if self.full_layout: if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) @@ -3268,7 +3415,7 @@ class Eynollah: else: contours_only_text_parent_h = None if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) From 941d87328a45ad6df5df27c0a84a4b695de65c67 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Oct 2023 11:19:30 +0200 Subject: [PATCH 004/107] machine based reading order & works for not full layout case --- qurator/eynollah/eynollah.py | 84 ++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 63e71cb..c008476 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2881,16 +2881,17 @@ class Eynollah: height3 =672#448 width3 = 448#224 - _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) - - img_header_and_sep = np.zeros((y_len,x_len), 
dtype='uint8') - - for j in range(len(cy_main)): - img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - - co_text_all = contours_only_text_parent + contours_only_text_parent_h + if contours_only_text_parent_h: + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + for j in range(len(cy_main)): + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + else: + co_text_all = contours_only_text_parent labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') @@ -2984,7 +2985,7 @@ class Eynollah: return order_of_texts, id_of_texts - def update_list_and_return_first_biger_than_one_length(self,index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp): + def update_list_and_return_first_with_length_bigger_than_one(self,index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp): list_inp.pop(index_element_to_be_updated) if len(pr_list)>0: list_inp.insert(index_element_to_be_updated, pr_list) @@ -3030,16 +3031,17 @@ class Eynollah: height3 =672#448 width3 = 448#224 - _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) - - img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') - - for j in range(len(cy_main)): - img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - - co_text_all = contours_only_text_parent + contours_only_text_parent_h + if contours_only_text_parent_h: + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + for j in range(len(cy_main)): + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + else: + co_text_all = contours_only_text_parent labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') @@ -3118,7 +3120,7 @@ class Eynollah: tot_counter = tot_counter+1 - starting_list_of_regions, index_update = self.update_list_and_return_first_biger_than_one_length(index_update, i, pr_list, post_list,starting_list_of_regions) + starting_list_of_regions, index_update = self.update_list_and_return_first_with_length_bigger_than_one(index_update, i, pr_list, post_list,starting_list_of_regions) index_sort = [i[0] for i in starting_list_of_regions ] @@ -3138,7 +3140,7 @@ class Eynollah: """ self.logger.debug("enter run") - self.reading_order_machine_based = True#True + self.reading_order_machine_based = True#False#True#True t0_tot = time.time() @@ -3359,32 +3361,32 @@ class Eynollah: all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) pixel_lines = 6 + if not self.reading_order_machine_based: + if not self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + _, _, 
matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + if num_col_classifier >= 3: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - if num_col_classifier >= 3: + else: + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + + if not self.reading_order_machine_based: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - - - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) - else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) #print(boxes_d,'boxes_d') #img_once = 
np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1])) From eac18c553d6829cbb6c3c0d6ca1572977a2b3243 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 13 Dec 2023 01:44:51 +0100 Subject: [PATCH 005/107] machine based reading order as an argument --- qurator/eynollah/cli.py | 8 ++++++++ qurator/eynollah/eynollah.py | 6 +++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index a2a2ad0..a422df9 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -133,6 +133,12 @@ from qurator.eynollah.eynollah import Eynollah is_flag=True, help="if this parameter set to true, this tool would ignore page extraction", ) +@click.option( + "--reading_order_machine_based/--heuristic_reading_order", + "-romb/-hro", + is_flag=True, + help="if this parameter set to true, this tool would apply machine based reading order detection", +) @click.option( "--log-level", "-l", @@ -160,6 +166,7 @@ def main( allow_scaling, headers_off, light_version, + reading_order_machine_based, ignore_page_extraction, log_level ): @@ -197,6 +204,7 @@ def main( headers_off=headers_off, light_version=light_version, ignore_page_extraction=ignore_page_extraction, + reading_order_machine_based=reading_order_machine_based, ) eynollah.run() #pcgts = eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c008476..5e06734 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -165,6 +165,7 @@ class Eynollah: headers_off=False, light_version=False, ignore_page_extraction=False, + reading_order_machine_based=False, override_dpi=None, logger=None, pcgts=None, @@ -181,6 +182,7 @@ class Eynollah: self.dir_in = dir_in self.dir_of_all = dir_of_all self.dir_save_page = dir_save_page + self.reading_order_machine_based = reading_order_machine_based self.dir_of_deskewed = dir_of_deskewed self.dir_of_deskewed = dir_of_deskewed self.dir_of_cropped_images=dir_of_cropped_images @@ -226,7 +228,7 @@ class Eynollah: self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" - self.model_reading_order_machine_dir = dir_models + "/model_6_reading_order_machine_based" + self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" if self.textline_light: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: @@ -3139,8 +3141,6 @@ class Eynollah: Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - - self.reading_order_machine_based = True#False#True#True t0_tot = time.time() From 514466883415f86ea90b6ef48a4e15187407ec05 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 17 Jul 2024 10:01:37 +0200 Subject: [PATCH 006/107] ocr engine first integration --- qurator/eynollah/cli.py | 8 + qurator/eynollah/eynollah.py | 295 ++++++++++++++++++++++++++++++++++- qurator/eynollah/writer.py | 15 +- 3 files changed, 313 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index a422df9..833e904 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -139,6 +139,12 @@ from qurator.eynollah.eynollah import Eynollah is_flag=True, help="if this parameter set to true, this tool would apply machine based reading order detection", ) +@click.option( + "--do_ocr", + "-ocr/-noocr", 
+ is_flag=True, + help="if this parameter set to true, this tool will try to do ocr", +) @click.option( "--log-level", "-l", @@ -167,6 +173,7 @@ def main( headers_off, light_version, reading_order_machine_based, + do_ocr, ignore_page_extraction, log_level ): @@ -205,6 +212,7 @@ def main( light_version=light_version, ignore_page_extraction=ignore_page_extraction, reading_order_machine_based=reading_order_machine_based, + do_ocr=do_ocr, ) eynollah.run() #pcgts = eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 5e06734..a505b0e 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -17,6 +17,16 @@ import gc from ocrd_utils import getLogger import cv2 import numpy as np +from transformers import TrOCRProcessor +from PIL import Image +import torch +from difflib import SequenceMatcher as sq +from transformers import VisionEncoderDecoderModel +from numba import cuda +import copy +from scipy.signal import find_peaks +from scipy.ndimage import gaussian_filter1d + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" stderr = sys.stderr sys.stderr = open(os.devnull, "w") @@ -166,6 +176,7 @@ class Eynollah: light_version=False, ignore_page_extraction=False, reading_order_machine_based=False, + do_ocr=False, override_dpi=None, logger=None, pcgts=None, @@ -199,6 +210,7 @@ class Eynollah: self.headers_off = headers_off self.light_version = light_version self.ignore_page_extraction = ignore_page_extraction + self.ocr = do_ocr self.pcgts = pcgts if not dir_in: self.plotter = None if not enable_plotting else EynollahPlotter( @@ -233,6 +245,9 @@ class Eynollah: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: self.model_textline_dir = dir_models + "/eynollah-textline_20210425" + if self.ocr: + self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" + self.model_tables = dir_models + "/eynollah-tables_20210319" self.models = {} @@ -251,6 +266,10 @@ class Eynollah: self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) + if self.ocr: + self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")#("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") self.ls_imgs = os.listdir(self.dir_in) @@ -3135,6 +3154,223 @@ class Eynollah: return order_of_texts, id_of_texts + def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot): + width = np.shape(textline_image)[1] + height = np.shape(textline_image)[0] + common_window = int(0.2*width) + + width1 = int ( width/2. - common_window ) + width2 = int ( width/2. 
+ common_window )
+
+        img_sum = np.sum(textline_image[:,:,0], axis=0)
+        sum_smoothed = gaussian_filter1d(img_sum, 3)
+
+        peaks_real, _ = find_peaks(sum_smoothed, height=0)
+
+        if len(peaks_real)>70:
+            print(len(peaks_real), 'len(peaks_real)')
+
+            peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
+
+            arg_sort = np.argsort(sum_smoothed[peaks_real])
+
+            arg_sort4 =arg_sort[::-1][:4]
+
+            peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
+
+            argsort_sorted = np.argsort(peaks_sort_4)
+
+            first_4_sorted = peaks_sort_4[argsort_sorted]
+            y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
+            #print(first_4_sorted,'first_4_sorted')
+
+            arg_sortnew = np.argsort(y_4_sorted)
+            peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] )
+
+            #plt.figure(ind_tot)
+            #plt.imshow(textline_image)
+            #plt.plot([peaks_final[0], peaks_final[0]], [0, height-1])
+            #plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
+            #plt.savefig('./'+str(ind_tot)+'.png')
+
+            return peaks_final[0], peaks_final[1]
+        else:
+            pass
+
+
+    def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self,textline_image, ind_tot):
+        width = np.shape(textline_image)[1]
+        height = np.shape(textline_image)[0]
+        common_window = int(0.06*width)
+
+        width1 = int ( width/2. - common_window )
+        width2 = int ( width/2. + common_window )
+
+        img_sum = np.sum(textline_image[:,:,0], axis=0)
+        sum_smoothed = gaussian_filter1d(img_sum, 3)
+
+        peaks_real, _ = find_peaks(sum_smoothed, height=0)
+
+        if len(peaks_real)>70:
+            #print(len(peaks_real), 'len(peaks_real)')
+
+            peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
+
+            arg_max = np.argmax(sum_smoothed[peaks_real])
+
+            peaks_final = peaks_real[arg_max]
+
+            #plt.figure(ind_tot)
+            #plt.imshow(textline_image)
+            #plt.plot([peaks_final, peaks_final], [0, height-1])
+            ##plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
+            #plt.savefig('./'+str(ind_tot)+'.png')
+
+            return peaks_final
+        else:
+            return None
+    def return_start_and_end_of_common_text_of_textline_ocr_new_splitted(self,peaks_real, sum_smoothed, start_split, end_split):
+        peaks_real = peaks_real[(peaks_real<end_split) & (peaks_real>start_split)]
+
+        arg_sort = np.argsort(sum_smoothed[peaks_real])
+
+        arg_sort4 =arg_sort[::-1][:4]
+
+        peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
+
+        argsort_sorted = np.argsort(peaks_sort_4)
+
+        first_4_sorted = peaks_sort_4[argsort_sorted]
+        y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
+        #print(first_4_sorted,'first_4_sorted')
+
+        arg_sortnew = np.argsort(y_4_sorted)
+        peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] )
+        return peaks_final[0]
+
+    def return_start_and_end_of_common_text_of_textline_ocr_new(self,textline_image, ind_tot):
+        width = np.shape(textline_image)[1]
+        height = np.shape(textline_image)[0]
+        common_window = int(0.15*width)
+
+        width1 = int ( width/2. - common_window )
+        width2 = int ( width/2. + common_window )
+        mid = int(width/2.)
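These projection-profile helpers locate low-ink columns near the middle of a textline; patch 006 uses them further down to OCR an over-long line as two overlapping halves and stitch the transcripts on their longest common match. That flow, condensed into a sketch (the TrOCR checkpoint name is a placeholder, not the model shipped with eynollah):

    from difflib import SequenceMatcher
    import torch
    from transformers import TrOCRProcessor, VisionEncoderDecoderModel

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(device)

    def ocr_long_textline(textline_rgb, width1, width2):
        # overlapping halves around the common window [width1, width2]
        left = textline_rgb[:, :width2, :]
        right = textline_rgb[:, width1:, :]
        pixel_values = processor([left, right], return_tensors="pt").pixel_values
        ids = model.generate(pixel_values.to(device))
        t_left, t_right = processor.batch_decode(ids, skip_special_tokens=True)
        # drop the duplicated overlap via the longest common match
        m = SequenceMatcher(None, t_left, t_right).find_longest_match(0, len(t_left), 0, len(t_right))
        return t_left + t_right[m.b + m.size:]
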
+ + img_sum = np.sum(textline_image[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + + if len(peaks_real)>70: + peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(peaks_real, sum_smoothed, width1, mid+2) + + peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(peaks_real, sum_smoothed, mid-2, width2) + + #plt.figure(ind_tot) + #plt.imshow(textline_image) + #plt.plot([peak_start, peak_start], [0, height-1]) + #plt.plot([peak_end, peak_end], [0, height-1]) + #plt.savefig('./'+str(ind_tot)+'.png') + + return peak_start, peak_end + else: + pass + + def return_ocr_of_textline_without_common_section(self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + if h2w_ratio > 0.05: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + else: + + #width = np.shape(textline_image)[1] + #height = np.shape(textline_image)[0] + #common_window = int(0.3*width) + + #width1 = int ( width/2. - common_window ) + #width2 = int ( width/2. + common_window ) + + + split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image, ind_tot) + if split_point: + image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) + + #pixel_values1 = processor(image1, return_tensors="pt").pixel_values + #pixel_values2 = processor(image2, return_tensors="pt").pixel_values + + pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values + generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device)) + generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + + #print(generated_text_merged,'generated_text_merged') + + #generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + #generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + + #generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + #generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + + #generated_text = generated_text1 + ' ' + generated_text2 + generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1] + + #print(generated_text1,'generated_text1') + #print(generated_text2, 'generated_text2') + #print('########################################') + else: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + #print(generated_text,'generated_text') + #print('########################################') + return generated_text + def return_ocr_of_textline(self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + if h2w_ratio > 0.05: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + else: + #width = np.shape(textline_image)[1] + #height = np.shape(textline_image)[0] + #common_window = int(0.3*width) + + #width1 = int ( width/2. 
- common_window ) + #width2 = int ( width/2. + common_window ) + + try: + width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot) + + image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height)) + + pixel_values1 = processor(image1, return_tensors="pt").pixel_values + pixel_values2 = processor(image2, return_tensors="pt").pixel_values + + generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + + generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + #print(generated_text1,'generated_text1') + #print(generated_text2, 'generated_text2') + #print('########################################') + + match = sq(None, generated_text1, generated_text2).find_longest_match(0, len(generated_text1), 0, len(generated_text2)) + + generated_text = generated_text1 + generated_text2[match.b+match.size:] + except: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return generated_text + + def return_textline_contour_with_added_box_coordinate(self, textline_contour, box_ind): + textline_contour[:,0] = textline_contour[:,0] + box_ind[2] + textline_contour[:,1] = textline_contour[:,1] + box_ind[0] + return textline_contour def run(self): """ @@ -3398,6 +3634,7 @@ class Eynollah: if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) t_order = time.time() + if self.full_layout: if self.reading_order_machine_based: @@ -3425,11 +3662,67 @@ class Eynollah: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + + if self.ocr: + + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + img_poly_on_img = np.copy(image_page) + + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + + 
img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + + ocr_textline_in_textregion.append(text_ocr) + + ##cv2.imwrite(str(ind_tot)+'.png', img_croped) + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) + + else: + ocr_all_textlines = None + #print(ocr_all_textlines) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) + if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index f537f65..c69be9b 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -2,7 +2,7 @@ # pylint: disable=import-error from pathlib import Path import os.path - +import xml.etree.ElementTree as ET from .utils.xml import create_page_xml, xml_reading_order from .utils.counter import EynollahIdCounter @@ -12,6 +12,7 @@ from ocrd_models.ocrd_page import ( CoordsType, PcGtsType, TextLineType, + TextEquivType, TextRegionType, ImageRegionType, TableRegionType, @@ -93,11 +94,13 @@ class EynollahXmlWriter(): points_co += ' ' coords.set_points(points_co[:-1]) - def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter): + def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): self.logger.debug('enter serialize_lines_in_region') for j in range(len(all_found_textline_polygons[region_idx])): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) + if ocr_all_textlines_textregion: + textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) text_region.add_TextLine(textline) region_bboxes = all_box_coord[region_idx] points_co = '' @@ -140,7 +143,7 @@ class EynollahXmlWriter(): with open(out_fname, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables): + def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, 
all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines): self.logger.debug('enter build_pagexml_no_full_layout') # create the file structure @@ -159,7 +162,11 @@ class EynollahXmlWriter(): Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)), ) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter) + if ocr_all_textlines: + ocr_textlines = ocr_all_textlines[mm] + else: + ocr_textlines = None + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) for mm in range(len(found_polygons_marginals)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', From a62ae370c3ff37495383f8415620dc2cf5d44eb1 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 7 Aug 2024 02:21:01 +0200 Subject: [PATCH 007/107] new full layout model and early layout for 1&2 column images are integrated - light version --- qurator/eynollah/eynollah.py | 118 ++++++++++++++++++++++++++++++----- qurator/eynollah/writer.py | 16 ++++- 2 files changed, 114 insertions(+), 20 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index a505b0e..8032f1e 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -241,6 +241,8 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" + self.model_region_dir_p_1_2_sp_np = dir_models + "/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" if self.textline_light: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: @@ -263,6 +265,8 @@ class Eynollah: self.model_bin = self.our_load_model(self.model_dir_of_binarization) self.model_textline = self.our_load_model(self.model_textline_dir) self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) + self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np) + self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) @@ -1069,6 +1073,66 @@ class Eynollah: croped_page, page_coord = crop_image_inside_box(box, img) return croped_page, page_coord + def extract_text_regions_new(self, img, patches, cols): + self.logger.debug("enter extract_text_regions") + img_height_h = img.shape[0] + img_width_h = img.shape[1] + if not self.dir_in: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully_new if patches else self.model_region_dir_fully_np) + else: + model_region = self.model_region_fl_new if patches else self.model_region_fl_np + + if not patches: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + prediction_regions2 = None + else: + if cols == 1: + img = 
otsu_copy_binary(img) + img = img.astype(np.uint8) + + img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000) + img = img.astype(np.uint8) + + if cols == 2: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300) + img = img.astype(np.uint8) + + if cols == 3: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600) + img = img.astype(np.uint8) + + if cols == 4: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900) + img = img.astype(np.uint8) + + if cols == 5: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200) + img = img.astype(np.uint8) + + if cols >= 6: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500) + img = img.astype(np.uint8) + + marginal_of_patch_percent = 0.1 + + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) + + prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) + self.logger.debug("exit extract_text_regions") + return prediction_regions, prediction_regions + + def extract_text_regions(self, img, patches, cols): self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] @@ -1652,10 +1716,17 @@ class Eynollah: textline_mask_tot_ea = self.run_textline(img_bin) if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) + if num_col_classifier == 1 or num_col_classifier == 2: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + else: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + if num_col_classifier == 1 or num_col_classifier == 2: + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + else: + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() @@ -2828,24 +2899,32 @@ class Eynollah: text_regions_p[:, :][text_regions_p[:, :] == 4] = 8 image_page = image_page.astype(np.uint8) - - regions_fully, regions_fully_only_drop = self.extract_text_regions(image_page, True, cols=num_col_classifier) - text_regions_p[:,:][regions_fully[:,:,0]==6]=6 - regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) - regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 + + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) + + # 6 is the separators lable in old full layout model + # 4 is the drop capital class in old full layout model + # in the new full layout drop capital is 3 and separators are 5 + + text_regions_p[:,:][regions_fully[:,:,0]==5]=6 + regions_fully[:, :, 
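
The per-column branches in extract_text_regions_new above all perform the same operation with a different target width. A table-driven sketch of that normalization, with the widths taken from the diff and cv2.resize standing in for the project's resize_image helper:

import cv2

COLS_TO_WIDTH = {1: 1000, 2: 1300, 3: 1600, 4: 1900, 5: 2200}

def normalize_width_for_columns(img, cols):
    w_new = COLS_TO_WIDTH.get(cols, 2500)                    # 6+ columns fall back to 2500
    h_new = int(img.shape[0] * w_new / float(img.shape[1]))  # keep aspect ratio
    return cv2.resize(img, (w_new, h_new))
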
0][regions_fully_only_drop[:, :, 0] == 3] = 4 + + #text_regions_p[:,:][regions_fully[:,:,0]==6]=6 + #regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) + #regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully) - regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) - if num_col_classifier > 2: - regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 - else: - regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) + ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) + ##if num_col_classifier > 2: + ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 + ##else: + ##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) - regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) + ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) # plt.imshow(regions_fully[:,:,0]) # plt.show() text_regions_p[:, :][regions_fully[:, :, 0] == 4] = 4 - text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4 + ####text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4 #plt.imshow(text_regions_p) #plt.show() ####if not self.tables: @@ -3645,8 +3724,13 @@ class Eynollah: else: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) + + if self.ocr: + ocr_all_textlines = [] + else: + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index c69be9b..29caddc 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -208,7 +208,7 @@ class EynollahXmlWriter(): return pcgts - def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, 
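
As the comments in the hunk above state, the new full-layout model emits drop capitals as class 3 and separators as class 5, while downstream code still expects the old classes 4 and 6. The mapping, restated as a sketch (the patch additionally routes drop capitals through the bounding-box filter first):

def map_new_full_layout_labels(text_regions_p, regions_fully):
    text_regions_p[regions_fully[:, :, 0] == 5] = 6   # separators: new 5 -> old 6
    text_regions_p[regions_fully[:, :, 0] == 3] = 4   # drop capitals: new 3 -> old 4
    return text_regions_p
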
polygons_lines_to_be_written_in_xml): + def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines): self.logger.debug('enter build_pagexml_full_layout') # create the file structure @@ -225,14 +225,24 @@ class EynollahXmlWriter(): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord))) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter) + + if ocr_all_textlines: + ocr_textlines = ocr_all_textlines[mm] + else: + ocr_textlines = None + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) for mm in range(len(found_polygons_text_region_h)): textregion = TextRegionType(id=counter.next_region_id, type_='header', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter) + + if ocr_all_textlines: + ocr_textlines = ocr_all_textlines[mm] + else: + ocr_textlines = None + self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines) for mm in range(len(found_polygons_marginals)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', From be144db9f83fbdd0bd345b89f5634b419e0fd919 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 7 Aug 2024 18:13:10 +0200 Subject: [PATCH 008/107] updating 1&2 columns images + full layout --- qurator/eynollah/eynollah.py | 143 +++++++++++++++++++++-------- qurator/eynollah/utils/__init__.py | 14 ++- 2 files changed, 115 insertions(+), 42 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 8032f1e..54e6e3b 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1083,43 +1083,64 @@ class Eynollah: model_region = self.model_region_fl_new if patches else self.model_region_fl_np if not patches: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) prediction_regions2 = None else: if cols == 1: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000) img = img.astype(np.uint8) if cols == 2: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300) img = img.astype(np.uint8) if cols == 3: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = 
resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600) img = img.astype(np.uint8) if cols == 4: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900) img = img.astype(np.uint8) if cols == 5: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200) img = img.astype(np.uint8) if cols >= 6: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500) img = img.astype(np.uint8) @@ -1611,6 +1632,7 @@ class Eynollah: img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) + #print(img.shape,'bin shape') if not self.dir_in: prediction_textline = self.do_prediction(patches, img, model_textline) else: @@ -1664,6 +1686,7 @@ class Eynollah: box_sub.put(boxes_sub_new) def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_light_v") + t_in = time.time() erosion_hurts = False img_org = np.copy(img) img_height_h = img_org.shape[0] @@ -1671,7 +1694,7 @@ class Eynollah: #model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) - + #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: img_w_new = 1000 @@ -1711,9 +1734,12 @@ class Eynollah: #img= np.copy(prediction_bin) img_bin = np.copy(prediction_bin) - + #print("inside 1 ", time.time()-t_in) textline_mask_tot_ea = self.run_textline(img_bin) + + + #print("inside 2 ", time.time()-t_in) if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: @@ -1727,12 +1753,14 @@ class Eynollah: prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) - + + #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) + img_bin = resize_image(img_bin,img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] @@ -1787,8 +1815,8 @@ class Eynollah: text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - - return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea + #print("inside 4 ", time.time()-t_in) + return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_from_xy_2models") @@ -2553,7 +2581,11 @@ class Eynollah: prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) return prediction_table_erode.astype(np.int16) - def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts): + def 
run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light): + + #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') + #print(erosion_hurts, 'erosion_hurts') + t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) @@ -2563,7 +2595,7 @@ class Eynollah: img_g3[:, :, 2] = img_g[:, :] image_page, page_coord, cont_page = self.extract_page() - + #print("inside graphics 1 ", time.time() - t_in_gr) if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: @@ -2574,6 +2606,9 @@ class Eynollah: text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = mask_images.astype(np.uint8) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) @@ -2582,7 +2617,7 @@ class Eynollah: img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - + #print("inside graphics 2 ", time.time() - t_in_gr) if erosion_hurts: img_only_regions = np.copy(img_only_regions_with_sep[:,:]) else: @@ -2600,8 +2635,10 @@ class Eynollah: except Exception as why: self.logger.error(why) num_col = None - return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea + #print("inside graphics 3 ", time.time() - t_in_gr) + return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts): + t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) @@ -2629,13 +2666,11 @@ class Eynollah: img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - if erosion_hurts: img_only_regions = np.copy(img_only_regions_with_sep[:,:]) else: img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) - try: num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col = num_col + 1 @@ -2682,6 +2717,7 @@ class Eynollah: return textline_mask_tot_ea def run_deskew(self, textline_mask_tot_ea): + #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') sigma = 2 main_page_deskew = True slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, main_page_deskew, plotter=self.plotter) @@ -2805,7 +2841,7 @@ class Eynollah: self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables - def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts): + def 
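
run_graphics_and_columns_light above crops every page-level raster, including the new img_bin_light, with the same page_coord box so later stages stay pixel-aligned. The pattern, as a sketch (page_coord being (y0, y1, x0, x1), as in the hunk):

def crop_to_page(rasters, page_coord):
    y0, y1, x0, x1 = page_coord
    return [r[y0:y1, x0:x1] for r in rasters]
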
run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light): self.logger.debug('enter run_boxes_full_layout') if self.tables: @@ -2900,20 +2936,23 @@ class Eynollah: image_page = image_page.astype(np.uint8) - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) + if self.light_version: + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, True, cols=num_col_classifier) + else: + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model # in the new full layout drop capital is 3 and separators are 5 text_regions_p[:,:][regions_fully[:,:,0]==5]=6 - regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4 + ###regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4 #text_regions_p[:,:][regions_fully[:,:,0]==6]=6 - #regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) - #regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 - - regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully) + ##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) + ##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 + drop_capital_label_in_full_layout_model = 3 + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 @@ -2923,7 +2962,7 @@ class Eynollah: ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) # plt.imshow(regions_fully[:,:,0]) # plt.show() - text_regions_p[:, :][regions_fully[:, :, 0] == 4] = 4 + text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4 ####text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4 #plt.imshow(text_regions_p) #plt.show() @@ -3463,22 +3502,41 @@ class Eynollah: self.ls_imgs = [1] for img_name in self.ls_imgs: + print(img_name) t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) - + #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) + + if num_col_classifier == 1 or num_col_classifier ==2: + if num_col_classifier == 1: + img_w_new = 1000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 1300 + 
img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -3498,7 +3556,7 @@ class Eynollah: continue else: return pcgts - + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) @@ -3513,17 +3571,20 @@ class Eynollah: textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) if self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts) + if not self.light_version: + img_bin_light = None + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - + #print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 
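
The hunk above speeds up deskewing of one- and two-column pages by estimating the slope on a down-scaled copy of the textline mask (width 1000 or 1300), since the angle estimate does not need full resolution. A sketch of the scaling step, again with cv2.resize standing in for resize_image:

import cv2

def downscale_mask_for_deskew(textline_mask, num_cols):
    w_new = 1000 if num_cols == 1 else 1300        # widths used in the hunk
    h_new = int(textline_mask.shape[0] * w_new / float(textline_mask.shape[1]))
    return cv2.resize(textline_mask, (w_new, h_new), interpolation=cv2.INTER_NEAREST)
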
0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) @@ -3625,13 +3686,16 @@ class Eynollah: # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) else: pass + + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - + #print("text region early 5 in %.1fs", time.time() - t0) if not self.curved_line: if self.light_version: if self.textline_light: @@ -3651,7 +3715,7 @@ class Eynollah: all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - + #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) @@ -3778,7 +3842,10 @@ class Eynollah: #print(x, y, w, h, h/float(w),'ratio') h2w_ratio = h/float(w) mask_poly = np.zeros(image_page.shape) - img_poly_on_img = np.copy(image_page) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) @@ -3805,8 +3872,10 @@ class Eynollah: pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) + #print("Job done in %.1fs", time.time() - t0) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index d2b2488..929669f 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -775,9 +775,8 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): return layout_no_patch -def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): - - drop_only = (layout_in_patch[:, :, 0] == 4) * 1 +def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label): + drop_only = 
(layout_in_patch[:, :, 0] == drop_capital_label) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) @@ -786,13 +785,18 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 0.00001] - areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.001] + areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.00001] contours_drop_parent_final = [] for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) - layout_in_patch[y : y + h, x : x + w, 0] = 4 + + if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.4: + + layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label + else: + layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = drop_capital_label return layout_in_patch From 00bf2b64d016df86810ec2eed5799799c7a13fbd Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 7 Aug 2024 19:07:54 +0200 Subject: [PATCH 009/107] 1&2 column images only printspace --- qurator/eynollah/eynollah.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 54e6e3b..3f078b0 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3549,7 +3549,8 @@ class Eynollah: if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], []) + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t1) if self.dir_in: self.writer.write_pagexml(pcgts) From e97677879638816ee12d0e1840b41e3e021ea9b2 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 14 Aug 2024 14:33:01 +0200 Subject: [PATCH 010/107] testing pyproject.toml --- pyproject.toml | 30 ++++++++++++++++ qurator/eynollah/cli.py | 80 +++++++++++++++++++++++++---------------- requirements.txt | 8 ----- setup.py | 28 --------------- 4 files changed, 80 insertions(+), 66 deletions(-) create mode 100644 pyproject.toml delete mode 100644 requirements.txt delete mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..102f443 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "eynollah" +version = "1.2.3" + + + + +dependencies = [ + "ocrd >= 2.23.3", + "tensorflow >= 2.12.0", + "scikit-learn >= 0.23.2", + "imutils >= 0.5.3", + "numpy < 1.24.0", + "matplotlib", + "torch == 2.0.1", + "transformers == 4.30.2", + "numba == 0.58.1", +] + +[project.scripts] +eynollah = "qurator.eynollah.cli:main" + + +[tool.setuptools.packages.find] +where = ["."] +include = ["qurator"] diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 833e904..6c6561f 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -3,14 +3,60 @@ import click from ocrd_utils import initLogging, setOverrideLogLevel from qurator.eynollah.eynollah import Eynollah 
+@click.group() +def main(): + pass -@click.command() +@main.command() +@click.option( + "--dir_xml", + "-dx", + help="directory of GT page-xml files", + type=click.Path(exists=True, file_okay=False), +) + +@click.option( + "--dir_out_modal_image", + "-domi", + help="directory where ground truth images would be written", + type=click.Path(exists=True, file_okay=False), +) + +@click.option( + "--dir_out_classes", + "-docl", + help="directory where ground truth classes would be written", + type=click.Path(exists=True, file_okay=False), +) + +@click.option( + "--input_height", + "-ih", + help="input height", +) +@click.option( + "--input_width", + "-iw", + help="input width", +) +@click.option( + "--min_area_size", + "-min", + help="min area size of regions considered for reading order training.", +) + +def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size): + xml_files_ind = os.listdir(dir_xml) + + +@main.command() @click.option( "--image", "-i", help="image filename", type=click.Path(exists=True, dir_okay=False), ) + @click.option( "--out", "-o", @@ -146,37 +192,13 @@ from qurator.eynollah.eynollah import Eynollah help="if this parameter set to true, this tool will try to do ocr", ) @click.option( - "--log-level", + "--log_level", "-l", type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), help="Override log level globally to this", ) -def main( - image, - out, - dir_in, - model, - save_images, - save_layout, - save_deskewed, - save_all, - save_page, - enable_plotting, - allow_enhancement, - curved_line, - textline_light, - full_layout, - tables, - right2left, - input_binary, - allow_scaling, - headers_off, - light_version, - reading_order_machine_based, - do_ocr, - ignore_page_extraction, - log_level -): + +def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, ignore_page_extraction, log_level): if log_level: setOverrideLogLevel(log_level) initLogging() @@ -215,8 +237,6 @@ def main( do_ocr=do_ocr, ) eynollah.run() - #pcgts = eynollah.run() - ##eynollah.writer.write_pagexml(pcgts) if __name__ == "__main__": main() diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 530dac2..0000000 --- a/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -# ocrd includes opencv, numpy, shapely, click -ocrd >= 2.23.3 -numpy <1.24.0 -scikit-learn >= 0.23.2 -tensorflow >=2.12.0 -imutils >= 0.5.3 -matplotlib -setuptools >= 50 diff --git a/setup.py b/setup.py deleted file mode 100644 index 9abf158..0000000 --- a/setup.py +++ /dev/null @@ -1,28 +0,0 @@ -from setuptools import setup, find_packages -from json import load - -install_requires = open('requirements.txt').read().split('\n') -with open('ocrd-tool.json', 'r', encoding='utf-8') as f: - version = load(f)['version'] - -setup( - name='eynollah', - version=version, - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Vahid Rezanezhad', - url='https://github.com/qurator-spk/eynollah', - license='Apache License 2.0', - namespace_packages=['qurator'], - packages=find_packages(exclude=['tests']), - install_requires=install_requires, - package_data={ - '': ['*.json'] - }, - entry_points={ - 'console_scripts': [ - 'eynollah=qurator.eynollah.cli:main', - 
'ocrd-eynollah-segment=qurator.eynollah.ocrd_cli:main', - ] - }, -) From 53fd5fb2a5da9a4c42bd1964a3ed1d2427f8637e Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 14 Aug 2024 14:42:37 +0200 Subject: [PATCH 011/107] resolving #106 for pyproject.toml test --- qurator/eynollah/cli.py | 6 +++++- qurator/eynollah/eynollah.py | 9 ++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 6c6561f..b0f55cd 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -236,7 +236,11 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s reading_order_machine_based=reading_order_machine_based, do_ocr=do_ocr, ) - eynollah.run() + if dir_in: + eynollah.run() + else: + pcgts = eynollah.run() + eynollah.writer.write_pagexml(pcgts) if __name__ == "__main__": main() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 3f078b0..b27d269 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3797,7 +3797,8 @@ class Eynollah: pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts + if not self.dir_in: + return pcgts else: @@ -3872,9 +3873,11 @@ class Eynollah: self.logger.info("detection of reading order took %.1fs", time.time() - t_order) pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts + if not self.dir_in: + return pcgts #print("text region early 7 in %.1fs", time.time() - t0) - self.writer.write_pagexml(pcgts) + if self.dir_in: + self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) #print("Job done in %.1fs", time.time() - t0) From 4c50479cb87cf6abf29f1ce8f907eb6814eedec0 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 14 Aug 2024 15:28:36 +0200 Subject: [PATCH 012/107] pyproject.toml may work for ocrd --- pyproject.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 102f443..c76f7e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "eynollah" -version = "1.2.3" +version = "0.1.0" @@ -23,8 +23,12 @@ dependencies = [ [project.scripts] eynollah = "qurator.eynollah.cli:main" +ocrd-eynollah-segment="qurator.eynollah.ocrd_cli:main" [tool.setuptools.packages.find] where = ["."] include = ["qurator"] + +[tool.setuptools.package-data] +"*" = ["*.json", '*.yml', '*.xml', '*.xsd'] From 74eac4daccd7e5bd9dc5644dc01ad54671671a10 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 15 Aug 2024 13:50:36 +0200 Subject: [PATCH 013/107] dtype = object in the case of length 1 arise error --- qurator/eynollah/eynollah.py | 25 
++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b27d269..b4e7276 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3599,7 +3599,10 @@ class Eynollah: contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + if len(contours_only_text_parent)>1: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + else: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) @@ -3614,7 +3617,10 @@ class Eynollah: if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + if len(contours_only_text_parent_d)>1: + contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + else: + contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) @@ -3677,7 +3683,10 @@ class Eynollah: areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + if len(contours_only_text_parent)>1: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + else: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) @@ -3719,7 +3728,10 @@ class Eynollah: #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + if len(contours_only_text_parent_d_ordered)>1: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + else: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) if self.light_version: text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, 
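
The len()>1 guards above work around a numpy pitfall: np.array(contours, dtype=object) only yields the intended 1-D object array when the contours have differing shapes; with a single contour, numpy absorbs the contour's own axes instead, so fancy indexing no longer returns contours. A plain-Python reorder avoids numpy entirely (shown here as an alternative, not what the patch does):

def reorder(items, order):
    # order is any sequence of indices, e.g. from np.argsort
    return [items[i] for i in order]
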
contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: @@ -3809,7 +3821,10 @@ class Eynollah: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + if len(contours_only_text_parent_d_ordered)>1: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + else: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) From 6f4205ba49e66ad99b1c18a95533d71447625faf Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 15 Aug 2024 16:08:45 +0200 Subject: [PATCH 014/107] update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c76f7e7..67544bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ version = "0.1.0" dependencies = [ "ocrd >= 2.23.3", - "tensorflow >= 2.12.0", + "tensorflow == 2.12.1", "scikit-learn >= 0.23.2", "imutils >= 0.5.3", "numpy < 1.24.0", From 4f8210de71935f9980c121f5eaae4df2722903d7 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Thu, 15 Aug 2024 23:23:48 +0200 Subject: [PATCH 015/107] update Makefile model location --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 525e6c3..440b0bd 100644 --- a/Makefile +++ b/Makefile @@ -24,12 +24,14 @@ models: models_eynollah models_eynollah: models_eynollah.tar.gz # tar xf models_eynollah_renamed.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' # tar xf models_eynollah_renamed.tar.gz - tar xf 2022-04-05.SavedModel.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' + # tar xf 2022-04-05.SavedModel.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' + tar xf models_eynollah.tar.gz models_eynollah.tar.gz: # wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' - wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' + # wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' + wget https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz # Install with pip install: From c10a525675690076c1d029a483c0ff997c0c0e17 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 23 Aug 2024 02:18:16 +0200 Subject: [PATCH 016/107] inference with batch size bigger than 1 --- qurator/eynollah/eynollah.py | 172 ++++++++++++++++++++--------------- 1 file changed, 100 insertions(+), 72 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b4e7276..2bf57a4 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -548,11 +548,11 @@ class Eynollah: if self.input_binary: img = self.imread() if self.dir_in: - prediction_bin = self.do_prediction(True, img, self.model_bin) + prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) else: model_bin, session_bin = 
self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img, model_bin) + prediction_bin = self.do_prediction(True, img, model_bin, n_batch_inference=5) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 @@ -703,7 +703,7 @@ class Eynollah: return model, None - def do_prediction(self, patches, img, model, marginal_of_patch_percent=0.1): + def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] @@ -745,7 +745,17 @@ class Eynollah: nyf = img_h / float(height_mid) nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) for i in range(nxf): for j in range(nyf): if i == 0: @@ -766,59 +776,77 @@ class Eynollah: if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - img_height_model + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - verbose=0) - seg = np.argmax(label_p_pred, axis=3)[0] - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - - if i == 0 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - #seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, 
index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i != 0 and i != nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color - elif i != 0 and i != nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color - + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + if batch_indexer == n_batch_inference: + + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + 
prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) prediction_true = prediction_true.astype(np.uint8) #del model #gc.collect() @@ -835,7 +863,7 @@ class Eynollah: img = img / float(255.0) img = resize_image(img, img_height_model, img_width_model) - label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] @@ -1147,7 +1175,7 @@ class Eynollah: marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") @@ -1173,7 +1201,7 @@ class Eynollah: img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.7), int(img_width_h * 0.7)) marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent) + prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) if cols == 2: @@ -1181,7 +1209,7 @@ class Eynollah: img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.4), int(img_width_h * 0.4)) marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent) + prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) elif cols > 2: @@ -1189,7 +1217,7 @@ class Eynollah: img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.3), int(img_width_h * 0.3)) marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, 
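
PATCH 016 above turns do_prediction's per-patch model.predict calls into one call per batch of n_batch_inference sliding-window patches: coordinates are queued, the batch is predicted and argmax-ed, and each segmentation tile is scattered back into place. A stripped-down sketch of that gather/predict/scatter cycle, omitting the margin overlap and border trimming the real code performs, and assuming a Keras-style model:

import numpy as np

def predict_in_batches(img, model, patch, batch_size):
    h, w = img.shape[:2]
    out = np.zeros((h, w), dtype=np.uint8)
    coords, batch = [], []

    def flush():
        if not batch:
            return
        pred = model.predict(np.stack(batch), verbose=0)  # one call per batch
        seg = np.argmax(pred, axis=3)                     # (B, patch, patch)
        for (y, x), tile in zip(coords, seg):
            out[y:y + patch, x:x + patch] = tile
        coords.clear()
        batch.clear()

    for y in range(0, h - patch + 1, patch):
        for x in range(0, w - patch + 1, patch):
            coords.append((y, x))
            batch.append(img[y:y + patch, x:x + patch])
            if len(batch) == batch_size:
                flush()
    flush()  # remainder smaller than a full batch
    return out
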
marginal_of_patch_percent) + prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) if cols == 2: @@ -1245,7 +1273,7 @@ class Eynollah: img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 @@ -1634,9 +1662,9 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) #print(img.shape,'bin shape') if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline) + prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=4) else: - prediction_textline = self.do_prediction(patches, img, self.model_textline) + prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=4) prediction_textline = resize_image(prediction_textline, img_h, img_w) if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) @@ -1721,9 +1749,9 @@ class Eynollah: if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -1870,9 +1898,9 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) if self.dir_in: - prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, 0.2) + prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) else: - prediction_regions_org2 = self.do_prediction(True, img, model_region, 0.2) + prediction_regions_org2 = self.do_prediction(True, img, model_region, marginal_of_patch_percent=0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) @@ -1905,9 +1933,9 @@ class Eynollah: else: if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin) + prediction_bin = self.do_prediction(True, img_org, model_bin, n_batch_inference=5) else: - prediction_bin = self.do_prediction(True, img_org, self.model_bin) + prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] @@ -1958,9 +1986,9 @@ class Eynollah: if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin) + prediction_bin = self.do_prediction(True, img_org, model_bin, 
n_batch_inference=5) else: - prediction_bin = self.do_prediction(True, img_org, self.model_bin) + prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] From 04e79002b3daa3f4e69921e6b94b3d0a6ee48639 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 24 Aug 2024 12:54:19 +0200 Subject: [PATCH 017/107] making light version faster for 1 and 2 columns images --- qurator/eynollah/eynollah.py | 88 ++++++++++++++++++------ qurator/eynollah/utils/separate_lines.py | 16 ++--- 2 files changed, 75 insertions(+), 29 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 2bf57a4..640db16 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -28,6 +28,7 @@ from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +#os.environ['CUDA_VISIBLE_DEVICES'] = '-1' stderr = sys.stderr sys.stderr = open(os.devnull, "w") import tensorflow as tf @@ -299,17 +300,25 @@ class Eynollah: def _cache_images(self, image_filename=None, image_pil=None): ret = {} + t_c0 = time.time() if image_filename: ret['img'] = cv2.imread(image_filename) - self.dpi = check_dpi(image_filename) + if self.light_version: + self.dpi = 100 + else: + self.dpi = check_dpi(image_filename) else: ret['img'] = pil2cv(image_pil) - self.dpi = check_dpi(image_pil) + if self.light_version: + self.dpi = 100 + else: + self.dpi = check_dpi(image_pil) ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY) for prefix in ('', '_grayscale'): ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) return ret def reset_file_name_dir(self, image_filename): + t_c = time.time() self._imgs = self._cache_images(image_filename=image_filename) self.image_filename = image_filename @@ -491,6 +500,27 @@ class Eynollah: num_column_is_classified = True return img_new, num_column_is_classified + + def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): + self.logger.debug("enter calculate_width_height_by_columns") + if num_col == 1: + img_w_new = 1300 + img_h_new = int(img.shape[0] / float(img.shape[1]) * 1300) + else: + img_w_new = 1500 + img_h_new = int(img.shape[0] / float(img.shape[1]) * 1500) + + if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: + img_new = np.copy(img) + num_column_is_classified = False + elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + img_new = np.copy(img) + num_column_is_classified = False + else: + img_new = resize_image(img, img_h_new, img_w_new) + num_column_is_classified = True + + return img_new, num_column_is_classified def resize_image_with_column_classifier(self, is_image_enhanced, img_bin): self.logger.debug("enter resize_image_with_column_classifier") @@ -600,16 +630,24 @@ class Eynollah: self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) if dpi < DPI_THRESHOLD: - img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + else: + img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) if light_version: image_res = np.copy(img_new) else: image_res = 
self.predict_enhancement(img_new) is_image_enhanced = True else: - num_column_is_classified = True - image_res = np.copy(img) - is_image_enhanced = False + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + image_res = np.copy(img_new) + is_image_enhanced = True + else: + num_column_is_classified = True + image_res = np.copy(img) + is_image_enhanced = False self.logger.debug("exit resize_and_enhance_image_with_column_classifier") return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin @@ -1175,7 +1213,7 @@ class Eynollah: marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=4) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") @@ -1280,7 +1318,10 @@ class Eynollah: def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new") - num_cores = cpu_count() + if len(contours)>15: + num_cores = cpu_count() + else: + num_cores = 1 queue_of_all_params = Queue() processes = [] @@ -1554,8 +1595,6 @@ class Eynollah: mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) - # plt.imshow(mask_only_con_region) - # plt.show() if self.textline_light: all_text_region_raw = np.copy(textline_mask_tot_ea) @@ -1660,11 +1699,11 @@ class Eynollah: img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - #print(img.shape,'bin shape') + #print(img.shape,'bin shape textline') if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=4) + prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=3) else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=4) + prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=3) prediction_textline = resize_image(prediction_textline, img_h, img_w) if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) @@ -1747,11 +1786,14 @@ class Eynollah: img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) + t_bin = time.time() if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=10) else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=10) + + #print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2710,10 +2752,10 @@ class Eynollah: return num_col, num_col_classifier, 
img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction def run_enhancement(self,light_version): + t_in = time.time() self.logger.info("Resizing and enhancing image...") is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier(light_version) self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') - scale = 1 if is_image_enhanced: if self.allow_enhancement: @@ -2731,6 +2773,7 @@ class Eynollah: if self.allow_scaling: img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin) self.get_image_and_scales_after_enhancing(img_org, img_res) + #print("enhancement in ", time.time()-t_in) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified def run_textline(self, image_page): @@ -2748,7 +2791,8 @@ class Eynollah: #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') sigma = 2 main_page_deskew = True - slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, main_page_deskew, plotter=self.plotter) + n_total_angles = 30 + slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, n_total_angles, main_page_deskew, plotter=self.plotter) slope_first = 0 if self.plotter: @@ -2871,7 +2915,7 @@ class Eynollah: def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light): self.logger.debug('enter run_boxes_full_layout') - + t_full0 = time.time() if self.tables: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) @@ -2963,12 +3007,12 @@ class Eynollah: text_regions_p[:, :][text_regions_p[:, :] == 4] = 8 image_page = image_page.astype(np.uint8) - + #print("full inside 1", time.time()- t_full0) if self.light_version: regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, True, cols=num_col_classifier) else: regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) - + #print("full inside 2", time.time()- t_full0) # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model # in the new full layout drop capital is 3 and separators are 5 @@ -3012,6 +3056,7 @@ class Eynollah: img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) self.logger.debug('exit run_boxes_full_layout') + #print("full inside 3", time.time()- t_full0) return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables def our_load_model(self, model_file): @@ -3534,6 +3579,7 @@ class Eynollah: t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) + #print("text region early -11 in %.1fs", time.time() - t0) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) @@ -3922,7 +3968,7 @@ class Eynollah: if self.dir_in: self.writer.write_pagexml(pcgts) 
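Note on the batched inference introduced above: do_prediction now accumulates tiles in img_patch until n_batch_inference of them are ready, runs model.predict once per batch, and stitches every tile back into prediction_true keeping only its interior. The long i_batch/j_batch case ladder decides, per tile and per side, whether the outer `margin` pixels are kept (the tile touches the image border) or dropped (interior seam). A later patch in this series (PATCH 019, "inference batch size debugged") additionally flushes the final, partially filled batch. Below is a condensed, self-contained sketch of the same scheme, not the library's exact code; it assumes a 3-channel uint8 image at least one tile large in each direction, and stubs the network with any callable mapping (N, patch, patch, 3) floats to (N, patch, patch) class maps:

    import numpy as np

    def tiled_predict(img, predict_fn, patch=448, margin=44, batch_size=4):
        # Sketch only; eynollah's do_prediction handles more cases and classes.
        h, w = img.shape[:2]
        out = np.zeros((h, w), dtype=np.uint8)
        step = patch - 2 * margin
        # regular grid of tile origins, plus tiles flush with the far borders
        ys = sorted(set(range(0, h - patch, step)) | {h - patch})
        xs = sorted(set(range(0, w - patch, step)) | {w - patch})
        tiles, coords = [], []

        def flush():
            # Predict one batch and stitch each tile's kept region into `out`.
            if not tiles:
                return
            seg = predict_fn(np.stack(tiles))
            for s, (y, x) in zip(seg, coords):
                top = 0 if y == 0 else margin            # border sides keep their
                left = 0 if x == 0 else margin           # outer edge; interior
                bot = patch if y + patch == h else patch - margin    # sides drop
                right = patch if x + patch == w else patch - margin  # the margin
                out[y + top:y + bot, x + left:x + right] = s[top:bot, left:right]
            tiles.clear()
            coords.clear()

        for y in ys:
            for x in xs:
                tiles.append(img[y:y + patch, x:x + patch] / 255.0)
                coords.append((y, x))
                if len(tiles) == batch_size:
                    flush()
        flush()  # final, partially filled batch: the PATCH 019 fix
        return out

For a Keras segmentation model, predict_fn could be `lambda b: np.argmax(model.predict(b, verbose=0), axis=3)`, matching the argmax done in the diff.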
#self.logger.info("Job done in %.1fs", time.time() - t0) - #print("Job done in %.1fs", time.time() - t0) + print("Job done in %.1fs", time.time() - t0) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/utils/separate_lines.py b/qurator/eynollah/utils/separate_lines.py index acdc2e9..1004a92 100644 --- a/qurator/eynollah/utils/separate_lines.py +++ b/qurator/eynollah/utils/separate_lines.py @@ -1569,7 +1569,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): # plt.show() return img_patch_ineterst_revised -def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): +def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) @@ -1626,7 +1626,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): ang_int=0 - angels=np.linspace(ang_int-22.5,ang_int+22.5,100) + angels=np.linspace(ang_int-22.5,ang_int+22.5,n_tot_angles) var_res=[] for rot in angels: @@ -1649,7 +1649,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): #plt.imshow(img_resized) #plt.show() - angels=np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) + angels=np.linspace(-12,12,n_tot_angles)#np.array([0 , 45 , 90 , -45]) var_res=[] @@ -1680,7 +1680,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): early_slope_edge=11 if abs(ang_int)>early_slope_edge and ang_int<0: - angels=np.linspace(-90,-12,100) + angels=np.linspace(-90,-12,n_tot_angles) var_res=[] for rot in angels: img_rot=rotate_image(img_resized,rot) @@ -1700,7 +1700,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): elif abs(ang_int)>early_slope_edge and ang_int>0: - angels=np.linspace(90,12,100) + angels=np.linspace(90,12,n_tot_angles) var_res=[] for rot in angels: img_rot=rotate_image(img_resized,rot) @@ -1719,7 +1719,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): except: ang_int=0 else: - angels=np.linspace(-25,25,60) + angels=np.linspace(-25,25,int(n_tot_angles/2.)+10) var_res=[] indexer=0 for rot in angels: @@ -1749,7 +1749,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): early_slope_edge=22 if abs(ang_int)>early_slope_edge and ang_int<0: - angels=np.linspace(-90,-25,60) + angels=np.linspace(-90,-25,int(n_tot_angles/2.)+10) var_res=[] @@ -1772,7 +1772,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): elif abs(ang_int)>early_slope_edge and ang_int>0: - angels=np.linspace(90,25,60) + angels=np.linspace(90,25,int(n_tot_angles/2.)+10) var_res=[] From 7ae6a8776fb3cddc9279680f40fc23bc9b4df946 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 26 Aug 2024 16:02:10 +0200 Subject: [PATCH 018/107] ignoring dpi check by light version --- qurator/eynollah/eynollah.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 640db16..ff35d6f 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -504,11 +504,11 @@ class Eynollah: def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): self.logger.debug("enter calculate_width_height_by_columns") if num_col == 1: + img_w_new = 1000 + img_h_new = int(img.shape[0] / float(img.shape[1]) * 1000) + else: img_w_new = 
1300 img_h_new = int(img.shape[0] / float(img.shape[1]) * 1300) - else: - img_w_new = 1500 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 1500) if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) @@ -1213,7 +1213,7 @@ class Eynollah: marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=4) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") @@ -1810,7 +1810,8 @@ class Eynollah: #print("inside 2 ", time.time()-t_in) - + + #print(img_resized.shape, num_col_classifier, "num_col_classifier") if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) From 93005959e54abf5f67def79868b8fd8d8831e287 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 27 Aug 2024 18:13:46 +0200 Subject: [PATCH 019/107] inference batch size debugged --- qurator/eynollah/eynollah.py | 71 +++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index ff35d6f..f183dee 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -89,7 +89,7 @@ from .utils.xml import order_and_id_of_texts from .plot import EynollahPlotter from .writer import EynollahXmlWriter -MIN_AREA_REGION = 0.0005 +MIN_AREA_REGION = 0.00001 SLOPE_THRESHOLD = 0.13 RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: DPI_THRESHOLD = 298 @@ -182,6 +182,7 @@ class Eynollah: logger=None, pcgts=None, ): + self.light_version = light_version if not dir_in: if image_pil: self._imgs = self._cache_images(image_pil=image_pil) @@ -209,7 +210,6 @@ class Eynollah: self.input_binary = input_binary self.allow_scaling = allow_scaling self.headers_off = headers_off - self.light_version = light_version self.ignore_page_extraction = ignore_page_extraction self.ocr = do_ocr self.pcgts = pcgts @@ -828,7 +828,64 @@ class Eynollah: batch_indexer = batch_indexer + 1 if batch_indexer == n_batch_inference: + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, 
index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + elif i==(nxf-1) and j==(nyf-1): label_p_pred = model.predict(img_patch,verbose=0) seg = np.argmax(label_p_pred, axis=3) @@ -885,6 +942,7 @@ class Eynollah: batch_indexer = 0 img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + prediction_true = prediction_true.astype(np.uint8) #del model #gc.collect() @@ -1789,9 +1847,9 @@ class Eynollah: t_bin = time.time() if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=10) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=10) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) #print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] @@ -1808,7 +1866,6 @@ class Eynollah: textline_mask_tot_ea = self.run_textline(img_bin) - #print("inside 2 ", time.time()-t_in) #print(img_resized.shape, num_col_classifier, "num_col_classifier") @@ -1839,6 +1896,10 @@ class Eynollah: mask_texts_only = (prediction_regions_org[:,:] ==1)*1 + mask_texts_only = mask_texts_only.astype('uint8') + + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) + mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) From 0f87974b0c7a7bdfddd31ffa99b89c58c952ddcf Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 2 Sep 
2024 16:21:07 +0200 Subject: [PATCH 020/107] writing drop capitals in xml output + and may resolve issue #110 --- qurator/eynollah/eynollah.py | 23 ++++++++++++----------- qurator/eynollah/writer.py | 31 +++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index f183dee..1bb0eff 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3735,9 +3735,9 @@ class Eynollah: contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - if len(contours_only_text_parent)>1: + try: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - else: + except: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) @@ -3753,10 +3753,11 @@ class Eynollah: if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - if len(contours_only_text_parent_d)>1: + try: contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - else: + except: contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) @@ -3819,9 +3820,9 @@ class Eynollah: areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - if len(contours_only_text_parent)>1: + try: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - else: + except: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) @@ -3864,10 +3865,10 @@ class Eynollah: #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - if len(contours_only_text_parent_d_ordered)>1: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - else: + try: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + except: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) if self.light_version: text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: @@ -3957,9 +3958,9 @@ class Eynollah: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = 
self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - if len(contours_only_text_parent_d_ordered)>1: + try: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - else: + except: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index 29caddc..8eb1027 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -136,6 +136,29 @@ class EynollahXmlWriter(): points_co += str(int((contour_textline[0][1] + region_bboxes[0]+page_coord[0])/self.scale_y)) points_co += ' ' coords.set_points(points_co[:-1]) + + def serialize_lines_in_dropcapital(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): + self.logger.debug('enter serialize_lines_in_region') + for j in range(1): + coords = CoordsType() + textline = TextLineType(id=counter.next_line_id, Coords=coords) + if ocr_all_textlines_textregion: + textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) + text_region.add_TextLine(textline) + #region_bboxes = all_box_coord[region_idx] + points_co = '' + for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[j]): + if len(contour_textline) == 2: + points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x)) + points_co += ',' + points_co += str(int((contour_textline[1] + page_coord[0]) / self.scale_y)) + else: + points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) + points_co += ',' + points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) + + points_co += ' ' + coords.set_points(points_co[:-1]) def write_pagexml(self, pcgts): out_fname = os.path.join(self.dir_out, self.image_filename_stem) + ".xml" @@ -251,8 +274,12 @@ class EynollahXmlWriter(): self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) for mm in range(len(found_polygons_drop_capitals)): - page.add_TextRegion(TextRegionType(id=counter.next_region_id, type_='drop-capital', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord)))) + dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital', + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))) + page.add_TextRegion(dropcapital) + all_box_coord_drop = None + slopes_drop = None + self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None) for mm in range(len(found_polygons_text_region_img)): page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) From c3a4a1bba77d40b9be8926483e40a1ccefe42198 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 3 Sep 2024 13:14:10 +0200 Subject: [PATCH 021/107] resolving issue #110 in a better way --- qurator/eynollah/eynollah.py | 61 ++++++++++++++++++++++-------------- 1 file 
changed, 37 insertions(+), 24 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 1bb0eff..c88f0f9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2357,7 +2357,6 @@ class Eynollah: arg_text_con = [] for ii in range(len(cx_text_only)): for jj in range(len(boxes)): - print(cx_text_only[ii],cy_text_only[ii],'markaz') if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) break @@ -3624,6 +3623,9 @@ class Eynollah: textline_contour[:,0] = textline_contour[:,0] + box_ind[2] textline_contour[:,1] = textline_contour[:,1] + box_ind[0] return textline_contour + def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): + return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] + def run(self): """ @@ -3735,11 +3737,15 @@ class Eynollah: contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - try: - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - except: - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) @@ -3753,12 +3759,14 @@ class Eynollah: if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - try: - contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - except: - contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) 
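Note: return_list_of_contours_with_desired_order, added above, replaces the earlier if/else and try/except dances around np.array(contours, dtype=object) versus dtype=np.int32, conversions whose behaviour depends on whether the contour list is ragged and on how many elements it has, with plain list indexing, which has no such edge cases. A self-contained restatement with an argsort-by-area usage mirroring how run() calls it (the dummy contours are hypothetical):

    import numpy as np

    def reorder_by_indexes(items, sorted_indexes):
        # Pure-Python reordering; never converts the (possibly ragged)
        # contour list to a numpy array.
        return [items[i] for i in sorted_indexes]

    # usage: sort text regions by area, ascending, as run() does
    areas = [0.004, 0.0009, 0.02]
    contours = [np.zeros((5, 1, 2), np.int32),   # OpenCV-style contours of
                np.zeros((3, 1, 2), np.int32),   # different lengths (ragged)
                np.zeros((8, 1, 2), np.int32)]
    order = np.argsort(areas)                    # -> [1, 0, 2]
    contours = reorder_by_indexes(contours, order)
    areas = reorder_by_indexes(areas, order)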
cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) @@ -3820,11 +3828,14 @@ class Eynollah: areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - try: - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - except: - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + #try: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #except: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) @@ -3865,10 +3876,11 @@ class Eynollah: #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - try: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - except: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) if self.light_version: text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: @@ -3958,10 +3970,11 @@ class Eynollah: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - try: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - except: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = 
list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) From f0b49073b7ba4746e1facd17cf8f8598e253b1d4 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 3 Sep 2024 23:10:38 +0200 Subject: [PATCH 022/107] adding option for textline detection in printspace --- qurator/eynollah/eynollah.py | 939 +++++++++++++++++++---------------- 1 file changed, 512 insertions(+), 427 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c88f0f9..533e2a0 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -741,7 +741,7 @@ class Eynollah: return model, None - def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1): + def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] @@ -774,7 +774,7 @@ class Eynollah: width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin img = img / float(255.0) - img = img.astype(np.float16) + #img = img.astype(np.float16) img_h = img.shape[0] img_w = img.shape[1] prediction_true = np.zeros((img_h, img_w, 3)) @@ -832,6 +832,23 @@ class Eynollah: seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): seg_in = seg[indexer_inside_batch,:,:] @@ -889,6 +906,22 @@ class Eynollah: label_p_pred = model.predict(img_patch,verbose=0) seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): @@ -1202,9 +1235,9 @@ class Eynollah: img_height_h = img.shape[0] img_width_h = img.shape[1] if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully_new if patches else self.model_region_dir_fully_np) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np) else: - model_region = self.model_region_fl_new if patches else self.model_region_fl_np + model_region = self.model_region_fl if patches else self.model_region_fl_np if not patches: if self.light_version: @@ -1809,7 +1842,7 @@ class Eynollah: q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) - def get_regions_light_v(self,img,is_image_enhanced, 
num_col_classifier): + def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_ro=False): self.logger.debug("enter get_regions_light_v") t_in = time.time() erosion_hurts = False @@ -1866,89 +1899,98 @@ class Eynollah: textline_mask_tot_ea = self.run_textline(img_bin) - #print("inside 2 ", time.time()-t_in) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - #print(img_resized.shape, num_col_classifier, "num_col_classifier") - if not self.dir_in: - if num_col_classifier == 1 or num_col_classifier == 2: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) - else: + if not skip_layout_ro: + #print("inside 2 ", time.time()-t_in) + + #print(img_resized.shape, num_col_classifier, "num_col_classifier") + if not self.dir_in: + ###if num_col_classifier == 1 or num_col_classifier == 2: + ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + ###prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + ###else: + ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + ###prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) - else: - if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) - - #print("inside 3 ", time.time()-t_in) - #plt.imshow(prediction_regions_org[:,:,0]) - #plt.show() + ##if num_col_classifier == 1 or num_col_classifier == 2: + ##prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + ##else: + ##prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - img_bin = resize_image(img_bin,img_height_h, img_width_h ) - - prediction_regions_org=prediction_regions_org[:,:,0] + #print("inside 3 ", time.time()-t_in) + #plt.imshow(prediction_regions_org[:,:,0]) + #plt.show() + + prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - - mask_texts_only = mask_texts_only.astype('uint8') - - mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) - - mask_images_only=(prediction_regions_org[:,:] ==2)*1 - - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - - - test_khat = np.zeros(prediction_regions_org.shape) - - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - - - #plt.imshow(test_khat[:,:]) 
- #plt.show() - - #for jv in range(1): - #print(jv, hir_lines_xml[0][232][3]) - #test_khat = np.zeros(prediction_regions_org.shape) + img_bin = resize_image(img_bin,img_height_h, img_width_h ) + + prediction_regions_org=prediction_regions_org[:,:,0] + + mask_lines_only = (prediction_regions_org[:,:] ==3)*1 + + mask_texts_only = (prediction_regions_org[:,:] ==1)*1 + + mask_texts_only = mask_texts_only.astype('uint8') - #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) + + mask_images_only=(prediction_regions_org[:,:] ==2)*1 + + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) + + + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() + #for jv in range(1): + #print(jv, hir_lines_xml[0][232][3]) + #test_khat = np.zeros(prediction_regions_org.shape) + + #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + - polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - - - test_khat = np.zeros(prediction_regions_org.shape) - - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - - - #plt.imshow(test_khat[:,:]) - #plt.show() - #sys.exit() - - polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) - - text_regions_p_true = np.zeros(prediction_regions_org.shape) - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) - - text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - #print("inside 4 ", time.time()-t_in) - return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + + + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + #sys.exit() + + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) + + text_regions_p_true = np.zeros(prediction_regions_org.shape) + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) + + text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) + #print("inside 4 ", time.time()-t_in) + return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin + else: + img_bin = resize_image(img_bin,img_height_h, img_width_h ) + return None, erosion_hurts, None, textline_mask_tot_ea, img_bin def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_from_xy_2models") @@ -2392,8 +2434,6 @@ class Eynollah: ref_point += len(id_of_texts) order_of_texts_tot = [] - 
print(len(contours_only_text_parent),'contours_only_text_parent') - print(len(order_by_con_main),'order_by_con_main') for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) @@ -2768,6 +2808,28 @@ class Eynollah: num_col = None #print("inside graphics 3 ", time.time() - t_in_gr) return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light + + def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light): + + #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') + #print(erosion_hurts, 'erosion_hurts') + t_in_gr = time.time() + img_g = self.imread(grayscale=True, uint8=True) + + img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) + img_g3 = img_g3.astype(np.uint8) + img_g3[:, :, 0] = img_g[:, :] + img_g3[:, :, 1] = img_g[:, :] + img_g3[:, :, 2] = img_g[:, :] + + image_page, page_coord, cont_page = self.extract_page() + #print("inside graphics 1 ", time.time() - t_in_gr) + + textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + return page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts): t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) @@ -3632,6 +3694,8 @@ class Eynollah: Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") + + skip_layout_ro = True t0_tot = time.time() @@ -3649,398 +3713,419 @@ class Eynollah: self.logger.info("Enhancing took %.1fs ", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() - if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) - - if num_col_classifier == 1 or num_col_classifier ==2: - if num_col_classifier == 1: - img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: - img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + if not skip_layout_ro: + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + if num_col_classifier == 1 or num_col_classifier ==2: + if num_col_classifier == 1: + img_w_new = 1000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 1300 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = 
self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) - #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) - textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + + t1 = time.time() + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) + + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t1) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - #self.logger.info('cont_page %s', cont_page) - - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - 
t1) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts - #print("text region early in %.1fs", time.time() - t0) - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - t1 = time.time() - #plt.imshow(table_prediction) - #plt.show() - - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) - t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + #plt.imshow(table_prediction) + #plt.show() - if self.full_layout: - if not self.light_version: - img_bin_light = None - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) - text_only = ((img_revised_tab[:, :] == 1)) * 1 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - - #print("text region early 2 in %.1fs", time.time() - t0) - ###min_con_area = 0.000005 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) 
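Note: in the light path above, 1- and 2-column pages are shrunk to a fixed working width (1000 or 1300 px, per the diff) before the expensive angle sweep in run_deskew; the estimated slope is scale-invariant, so the full-resolution page can still be rotated with it afterwards. A sketch of that preparation step, using plain cv2 where the diff uses the resize_image wrapper:

    import cv2

    def mask_for_deskew(textline_mask, num_col):
        # Shrink narrow layouts before the angle sweep to save time.
        if num_col not in (1, 2):
            return textline_mask
        new_w = 1000 if num_col == 1 else 1300
        new_h = int(textline_mask.shape[0] / float(textline_mask.shape[1]) * new_w)
        # nearest-neighbour keeps the textline mask binary
        return cv2.resize(textline_mask, (new_w, new_h),
                          interpolation=cv2.INTER_NEAREST)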
+ t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + + if self.full_layout: + if not self.light_version: + img_bin_light = None + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + text_only = ((img_revised_tab[:, :] == 1)) * 1 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + #print("text region early 2 in %.1fs", time.time() - t0) + ###min_con_area = 0.000005 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = 
return_parent_contours(contours_only_text_d, hir_on_text_d) - areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big - - contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] 
,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + (h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() + else: + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] + else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] - else: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > 
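# The block above aligns regions found on the deskewed page with regions on the
# original page: each original centroid is rotated with the 2x2 part of the
# deskew rotation matrix, shifted by a translation estimated from the largest
# region in both images, and matched to the nearest deskewed centroid. A
# self-contained sketch of that idea (illustrative names; the refinement that
# re-picks the anchor among the last five deskewed regions is omitted):
import math
import cv2
import numpy as np

def match_regions_after_deskew(centroids, centroids_d, anchor, anchor_d, angle_deg, page_shape):
    # centroids / centroids_d: (x, y) region centroids in the original and
    # deskewed page; anchor / anchor_d: centroid of the biggest region in each.
    h, w = page_shape[:2]
    M = cv2.getRotationMatrix2D((w // 2.0, h // 2.0), angle_deg, 1.0)
    M22 = np.array(M)[:2, :2]
    p_big = M22 @ np.asarray(anchor, dtype=float)
    dx, dy = p_big[0] - anchor_d[0], p_big[1] - anchor_d[1]
    matches = []
    for cx, cy in centroids:
        p = M22 @ np.array([cx, cy], dtype=float)
        dists = [math.hypot(p[0] - dx - cxd, p[1] - dy - cyd) for cxd, cyd in centroids_d]
        matches.append(int(np.argmin(dists)))  # index of the matching deskewed region
    return matches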
MIN_AREA_REGION] + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + #try: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #except: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) + # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) + # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) + else: + pass - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - #try: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - #except: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) - # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) - # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) - else: - pass - - #print("text region early 3 in %.1fs", time.time() - t0) - if self.light_version: - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) - else: - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) - if not self.curved_line: + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - if self.textline_light: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, 
all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - else: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - else: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - - else: - - scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - #print("text region early 6 in %.1fs", time.time() - t0) - if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, 
all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) else: - #takes long timee - contours_only_text_parent_d_ordered = None + txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) + boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) + boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) + #print("text region early 5 in %.1fs", time.time() - t0) + if not self.curved_line: if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + if self.textline_light: + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + else: + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, 
polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - if self.plotter: - self.plotter.save_plot_of_layout(text_regions_p, image_page) - self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) - pixel_lines = 6 - - if not self.reading_order_machine_based: - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + + scale_param = 1 + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + #print("text region early 6 in %.1fs", time.time() - t0) + if self.full_layout: + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = 
find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + #takes long timee + contours_only_text_parent_d_ordered = None + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - if num_col_classifier >= 3: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + if self.plotter: + self.plotter.save_plot_of_layout(text_regions_p, image_page) + self.plotter.save_plot_of_layout_all(text_regions_p, image_page) + + pixel_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) + pixel_lines = 6 + + if not self.reading_order_machine_based: + if not self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + else: + _, _, 
matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + if num_col_classifier >= 3: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + + else: + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + + if not self.reading_order_machine_based: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + + if self.plotter: + self.plotter.write_images_into_directory(polygons_of_images, image_page) + t_order = time.time() - if not self.reading_order_machine_based: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + if self.full_layout: + + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - - #print(boxes_d,'boxes_d') - #img_once = np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1])) - #for box_i in boxes_d: - #img_once[int(box_i[2]):int(box_i[3]),int(box_i[0]):int(box_i[1]) ] =1 - #plt.imshow(img_once) - #plt.show() - #print(np.unique(img_once),'img_once') - if self.plotter: - self.plotter.write_images_into_directory(polygons_of_images, image_page) - t_order = time.time() + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - if self.full_layout: - - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + if self.ocr: + ocr_all_textlines = [] else: - order_text_new, id_of_texts_tot 
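# For pages classified with three or more columns, the separator-free region
# mask is eroded fairly aggressively (6 iterations) before the page is split
# into reading-order boxes, so that barely-touching columns come apart. A small
# sketch of that precondition, assuming the module-level 5x5 KERNEL:
import cv2
import numpy as np

KERNEL = np.ones((5, 5), np.uint8)  # assumption: eynollah's module-level kernel

def prepare_mask_for_column_split(regions_without_separators, num_col_classifier):
    if num_col_classifier >= 3:
        mask = regions_without_separators.astype(np.uint8)
        return cv2.erode(mask, KERNEL, iterations=6)
    return regions_without_separators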
= self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - - if self.ocr: - ocr_all_textlines = [] - else: - ocr_all_textlines = None + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - - - else: - contours_only_text_parent_h = None - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + contours_only_text_parent_h = None + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + - if self.ocr: + 
if self.ocr: - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + + ocr_textline_in_textregion.append(text_ocr) - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + 
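# A minimal sketch of the per-textline OCR path above, using the public TrOCR
# checkpoint for both processor and model; the patch itself loads a fine-tuned
# model from self.model_ocr_dir, and the width-based splitting done in
# return_ocr_of_textline_without_common_section is omitted here:
import torch
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
model_ocr = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_ocr.to(device)

def ocr_textline(crop_bgr):
    # crop_bgr: one masked textline crop as an OpenCV BGR array.
    image = Image.fromarray(crop_bgr[:, :, ::-1])  # BGR -> RGB
    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
    generated_ids = model_ocr.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]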
##cv2.imwrite(str(ind_tot)+'.png', img_croped) + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) - ocr_textline_in_textregion.append(text_ocr) - - ##cv2.imwrite(str(ind_tot)+'.png', img_croped) - ind_tot = ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) - - else: - ocr_all_textlines = None - #print(ocr_all_textlines) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) + else: + ocr_all_textlines = None + #print(ocr_all_textlines) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) + else: + _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_ro=skip_layout_ro) + + page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) + all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + all_found_textline_polygons=[ all_found_textline_polygons ] + order_text_new = [0] + slopes =[0] + id_of_texts_tot =['region_0001'] + + polygons_of_images = [] + slopes_marginals = [] + polygons_of_marginals = [] + all_found_textline_polygons_marginals = [] + all_box_coord_marginals = [] + polygons_lines_xml = [] + contours_tables = [] + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) if not self.dir_in: return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) + if self.dir_in: self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) From 2c939049854c73c7dc27e4b04863c8498d654129 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 12 Sep 2024 17:35:28 +0200 Subject: [PATCH 023/107] avoiding double binarization --- qurator/eynollah/eynollah.py | 155 +++++++++++++++++++---------- qurator/eynollah/utils/__init__.py | 4 +- 2 files changed, 106 insertions(+), 53 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 533e2a0..569aec5 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -89,7 +89,7 @@ from .utils.xml 
import order_and_id_of_texts from .plot import EynollahPlotter from .writer import EynollahXmlWriter -MIN_AREA_REGION = 0.00001 +MIN_AREA_REGION = 0.000001 SLOPE_THRESHOLD = 0.13 RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: DPI_THRESHOLD = 298 @@ -237,15 +237,16 @@ class Eynollah: self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" self.model_region_dir_fully_np = dir_models + "/eynollah-full-regions-1column_20210425" - self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" + #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/model_3_eraly_layout_no_patches_1_2_spaltige" - self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" + self.model_textline_dir = dir_models + "/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: self.model_textline_dir = dir_models + "/eynollah-textline_20210425" if self.ocr: @@ -267,7 +268,7 @@ class Eynollah: self.model_textline = self.our_load_model(self.model_textline_dir) self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np) - self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) + ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) @@ -993,9 +994,16 @@ class Eynollah: img = resize_image(img, img_height_model, img_width_model) label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) - + + seg_not_base = label_p_pred[0,:,:,4] + + seg_not_base[seg_not_base>0.4] =1 + seg_not_base[seg_not_base<1] =0 seg = np.argmax(label_p_pred, axis=3)[0] + + seg[seg_not_base==1]=4 + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) @@ -1781,7 +1789,7 @@ class Eynollah: all_box_coord_per_process.append(crop_coor) queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) - def textline_contours(self, img, patches, 
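# The do_prediction change above biases decoding toward class 4: after the
# usual argmax over the softmax output, any pixel whose class-4 probability
# exceeds 0.4 is forced to class 4, so a strong minority vote for that class
# overrides the argmax winner. A sketch (label_p_pred: model output of shape
# (1, H, W, n_classes)):
import numpy as np

def decode_with_class_bias(label_p_pred, biased_class=4, threshold=0.4):
    seg_not_base = (label_p_pred[0, :, :, biased_class] > threshold).astype(np.uint8)
    seg = np.argmax(label_p_pred, axis=3)[0]
    seg[seg_not_base == 1] = biased_class
    return seg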
scaler_h, scaler_w): + def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier=None): self.logger.debug('enter textline_contours') if not self.dir_in: model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np) @@ -1792,10 +1800,34 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) #print(img.shape,'bin shape textline') if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=3) + prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3) + if num_col_classifier==1: + prediction_textline_nopatch = self.do_prediction(False, img, model_textline) + prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=3) + prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3) + if num_col_classifier==1: + prediction_textline_nopatch = self.do_prediction(False, img, model_textline) + prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 prediction_textline = resize_image(prediction_textline, img_h, img_w) + + textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1 + + old_art = np.copy(textline_mask_tot_ea_art) + + textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') + textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) + + prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 + + textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1 + textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8') + textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) + + prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1 + + prediction_textline[:,:][old_art[:,:]==1]=2 + if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) else: @@ -1855,49 +1887,58 @@ class Eynollah: #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 1000 + img_w_new = 900#1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: - img_w_new = 1500 + img_w_new = 1300#1500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 3: - img_w_new = 2000 + img_w_new = 1600#2000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 4: - img_w_new = 2500 + img_w_new = 1900#2500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 5: - img_w_new = 3000 + img_w_new = 2300#3000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) else: - img_w_new = 4000 + img_w_new = 3300#4000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) t_bin = time.time() - if not self.dir_in: - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - - #print("inside bin ", time.time()-t_bin) - 
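# The textline post-processing above thickens the prediction class by class:
# the "artificial" class 2 is dilated and written back, then class 1 is dilated
# and written back, and finally the original class-2 pixels are restored so the
# class-1 dilation cannot swallow them. A sketch on a 2-D label map, again
# assuming the module-level 5x5 kernel:
import cv2
import numpy as np

KERNEL = np.ones((5, 5), np.uint8)  # assumption

def thicken_textline_classes(prediction_textline):
    art = ((prediction_textline == 2) * 1).astype(np.uint8)
    old_art = np.copy(art)
    art = cv2.dilate(art, KERNEL, iterations=1)
    prediction_textline[art == 1] = 2
    lines = ((prediction_textline == 1) * 1).astype(np.uint8)
    lines = cv2.dilate(lines, KERNEL, iterations=1)
    prediction_textline[lines == 1] = 1
    prediction_textline[old_art == 1] = 2  # artificial pixels win back their label
    return prediction_textline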
prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - prediction_bin = prediction_bin.astype(np.uint16) - #img= np.copy(prediction_bin) - img_bin = np.copy(prediction_bin) + #if (not self.input_binary) or self.full_layout: + #if self.input_binary: + #img_bin = np.copy(img_resized) + if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): + if not self.dir_in: + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + else: + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + + #print("inside bin ", time.time()-t_bin) + prediction_bin=prediction_bin[:,:,0] + prediction_bin = (prediction_bin[:,:]==0)*1 + prediction_bin = prediction_bin*255 + + prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + prediction_bin = prediction_bin.astype(np.uint16) + #img= np.copy(prediction_bin) + img_bin = np.copy(prediction_bin) + else: + img_bin = np.copy(img_resized) #print("inside 1 ", time.time()-t_in) - textline_mask_tot_ea = self.run_textline(img_bin) + ###textline_mask_tot_ea = self.run_textline(img_bin) + textline_mask_tot_ea = self.run_textline(img_bin, num_col_classifier) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) @@ -1906,20 +1947,20 @@ class Eynollah: #print(img_resized.shape, num_col_classifier, "num_col_classifier") if not self.dir_in: - ###if num_col_classifier == 1 or num_col_classifier == 2: - ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - ###prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) - ###else: - ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - ###prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) + if num_col_classifier == 1 or num_col_classifier == 2: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + else: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) + ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: - ##if num_col_classifier == 1 or num_col_classifier == 2: - ##prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) - ##else: - ##prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) - prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) + if num_col_classifier == 1 or 
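# The "avoiding double binarization" condition above reduces to: run the neural
# binarizer only when the input is not already binary and its output is
# actually consumed later, i.e. in full-layout mode or on pages with three or
# more columns; otherwise the resized image is used as-is. A sketch of that
# decision:
def should_binarize(input_binary, full_layout, num_col_classifier):
    if input_binary:
        return False  # the input is already binary
    return full_layout or num_col_classifier >= 3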
num_col_classifier == 2: + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + else: + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) @@ -1937,7 +1978,7 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=2) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -2899,10 +2940,11 @@ class Eynollah: #print("enhancement in ", time.time()-t_in) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified - def run_textline(self, image_page): - scaler_h_textline = 1 # 1.2#1.2 - scaler_w_textline = 1 # 0.9#1 - textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline) + def run_textline(self, image_page, num_col_classifier=None): + scaler_h_textline = 1#1.3 # 1.2#1.2 + scaler_w_textline = 1#1.3 # 0.9#1 + #print(image_page.shape) + textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3147,6 +3189,17 @@ class Eynollah: ##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) ##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 drop_capital_label_in_full_layout_model = 3 + + drops = (regions_fully[:,:,0]==drop_capital_label_in_full_layout_model)*1 + + drops= drops.astype(np.uint8) + + regions_fully[:,:,0][regions_fully[:,:,0]==drop_capital_label_in_full_layout_model] = 1 + + drops = cv2.erode(drops[:,:], KERNEL, iterations=1) + regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model + + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: @@ -3695,7 +3748,7 @@ class Eynollah: """ self.logger.debug("enter run") - skip_layout_ro = True + skip_layout_ro = False#True t0_tot = time.time() diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index 929669f..8705ecf 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -792,11 +792,11 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) - if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.4: + if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.8: layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label else: - layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = drop_capital_label + layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = 1#drop_capital_label return layout_in_patch From 1b18ae874b9ea086e99ac76281dd30572f947471 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 13 Sep 
2024 00:52:06 +0200
Subject: [PATCH 024/107] passing number of columns as an argument

---
 qurator/eynollah/cli.py      | 14 +++++-
 qurator/eynollah/eynollah.py | 96 ++++++++++++++++++++++++++++--------
 2 files changed, 88 insertions(+), 22 deletions(-)

diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py
index b0f55cd..357582c 100644
--- a/qurator/eynollah/cli.py
+++ b/qurator/eynollah/cli.py
@@ -191,6 +191,16 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i
     is_flag=True,
     help="if this parameter set to true, this tool will try to do ocr",
 )
+@click.option(
+    "--num_col_upper",
+    "-ncu",
+    help="upper limit of columns in document image",
+)
+@click.option(
+    "--num_col_lower",
+    "-ncl",
+    help="lower limit of columns in document image",
+)
 @click.option(
     "--log_level",
     "-l",
     help="Override log level globally to this",
 )
-def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, ignore_page_extraction, log_level):
+def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, ignore_page_extraction, log_level):
     if log_level:
         setOverrideLogLevel(log_level)
     initLogging()
@@ -235,6 +245,8 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s
         ignore_page_extraction=ignore_page_extraction,
         reading_order_machine_based=reading_order_machine_based,
         do_ocr=do_ocr,
+        num_col_upper=num_col_upper,
+        num_col_lower=num_col_lower,
     )
     if dir_in:
         eynollah.run()
diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index 569aec5..f76dce8 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -178,6 +178,8 @@ class Eynollah:
         ignore_page_extraction=False,
         reading_order_machine_based=False,
         do_ocr=False,
+        num_col_upper=None,
+        num_col_lower=None,
         override_dpi=None,
         logger=None,
         pcgts=None,
@@ -212,6 +214,14 @@ class Eynollah:
         self.headers_off = headers_off
         self.ignore_page_extraction = ignore_page_extraction
         self.ocr = do_ocr
+        if num_col_upper:
+            self.num_col_upper = int(num_col_upper)
+        else:
+            self.num_col_upper = num_col_upper
+        if num_col_lower:
+            self.num_col_lower = int(num_col_lower)
+        else:
+            self.num_col_lower = num_col_lower
         self.pcgts = pcgts
         if not dir_in:
             self.plotter = None if not enable_plotting else EynollahPlotter(
@@ -597,36 +607,80 @@ class Eynollah:
         else:
             img = self.imread()
             img_bin = None
-
+
+        width_early = img.shape[1]
         t1 = time.time()
         _, page_coord = self.early_page_for_num_of_column_classification(img_bin)
         if not self.dir_in:
             model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier)
-        if self.input_binary:
-            img_in = np.copy(img)
-            width_early = img_in.shape[1]
-            img_in = img_in / 255.0
-            img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
-            img_in = img_in.reshape(1, 448, 448, 3)
-        else:
-            img_1ch = self.imread(grayscale=True)
-            width_early = img_1ch.shape[1]
-            img_1ch = img_1ch[page_coord[0] : page_coord[1],
page_coord[2] : page_coord[3]] + if self.num_col_upper and not self.num_col_lower: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + elif self.num_col_lower and not self.num_col_upper: + num_col = self.num_col_lower + label_p_pred = [np.ones(6)] + + elif (not self.num_col_upper and not self.num_col_lower): + if self.input_binary: + img_in = np.copy(img) + img_in = img_in / 255.0 + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = self.imread(grayscale=True) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - img_1ch = img_1ch / 255.0 - img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) - img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) - img_in[0, :, :, 0] = img_1ch[:, :] - img_in[0, :, :, 1] = img_1ch[:, :] - img_in[0, :, :, 2] = img_1ch[:, :] + img_1ch = img_1ch / 255.0 + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] - if self.dir_in: - label_p_pred = self.model_classifier.predict(img_in, verbose=0) + if self.dir_in: + label_p_pred = self.model_classifier.predict(img_in, verbose=0) + else: + label_p_pred = model_num_classifier.predict(img_in, verbose=0) + num_col = np.argmax(label_p_pred[0]) + 1 + elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): + if self.input_binary: + img_in = np.copy(img) + img_in = img_in / 255.0 + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = self.imread(grayscale=True) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_1ch = img_1ch / 255.0 + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] + + + if self.dir_in: + label_p_pred = self.model_classifier.predict(img_in, verbose=0) + else: + label_p_pred = model_num_classifier.predict(img_in, verbose=0) + num_col = np.argmax(label_p_pred[0]) + 1 + + if num_col > self.num_col_upper: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + if num_col < self.num_col_lower: + num_col = self.num_col_lower + label_p_pred = [np.ones(6)] + else: - label_p_pred = model_num_classifier.predict(img_in, verbose=0) - num_col = np.argmax(label_p_pred[0]) + 1 + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) From 21380fc8706474f0c6c791560fb6a5174d03aa8e Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 17 Sep 2024 15:06:41 +0200 Subject: [PATCH 025/107] scaling contours without dilation --- qurator/eynollah/eynollah.py | 207 +++++++++++++++++++++++++++++++---- 1 file changed, 184 insertions(+), 23 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index f76dce8..79cf98b 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -256,7 +256,7 @@ class Eynollah: ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = 
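# NOTE on [PATCH 024/107] above: the new num_col_upper/num_col_lower options
# bound the column classifier. The decision logic reads: a single given bound
# is taken as the answer outright; with both bounds given and unequal, the
# classifier runs and its prediction is clamped into the interval
# [num_col_lower, num_col_upper]; with both equal, that shared value wins.
# A minimal, hedged sketch of just that decision -- clamp_num_col is an
# illustrative helper, not the patched method, and predict stands in for the
# Keras column classifier:

def clamp_num_col(predict, num_col_upper=None, num_col_lower=None):
    # one-sided bound: the patch takes the given value directly
    if num_col_upper and not num_col_lower:
        return num_col_upper
    if num_col_lower and not num_col_upper:
        return num_col_lower
    # both bounds given
    if num_col_upper and num_col_lower:
        if num_col_upper == num_col_lower:
            return num_col_upper
        return min(max(predict(), num_col_lower), num_col_upper)
    # no bounds: trust the classifier
    return predict()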
dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - self.model_textline_dir = dir_models + "/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: self.model_textline_dir = dir_models + "/eynollah-textline_20210425" if self.ocr: @@ -796,7 +796,7 @@ class Eynollah: return model, None - def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False): + def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] @@ -903,6 +903,13 @@ class Eynollah: seg[seg_not_base==1]=4 seg[seg_background==1]=0 seg[(seg_line==1) & (seg==0)]=3 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): @@ -977,6 +984,14 @@ class Eynollah: seg[seg_not_base==1]=4 seg[seg_background==1]=0 seg[(seg_line==1) & (seg==0)]=3 + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): @@ -1845,42 +1860,50 @@ class Eynollah: def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier=None): self.logger.debug('enter textline_contours') + thresholding_for_artificial_class_in_light_version = True#False if not self.dir_in: model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np) - img = img.astype(np.uint8) + #img = img.astype(np.uint8) img_org = np.copy(img) img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - #print(img.shape,'bin shape textline') + if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3) - if num_col_classifier==1: - prediction_textline_nopatch = self.do_prediction(False, img, model_textline) - prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 + prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + + #if not thresholding_for_artificial_class_in_light_version: + #if num_col_classifier==1: + #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) + #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3) - if num_col_classifier==1: - prediction_textline_nopatch = 
self.do_prediction(False, img, model_textline) - prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 + prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + #if not thresholding_for_artificial_class_in_light_version: + #if num_col_classifier==1: + #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) + #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 prediction_textline = resize_image(prediction_textline, img_h, img_w) textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1 old_art = np.copy(textline_mask_tot_ea_art) - textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') - textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) - - prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 + if not thresholding_for_artificial_class_in_light_version: + textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') + textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) + + prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1 textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8') - textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) + + if not thresholding_for_artificial_class_in_light_version: + textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1 - prediction_textline[:,:][old_art[:,:]==1]=2 + if not thresholding_for_artificial_class_in_light_version: + prediction_textline[:,:][old_art[:,:]==1]=2 if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) @@ -1959,7 +1982,7 @@ class Eynollah: img_w_new = 2300#3000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) else: - img_w_new = 3300#4000 + img_w_new = 3000#4000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) @@ -1968,7 +1991,7 @@ class Eynollah: #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): + if (not self.input_binary and self.full_layout):# or (not self.input_binary and num_col_classifier >= 3): if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) @@ -3794,15 +3817,146 @@ class Eynollah: return textline_contour def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] - + + def scale_contours(self,all_found_textline_polygons): + for i in range(len(all_found_textline_polygons[0])): + con_ind = all_found_textline_polygons[0][i] + x_min = np.min( con_ind[:,0,0] ) + y_min = np.min( con_ind[:,0,1] ) + + x_max = np.max( con_ind[:,0,0] ) + y_max = np.max( con_ind[:,0,1] ) + + x_mean = np.mean( con_ind[:,0,0] ) + y_mean = np.mean( con_ind[:,0,1] ) + + arg_y_max = np.argmax( con_ind[:,0,1] ) + arg_y_min = np.argmin( con_ind[:,0,1] ) + + x_cor_y_max = 
con_ind[arg_y_max,0,0] + x_cor_y_min = con_ind[arg_y_min,0,0] + + m_con = (y_max - y_min) / float(x_cor_y_max - x_cor_y_min) + + con_scaled = con_ind*1 + + con_scaled = con_scaled.astype(np.float) + + con_scaled[:,0,0] = con_scaled[:,0,0] - int(x_mean) + con_scaled[:,0,1] = con_scaled[:,0,1] - int(y_mean) + + + if (x_max - x_min) > (y_max - y_min): + + if (y_max-y_min)<=15: + con_scaled[:,0,1] = con_ind[:,0,1]*1.8 + + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.8*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + elif (y_max-y_min)<=30 and (y_max-y_min)>15: + con_scaled[:,0,1] = con_ind[:,0,1]*1.6 + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.6*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + elif (y_max-y_min)>30 and (y_max-y_min)<100: + con_scaled[:,0,1] = con_ind[:,0,1]*1.35 + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.35*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + else: + con_scaled[:,0,1] = con_ind[:,0,1]*1.2 + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.2*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + con_scaled[:,0,0] = con_ind[:,0,0]*1.03 + + + + if y_max_expected<=y_max_scaled: + con_scaled[:,0,1] = con_scaled[:,0,1] - y_min_scaled + + con_scaled[:,0,1] = con_scaled[:,0,1]*(y_max_expected - y_min_scaled)/ (y_max_scaled - y_min_scaled) + con_scaled[:,0,1] = con_scaled[:,0,1] + y_min_scaled + + else: + + if (x_max-x_min)<=15: + con_scaled[:,0,0] = con_ind[:,0,0]*1.8 + elif (x_max-x_min)<=30 and (x_max-x_min)>15: + con_scaled[:,0,0] = con_ind[:,0,0]*1.6 + elif (x_max-x_min)>30 and (x_max-x_min)<100: + con_scaled[:,0,0] = con_ind[:,0,0]*1.35 + else: + con_scaled[:,0,0] = con_ind[:,0,0]*1.2 + con_scaled[:,0,1] = con_ind[:,0,1]*1.03 + + + x_min_n = np.min( con_scaled[:,0,0] ) + y_min_n = np.min( con_scaled[:,0,1] ) + + x_mean_n = np.mean( con_scaled[:,0,0] ) + y_mean_n = np.mean( con_scaled[:,0,1] ) + + ##diff_x = (x_min_n - x_min)*1 + ##diff_y = (y_min_n - y_min)*1 + + diff_x = (x_mean_n - x_mean)*1 + diff_y = (y_mean_n - y_mean)*1 + + + con_scaled[:,0,0] = (con_scaled[:,0,0] - diff_x) + con_scaled[:,0,1] = (con_scaled[:,0,1] - diff_y) + + x_max_n = np.max( con_scaled[:,0,0] ) + y_max_n = np.max( con_scaled[:,0,1] ) + + diff_disp_x = (x_max_n - x_max) / 2. + diff_disp_y = (y_max_n - y_max) / 2. 
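# NOTE on the scale_contours hunk above ([PATCH 025/107]): a polygon is grown
# by scaling it about its mean point -- the factor shrinks as the contour gets
# bigger (1.8 for very thin shapes down to 1.2) -- and the result is shifted
# back so the centroid stays put. A minimal sketch of that idea for an
# OpenCV-style contour of shape (N, 1, 2); scale_about_centroid is an
# illustrative helper, not the patched method, and it collapses the patch's
# per-axis, slope-capped factors into one isotropic factor:
import numpy as np

def scale_about_centroid(con_ind, factor=1.2):
    con = con_ind.astype(float)   # np.float used in the patch is deprecated in newer NumPy
    centroid = con.mean(axis=0)   # shape (1, 2), broadcasts over all N points
    scaled = (con - centroid) * factor + centroid
    return np.rint(scaled).astype(np.int32)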
+ + x_vals = np.array( np.abs(con_scaled[:,0,0] - diff_disp_x) ).astype(np.int16) + y_vals = np.array( np.abs(con_scaled[:,0,1] - diff_disp_y) ).astype(np.int16) + all_found_textline_polygons[0][i][:,0,0] = x_vals[:] + all_found_textline_polygons[0][i][:,0,1] = y_vals[:] + return all_found_textline_polygons + + def scale_contours_new(self, textline_mask_tot_ea): + + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) + all_found_textline_polygons1 = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + + textline_mask_tot_ea_res = resize_image(textline_mask_tot_ea, int( textline_mask_tot_ea.shape[0]*1.6), textline_mask_tot_ea.shape[1]) + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea_res) + ##all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + for i in range(len(all_found_textline_polygons)): + + #x_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,0] ) + y_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,1] ) + + #x_mean = np.mean( all_found_textline_polygons[i][:,0,0] ) + y_mean = np.mean( all_found_textline_polygons[i][:,0,1] ) + + ydiff = y_mean - y_mean_1 + + all_found_textline_polygons[i][:,0,1] = all_found_textline_polygons[i][:,0,1] - ydiff + return all_found_textline_polygons + + def run(self): """ Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - skip_layout_ro = False#True + skip_layout_ro = True t0_tot = time.time() @@ -3820,7 +3974,6 @@ class Eynollah: self.logger.info("Enhancing took %.1fs ", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() - if not skip_layout_ro: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) @@ -4032,6 +4185,7 @@ class Eynollah: if self.textline_light: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + else: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) @@ -4212,10 +4366,17 @@ class Eynollah: page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = 
self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + + ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) all_found_textline_polygons=[ all_found_textline_polygons ] + + all_found_textline_polygons = self.scale_contours(all_found_textline_polygons) + + order_text_new = [0] slopes =[0] id_of_texts_tot =['region_0001'] From a1f1f98de3ad7500c80bb5d183fc86aa66e031e5 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 18 Sep 2024 00:08:54 +0200 Subject: [PATCH 026/107] updating scaling contours --- qurator/eynollah/eynollah.py | 82 ++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 12 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 79cf98b..bbfba0f 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3821,23 +3821,51 @@ class Eynollah: def scale_contours(self,all_found_textline_polygons): for i in range(len(all_found_textline_polygons[0])): con_ind = all_found_textline_polygons[0][i] - x_min = np.min( con_ind[:,0,0] ) - y_min = np.min( con_ind[:,0,1] ) - x_max = np.max( con_ind[:,0,0] ) - y_max = np.max( con_ind[:,0,1] ) + con_ind = con_ind.astype(np.float) + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) - x_mean = np.mean( con_ind[:,0,0] ) - y_mean = np.mean( con_ind[:,0,1] ) + + m_arr = y_differential / x_differential + + #print(x_differential, 'x_differential') + + #print(y_differential, 'y_differential') + + #print(m_arr) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_mean = float(np.mean( con_ind[:,0,0] )) + y_mean = float(np.mean( con_ind[:,0,1] )) arg_y_max = np.argmax( con_ind[:,0,1] ) arg_y_min = np.argmin( con_ind[:,0,1] ) - x_cor_y_max = con_ind[arg_y_max,0,0] - x_cor_y_min = con_ind[arg_y_min,0,0] - m_con = (y_max - y_min) / float(x_cor_y_max - x_cor_y_min) + arg_x_max = np.argmax( con_ind[:,0,0] ) + arg_x_min = np.argmin( con_ind[:,0,0] ) + + x_cor_y_max = float(con_ind[arg_y_max,0,0]) + x_cor_y_min = float(con_ind[arg_y_min,0,0]) + + y_cor_x_max = float(con_ind[arg_x_max,0,1]) + y_cor_x_min = float(con_ind[arg_x_min,0,1]) + + if (x_cor_y_max - x_cor_y_min) != 0: + m_con = (y_max - y_min) / (x_cor_y_max - x_cor_y_min) + else: + m_con= None + + + m_con_x = (x_max - x_min) / (y_cor_x_max - y_cor_x_min) + #print(m_con,m_con_x, 'm_con') con_scaled = con_ind*1 con_scaled = con_scaled.astype(np.float) @@ -3845,7 +3873,6 @@ class Eynollah: con_scaled[:,0,0] = con_scaled[:,0,0] - int(x_mean) con_scaled[:,0,1] = con_scaled[:,0,1] - int(y_mean) - if (x_max - x_min) > (y_max - y_min): if (y_max-y_min)<=15: @@ -3877,7 +3904,7 @@ class Eynollah: - + #print(m_con, (x_cor_y_max-x_cor_y_min),y_min_scaled, y_max_expected, y_max_scaled, "y_max_scaled") if y_max_expected<=y_max_scaled: con_scaled[:,0,1] = con_scaled[:,0,1] - y_min_scaled @@ -3885,17 +3912,48 @@ class Eynollah: con_scaled[:,0,1] = con_scaled[:,0,1] + y_min_scaled else: - + #print(x_max-x_min, m_con_x,'m_con_x') if (x_max-x_min)<=15: con_scaled[:,0,0] = con_ind[:,0,0]*1.8 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected 
= ( m_con_x*1.8*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + elif (x_max-x_min)<=30 and (x_max-x_min)>15: con_scaled[:,0,0] = con_ind[:,0,0]*1.6 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected = ( m_con_x*1.6*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + elif (x_max-x_min)>30 and (x_max-x_min)<100: con_scaled[:,0,0] = con_ind[:,0,0]*1.35 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected = ( m_con_x*1.35*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + else: con_scaled[:,0,0] = con_ind[:,0,0]*1.2 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected = ( m_con_x*1.2*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + con_scaled[:,0,1] = con_ind[:,0,1]*1.03 + #print(x_max_expected, x_max_scaled, "x_max_scaled") + if x_max_expected<=x_max_scaled: + con_scaled[:,0,0] = con_scaled[:,0,0] - x_min_scaled + + con_scaled[:,0,0] = con_scaled[:,0,0]*(x_max_expected - x_min_scaled)/ (x_max_scaled - x_min_scaled) + con_scaled[:,0,0] = con_scaled[:,0,0] + x_min_scaled + x_min_n = np.min( con_scaled[:,0,0] ) y_min_n = np.min( con_scaled[:,0,1] ) From 5a07cd9cfa9713e8944195fff6416ed6e639c121 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 19 Sep 2024 16:21:55 +0200 Subject: [PATCH 027/107] the most effective version of contours dilation without opencv and all at once --- qurator/eynollah/eynollah.py | 258 +++++++++++++---------------------- 1 file changed, 97 insertions(+), 161 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index bbfba0f..cb70107 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1964,7 +1964,7 @@ class Eynollah: #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 900#1000 + img_w_new = 800#1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -3818,196 +3818,132 @@ class Eynollah: def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] - def scale_contours(self,all_found_textline_polygons): + def dilate_textlines(self,all_found_textline_polygons): for i in range(len(all_found_textline_polygons[0])): con_ind = all_found_textline_polygons[0][i] con_ind = con_ind.astype(np.float) + x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - - m_arr = y_differential / x_differential - - #print(x_differential, 'x_differential') - - #print(y_differential, 'y_differential') - - #print(m_arr) - x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) x_max = float(np.max( con_ind[:,0,0] )) y_max = float(np.max( con_ind[:,0,1] )) + - x_mean = float(np.mean( con_ind[:,0,0] )) - y_mean = float(np.mean( con_ind[:,0,1] )) - - arg_y_max = np.argmax( con_ind[:,0,1] ) - arg_y_min = np.argmin( con_ind[:,0,1] ) - - - arg_x_max = np.argmax( con_ind[:,0,0] ) - arg_x_min = np.argmin( con_ind[:,0,0] ) - - x_cor_y_max = float(con_ind[arg_y_max,0,0]) - x_cor_y_min = float(con_ind[arg_y_min,0,0]) - - - y_cor_x_max = float(con_ind[arg_x_max,0,1]) - y_cor_x_min = float(con_ind[arg_x_min,0,1]) - - if (x_cor_y_max - x_cor_y_min) != 0: - m_con = (y_max - y_min) / (x_cor_y_max - x_cor_y_min) - else: - m_con= None - - - m_con_x = (x_max - x_min) / (y_cor_x_max - y_cor_x_min) - #print(m_con,m_con_x, 'm_con') - con_scaled = con_ind*1 - - con_scaled 
= con_scaled.astype(np.float) - - con_scaled[:,0,0] = con_scaled[:,0,0] - int(x_mean) - con_scaled[:,0,1] = con_scaled[:,0,1] - int(y_mean) - - if (x_max - x_min) > (y_max - y_min): - - if (y_max-y_min)<=15: - con_scaled[:,0,1] = con_ind[:,0,1]*1.8 - - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) + if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: + + x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) + + mult = x_biger_than_x*x_differential + + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) + + if y_differential[0]==0: + y_differential[0] = 0.1 + + if y_differential[-1]==0: + y_differential[-1]= 0.1 - y_max_expected = ( m_con*1.8*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - elif (y_max-y_min)<=30 and (y_max-y_min)>15: - con_scaled[:,0,1] = con_ind[:,0,1]*1.6 - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) - y_max_expected = ( m_con*1.6*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - elif (y_max-y_min)>30 and (y_max-y_min)<100: - con_scaled[:,0,1] = con_ind[:,0,1]*1.35 - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) - y_max_expected = ( m_con*1.35*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - else: - con_scaled[:,0,1] = con_ind[:,0,1]*1.2 - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) - - y_max_expected = ( m_con*1.2*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - con_scaled[:,0,0] = con_ind[:,0,0]*1.03 + y_differential = [y_differential[ind] if y_differential[ind]!=0 else (y_differential[ind-1] + y_differential[ind+1])/2. for ind in range(len(y_differential)) ] - - #print(m_con, (x_cor_y_max-x_cor_y_min),y_min_scaled, y_max_expected, y_max_scaled, "y_max_scaled") - if y_max_expected<=y_max_scaled: - con_scaled[:,0,1] = con_scaled[:,0,1] - y_min_scaled + if y_differential[0]==0.1: + y_differential[0] = y_differential[1] + if y_differential[-1]==0.1: + y_differential[-1] = y_differential[-2] - con_scaled[:,0,1] = con_scaled[:,0,1]*(y_max_expected - y_min_scaled)/ (y_max_scaled - y_min_scaled) - con_scaled[:,0,1] = con_scaled[:,0,1] + y_min_scaled + y_differential.append(y_differential[0]) + + y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] + + y_differential = np.array(y_differential) + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential + + con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 + con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 + + try: + con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 + con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 + except: + pass + + con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 + con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 + + try: + con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 + con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 + except: + pass + + else: - #print(x_max-x_min, m_con_x,'m_con_x') - if (x_max-x_min)<=15: - con_scaled[:,0,0] = con_ind[:,0,0]*1.8 - - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - - x_max_expected = ( m_con_x*1.8*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) - - elif (x_max-x_min)<=30 and (x_max-x_min)>15: - con_scaled[:,0,0] = con_ind[:,0,0]*1.6 - - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - - x_max_expected = ( m_con_x*1.6*(y_cor_x_max-y_cor_x_min) + 
x_min_scaled ) - - elif (x_max-x_min)>30 and (x_max-x_min)<100: - con_scaled[:,0,0] = con_ind[:,0,0]*1.35 - - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - - x_max_expected = ( m_con_x*1.35*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) - - else: - con_scaled[:,0,0] = con_ind[:,0,0]*1.2 + + y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) + + mult = y_biger_than_x*y_differential + + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) + + if x_differential[0]==0: + x_differential[0] = 0.1 + + if x_differential[-1]==0: + x_differential[-1]= 0.1 - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - x_max_expected = ( m_con_x*1.2*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) - con_scaled[:,0,1] = con_ind[:,0,1]*1.03 + x_differential = [x_differential[ind] if x_differential[ind]!=0 else (x_differential[ind-1] + x_differential[ind+1])/2. for ind in range(len(x_differential)) ] - #print(x_max_expected, x_max_scaled, "x_max_scaled") - if x_max_expected<=x_max_scaled: - con_scaled[:,0,0] = con_scaled[:,0,0] - x_min_scaled + + if x_differential[0]==0.1: + x_differential[0] = x_differential[1] + if x_differential[-1]==0.1: + x_differential[-1] = x_differential[-2] - con_scaled[:,0,0] = con_scaled[:,0,0]*(x_max_expected - x_min_scaled)/ (x_max_scaled - x_min_scaled) - con_scaled[:,0,0] = con_scaled[:,0,0] + x_min_scaled + x_differential.append(x_differential[0]) + + x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] + + x_differential = np.array(x_differential) + + con_scaled = con_ind*1 + + con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential + + con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 + con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 + + con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 + con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 + + con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 + con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 + + con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 + con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 - - x_min_n = np.min( con_scaled[:,0,0] ) - y_min_n = np.min( con_scaled[:,0,1] ) - - x_mean_n = np.mean( con_scaled[:,0,0] ) - y_mean_n = np.mean( con_scaled[:,0,1] ) - - ##diff_x = (x_min_n - x_min)*1 - ##diff_y = (y_min_n - y_min)*1 - - diff_x = (x_mean_n - x_mean)*1 - diff_y = (y_mean_n - y_mean)*1 - - - con_scaled[:,0,0] = (con_scaled[:,0,0] - diff_x) - con_scaled[:,0,1] = (con_scaled[:,0,1] - diff_y) - - x_max_n = np.max( con_scaled[:,0,0] ) - y_max_n = np.max( con_scaled[:,0,1] ) - - diff_disp_x = (x_max_n - x_max) / 2. - diff_disp_y = (y_max_n - y_max) / 2. 
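# NOTE on [PATCH 027/107]: the geometric scaling being removed in this hunk is
# superseded by dilate_textlines, which pushes each vertex sideways by the
# sign of the local coordinate differences (a cheap stand-in for the outward
# normal): 8 px along the dominant axis, plus 5-8 px nudges at the two extreme
# vertices. A hedged sketch of the core move for a tall, narrow textline;
# dilate_tall_polygon is an illustrative name, not the patched method, and the
# extreme-vertex nudges and zero-run smoothing of the patch are omitted:
import numpy as np

def dilate_tall_polygon(con_ind, pad=8):
    con = con_ind.astype(float)
    dy = np.diff(con[:, 0, 1])                               # N-1 vertical steps
    sign_y = np.where(np.append(dy, dy[0]) < 0, -1.0, 1.0)   # +-1 per vertex
    # widen horizontally, opposite the vertical travel direction, so the
    # left and right flanks of the polygon move apart
    con[:, 0, 0] = np.maximum(con[:, 0, 0] - pad * sign_y, 0)
    return con.astype(np.int32)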
- - x_vals = np.array( np.abs(con_scaled[:,0,0] - diff_disp_x) ).astype(np.int16) - y_vals = np.array( np.abs(con_scaled[:,0,1] - diff_disp_y) ).astype(np.int16) - all_found_textline_polygons[0][i][:,0,0] = x_vals[:] - all_found_textline_polygons[0][i][:,0,1] = y_vals[:] - return all_found_textline_polygons - - def scale_contours_new(self, textline_mask_tot_ea): - - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) - all_found_textline_polygons1 = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - - - textline_mask_tot_ea_res = resize_image(textline_mask_tot_ea, int( textline_mask_tot_ea.shape[0]*1.6), textline_mask_tot_ea.shape[1]) - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea_res) - ##all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - - for i in range(len(all_found_textline_polygons)): - - #x_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,0] ) - y_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,1] ) - #x_mean = np.mean( all_found_textline_polygons[i][:,0,0] ) - y_mean = np.mean( all_found_textline_polygons[i][:,0,1] ) + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - ydiff = y_mean - y_mean_1 + all_found_textline_polygons[0][i][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[0][i][:,0,0] = con_scaled[:,0, 0] - all_found_textline_polygons[i][:,0,1] = all_found_textline_polygons[i][:,0,1] - ydiff return all_found_textline_polygons - - def run(self): """ Get image and scales, then extract the page of scanned image @@ -4432,7 +4368,7 @@ class Eynollah: all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = self.scale_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) order_text_new = [0] From 2d18739d9b267a14dfe0934b02772940976a8e72 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Sep 2024 15:08:09 +0200 Subject: [PATCH 028/107] postprocessing of textline contour dilation + skip layout and reading order passed as an argument --- qurator/eynollah/cli.py | 9 +++++++- qurator/eynollah/eynollah.py | 41 ++++++++++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 357582c..b293403 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -201,6 +201,12 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i "-ncl", help="upper limit of columns in document image", ) +@click.option( + "--skip_layout_and_reading_order", + "-slro/-noslro", + is_flag=True, + help="if this parameter set to true, this tool will ignore layout detection and reading order. 
It means that textline detection will be done within printspace and contours of textline will be written in xml output file.", +) @click.option( "--log_level", "-l", @@ -208,7 +214,7 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i help="Override log level globally to this", ) -def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, ignore_page_extraction, log_level): +def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): if log_level: setOverrideLogLevel(log_level) initLogging() @@ -247,6 +253,7 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s do_ocr=do_ocr, num_col_upper=num_col_upper, num_col_lower=num_col_lower, + skip_layout_and_reading_order=skip_layout_and_reading_order, ) if dir_in: eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index cb70107..0619ef0 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -180,6 +180,7 @@ class Eynollah: do_ocr=False, num_col_upper=None, num_col_lower=None, + skip_layout_and_reading_order = False, override_dpi=None, logger=None, pcgts=None, @@ -213,6 +214,7 @@ class Eynollah: self.allow_scaling = allow_scaling self.headers_off = headers_off self.ignore_page_extraction = ignore_page_extraction + self.skip_layout_and_reading_order = skip_layout_and_reading_order self.ocr = do_ocr if num_col_upper: self.num_col_upper = int(num_col_upper) @@ -1951,7 +1953,7 @@ class Eynollah: q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) - def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_ro=False): + def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): self.logger.debug("enter get_regions_light_v") t_in = time.time() erosion_hurts = False @@ -2019,7 +2021,7 @@ class Eynollah: textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - if not skip_layout_ro: + if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) #print(img_resized.shape, num_col_classifier, "num_col_classifier") @@ -3818,6 +3820,30 @@ class Eynollah: def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] + def return_it_in_two_groups(self,x_differential): + split = [ind if x_differential[ind]!=x_differential[ind+1] else -1 for ind in range(len(x_differential)-1)] + + split_masked = list( np.array(split[:])[np.array(split[:])!=-1] ) + + if 0 not in split_masked: + split_masked.insert(0, -1) + + split_masked.append(len(x_differential)-1) + + split_masked = np.array(split_masked) +1 + + sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind+1]]) for ind in range(len(split_masked)-1)] + + indexes_to_bec_changed = [ind if ( np.abs(sums[ind-1]) > np.abs(sums[ind]) and np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1 
for ind in range(1,len(sums)-1) ] + + indexes_to_bec_changed_filtered = np.array(indexes_to_bec_changed)[np.array(indexes_to_bec_changed)!=-1] + + x_differential_new = np.copy(x_differential) + for i in indexes_to_bec_changed_filtered: + x_differential_new[split_masked[i]:split_masked[i+1]] = -1*np.array(x_differential)[split_masked[i]:split_masked[i+1]] + + return x_differential_new + def dilate_textlines(self,all_found_textline_polygons): for i in range(len(all_found_textline_polygons[0])): con_ind = all_found_textline_polygons[0][i] @@ -3863,6 +3889,8 @@ class Eynollah: y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] + y_differential = self.return_it_in_two_groups(y_differential) + y_differential = np.array(y_differential) @@ -3890,7 +3918,6 @@ class Eynollah: else: - y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) mult = y_biger_than_x*y_differential @@ -3918,8 +3945,10 @@ class Eynollah: x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] + x_differential = self.return_it_in_two_groups(x_differential) x_differential = np.array(x_differential) + con_scaled = con_ind*1 con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential @@ -3949,8 +3978,6 @@ class Eynollah: Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - - skip_layout_ro = True t0_tot = time.time() @@ -3968,7 +3995,7 @@ class Eynollah: self.logger.info("Enhancing took %.1fs ", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() - if not skip_layout_ro: + if not self.skip_layout_and_reading_order: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) @@ -4356,7 +4383,7 @@ class Eynollah: return pcgts #print("text region early 7 in %.1fs", time.time() - t0) else: - _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_ro=skip_layout_ro) + _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) From b9e8959c4aefb0b9d24efb99abc309d7d350163c Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Sep 2024 16:33:13 +0200 Subject: [PATCH 029/107] update of light versions --- qurator/eynollah/eynollah.py | 238 ++++++++++++++++++----------------- 1 file changed, 126 insertions(+), 112 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 0619ef0..c7407e2 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1862,7 +1862,10 @@ class Eynollah: def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier=None): self.logger.debug('enter textline_contours') - thresholding_for_artificial_class_in_light_version = True#False + if self.textline_light: + thresholding_for_artificial_class_in_light_version = True#False + else: + thresholding_for_artificial_class_in_light_version = False if not self.dir_in: model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else 
self.model_textline_dir_np) #img = img.astype(np.uint8) @@ -2016,7 +2019,7 @@ class Eynollah: #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) - textline_mask_tot_ea = self.run_textline(img_bin, num_col_classifier) + textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) @@ -2057,7 +2060,8 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=2) + #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -2097,6 +2101,7 @@ class Eynollah: polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) @@ -3845,132 +3850,139 @@ class Eynollah: return x_differential_new def dilate_textlines(self,all_found_textline_polygons): - for i in range(len(all_found_textline_polygons[0])): - con_ind = all_found_textline_polygons[0][i] - - con_ind = con_ind.astype(np.float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - - if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: + for j in range(len(all_found_textline_polygons)): + for i in range(len(all_found_textline_polygons[j])): + con_ind = all_found_textline_polygons[j][i] - x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) + con_ind = con_ind.astype(np.float) - mult = x_biger_than_x*x_differential + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) - if y_differential[0]==0: - y_differential[0] = 0.1 + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + - if y_differential[-1]==0: - y_differential[-1]= 0.1 + if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: + x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) + mult = x_biger_than_x*x_differential - y_differential = [y_differential[ind] if y_differential[ind]!=0 else (y_differential[ind-1] + y_differential[ind+1])/2. 
for ind in range(len(y_differential)) ] - - - if y_differential[0]==0.1: - y_differential[0] = y_differential[1] - if y_differential[-1]==0.1: - y_differential[-1] = y_differential[-2] + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) - y_differential.append(y_differential[0]) - - y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] - - y_differential = self.return_it_in_two_groups(y_differential) - - y_differential = np.array(y_differential) - - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential - - con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 - con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 - - try: - con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 - con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 - except: - pass - - con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 - con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 - - try: - con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 - con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 - except: - pass - - - else: - y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) - - mult = y_biger_than_x*y_differential - - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) - - if x_differential[0]==0: - x_differential[0] = 0.1 - - if x_differential[-1]==0: - x_differential[-1]= 0.1 + if y_differential[0]==0: + y_differential[0] = 0.1 + if y_differential[-1]==0: + y_differential[-1]= 0.1 + + + + y_differential = [y_differential[ind] if y_differential[ind]!=0 else (y_differential[ind-1] + y_differential[ind+1])/2. for ind in range(len(y_differential)) ] - x_differential = [x_differential[ind] if x_differential[ind]!=0 else (x_differential[ind-1] + x_differential[ind+1])/2. 
for ind in range(len(x_differential)) ] - - - if x_differential[0]==0.1: - x_differential[0] = x_differential[1] - if x_differential[-1]==0.1: - x_differential[-1] = x_differential[-2] + if y_differential[0]==0.1: + y_differential[0] = y_differential[1] + if y_differential[-1]==0.1: + y_differential[-1] = y_differential[-2] + + y_differential.append(y_differential[0]) - x_differential.append(x_differential[0]) - - x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] - - x_differential = self.return_it_in_two_groups(x_differential) - x_differential = np.array(x_differential) - - - con_scaled = con_ind*1 - - con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential - - con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 - con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 + y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] + + y_differential = self.return_it_in_two_groups(y_differential) + + y_differential = np.array(y_differential) + + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential + + con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 + con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 + + try: + con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 + con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 + except: + pass + + con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 + con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 + + try: + con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 + con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 + except: + pass - con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 - con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 - con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 - con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 + else: + y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) + + mult = y_biger_than_x*y_differential + + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) + + if x_differential[0]==0: + x_differential[0] = 0.1 + + if x_differential[-1]==0: + x_differential[-1]= 0.1 + + + + x_differential = [x_differential[ind] if x_differential[ind]!=0 else (x_differential[ind-1] + x_differential[ind+1])/2. 
for ind in range(len(x_differential)) ] + + + if x_differential[0]==0.1: + x_differential[0] = x_differential[1] + if x_differential[-1]==0.1: + x_differential[-1] = x_differential[-2] + + x_differential.append(x_differential[0]) + + x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] + + x_differential = self.return_it_in_two_groups(x_differential) + x_differential = np.array(x_differential) + + + con_scaled = con_ind*1 + + con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential + + con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 + con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 + + try: + con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 + con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 + except: + pass + + con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 + con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 + + try: + con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 + con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 + except: + pass + - con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 - con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - all_found_textline_polygons[0][i][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[0][i][:,0,0] = con_scaled[:,0, 0] + all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons def run(self): @@ -4207,6 +4219,8 @@ class Eynollah: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + else: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) From 5d680136a4ed752e398cd47d3be0fd5aaf698f13 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 21 Sep 2024 01:04:28 +0200 Subject: [PATCH 030/107] updating light version --- qurator/eynollah/eynollah.py | 45 ++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c7407e2..629818f 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -260,7 +260,7 @@ class Eynollah: if self.textline_light: 
self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: - self.model_textline_dir = dir_models + "/eynollah-textline_20210425" + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" if self.ocr: self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" @@ -1916,11 +1916,7 @@ class Eynollah: prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) - - if self.textline_light: - return (prediction_textline[:, :, 0]==1)*1, (prediction_textline_longshot_true_size[:, :, 0]==1)*1 - else: - return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] + return ((prediction_textline[:, :, 0]==1)*1).astype('uint8'), ((prediction_textline_longshot_true_size[:, :, 0]==1)*1).astype('uint8') def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): @@ -1996,7 +1992,7 @@ class Eynollah: #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not self.input_binary and self.full_layout):# or (not self.input_binary and num_col_classifier >= 3): + if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) @@ -4066,8 +4062,35 @@ class Eynollah: t1 = time.time() #plt.imshow(table_prediction) #plt.show() - + if self.light_version and num_col_classifier in (1,2): + org_h_l_m = textline_mask_tot_ea.shape[0] + org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1: + img_w_new = 2000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 2400 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + image_page = resize_image(image_page,img_h_new, img_w_new ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + mask_images = resize_image(mask_images,img_h_new, img_w_new ) + mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) + text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) + table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + if self.light_version and num_col_classifier in (1,2): + image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) + text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) + textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) + text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) + table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, 
org_w_l_m ) + self.logger.info("detection of marginals took %.1fs", time.time() - t1) #print("text region early 2 marginal in %.1fs", time.time() - t0) t1 = time.time() @@ -4222,18 +4245,20 @@ class Eynollah: all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) else: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: From 7f08458436d1f6aad43f809b3a388c8c275d44f7 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 21 Sep 2024 
14:39:54 +0200 Subject: [PATCH 031/107] dilation of text regions without opencv --- qurator/eynollah/eynollah.py | 84 +++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 629818f..b2dea47 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -37,9 +37,7 @@ from tensorflow.keras.models import load_model sys.stderr = stderr tf.get_logger().setLevel("ERROR") warnings.filterwarnings("ignore") -from scipy.signal import find_peaks import matplotlib.pyplot as plt -from scipy.ndimage import gaussian_filter1d from tensorflow.python.keras.backend import set_session from tensorflow.keras import layers @@ -2056,8 +2054,8 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -2097,6 +2095,8 @@ class Eynollah: polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts) + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -3845,6 +3845,79 @@ class Eynollah: return x_differential_new + def dilate_textregions_contours(self,all_found_textline_polygons): + for j in range(len(all_found_textline_polygons)): + + con_ind = all_found_textline_polygons[j] + + con_ind = con_ind.astype(np.float) + + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) + + x_differential = gaussian_filter1d(x_differential, 3) + y_differential = gaussian_filter1d(y_differential, 3) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] + + abs_diff=abs(abs(x_differential)- abs(y_differential) ) + + inc_x = np.zeros(len(x_differential)+1) + inc_y = np.zeros(len(x_differential)+1) + + for i in range(len(x_differential)): + if abs_diff[i]==0: + inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + + elif abs_diff[i]!=0 and abs_diff[i]>=3: + if abs(x_differential[i])>abs(y_differential[i]): + inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + else: + inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + else: + inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + + ###inc_x =list(inc_x) + ###inc_x.append(inc_x[0]) + + ###inc_y =list(inc_y) + ###inc_y.append(inc_y[0]) + + inc_x[0] = inc_x[-1] + inc_y[0] = inc_y[-1] + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] + con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] + + 
con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] + return all_found_textline_polygons + + + + + + + + def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for i in range(len(all_found_textline_polygons[j])): @@ -4096,7 +4169,7 @@ class Eynollah: t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - + polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.full_layout: if not self.light_version: img_bin_light = None @@ -4230,6 +4303,7 @@ class Eynollah: #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + txt_con_org = self.dilate_textregions_contours(txt_con_org) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) #print("text region early 4 in %.1fs", time.time() - t0) From 62f8ae486043ddf9e39b057e754cc28081275ce3 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 23 Sep 2024 14:03:07 +0200 Subject: [PATCH 032/107] updating dilation of textlines and text regions --- qurator/eynollah/eynollah.py | 96 +++++++++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 2 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b2dea47..fb2d699 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3855,6 +3855,7 @@ class Eynollah: x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) + x_differential = gaussian_filter1d(x_differential, 3) y_differential = gaussian_filter1d(y_differential, 3) @@ -3912,6 +3913,93 @@ class Eynollah: return all_found_textline_polygons + def dilate_textline_contours(self,all_found_textline_polygons): + for j in range(len(all_found_textline_polygons)): + for ij in range(len(all_found_textline_polygons[j])): + + con_ind = all_found_textline_polygons[j][ij] + + con_ind = con_ind.astype(np.float) + + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) + + x_differential = gaussian_filter1d(x_differential, 3) + y_differential = gaussian_filter1d(y_differential, 3) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] + + abs_diff=abs(abs(x_differential)- abs(y_differential) ) + + inc_x = np.zeros(len(x_differential)+1) + inc_y = np.zeros(len(x_differential)+1) + + + #print(y_max-y_min, x_max-x_min,(y_max-y_min)/(x_max-x_min), (x_max-x_min)/(y_max-y_min) ) + ##if (y_max-y_min)<40: + ##dilation_m1 = 5 + ##dilation_m2 = int(dilation_m1/2.) +1 + ##else: + ##dilation_m1 = 12 + ##dilation_m2 = int(dilation_m1/2.) 
+1 + + if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + dilation_m1 = int( (y_max-y_min) * 5/20.0 ) + elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: + dilation_m1 = int( (y_max-y_min) * 1/20.0 ) + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + dilation_m1 = int( (x_max-x_min) * 5/20.0 ) + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: + dilation_m1 = int( (x_max-x_min) * 1/20.0 ) + else: + dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + dilation_m2 = int(dilation_m1/2.) +1 + + for i in range(len(x_differential)): + if abs_diff[i]==0: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + + elif abs_diff[i]!=0 and abs_diff[i]>=3: + if abs(x_differential[i])>abs(y_differential[i]): + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + else: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + else: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + + ###inc_x =list(inc_x) + ###inc_x.append(inc_x[0]) + + ###inc_y =list(inc_y) + ###inc_y.append(inc_y[0]) + + inc_x[0] = inc_x[-1] + inc_y[0] = inc_y[-1] + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] + con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] + + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] + return all_found_textline_polygons @@ -4174,6 +4262,7 @@ class Eynollah: if not self.light_version: img_bin_light = None polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 @@ -4304,6 +4393,7 @@ class Eynollah: if self.light_version: txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) txt_con_org = self.dilate_textregions_contours(txt_con_org) + contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) #print("text region early 4 in %.1fs", time.time() - t0) @@ -4316,7 +4406,9 @@ class Eynollah: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, 
image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) @@ -4508,7 +4600,7 @@ class Eynollah: all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) order_text_new = [0] From 6626dc68660d239cf8a4a15b64e8bb670e395409 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 23 Sep 2024 15:50:37 +0200 Subject: [PATCH 033/107] updating textline dilation parameters --- qurator/eynollah/eynollah.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index fb2d699..a69854d 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3950,16 +3950,26 @@ class Eynollah: ##dilation_m1 = 12 ##dilation_m2 = int(dilation_m1/2.) +1 - if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: dilation_m1 = int( (y_max-y_min) * 5/20.0 ) + elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + dilation_m1 = int( (y_max-y_min) * 2/20.0 ) elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: dilation_m1 = int( (y_max-y_min) * 1/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: dilation_m1 = int( (x_max-x_min) * 5/20.0 ) + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + dilation_m1 = int( (x_max-x_min) * 2/20.0 ) elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: dilation_m1 = int( (x_max-x_min) * 1/20.0 ) else: dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + + if dilation_m1>12: + dilation_m1 = 12 + if dilation_m1<4: + dilation_m1 = 4 + #print(dilation_m1, 'dilation_m1') dilation_m2 = int(dilation_m1/2.) 
+1 for i in range(len(x_differential)): From b33739adeef5cd40b48faa3a955cd1d473b5e250 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 24 Sep 2024 16:06:27 +0200 Subject: [PATCH 034/107] parametriyation in the case of textline contours dilation is accomplished --- qurator/eynollah/eynollah.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index a69854d..8c0979d 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3919,6 +3919,8 @@ class Eynollah: con_ind = all_found_textline_polygons[j][ij] + area = cv2.contourArea(con_ind) + con_ind = con_ind.astype(np.float) x_differential = np.diff( con_ind[:,0,0]) @@ -3943,6 +3945,7 @@ class Eynollah: #print(y_max-y_min, x_max-x_min,(y_max-y_min)/(x_max-x_min), (x_max-x_min)/(y_max-y_min) ) + #print(area / (x_max-x_min)) ##if (y_max-y_min)<40: ##dilation_m1 = 5 ##dilation_m2 = int(dilation_m1/2.) +1 @@ -3950,20 +3953,26 @@ class Eynollah: ##dilation_m1 = 12 ##dilation_m2 = int(dilation_m1/2.) +1 - if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: - dilation_m1 = int( (y_max-y_min) * 5/20.0 ) - elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: - dilation_m1 = int( (y_max-y_min) * 2/20.0 ) - elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: - dilation_m1 = int( (y_max-y_min) * 1/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: - dilation_m1 = int( (x_max-x_min) * 5/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: - dilation_m1 = int( (x_max-x_min) * 2/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: - dilation_m1 = int( (x_max-x_min) * 1/20.0 ) + #########if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: + #########dilation_m1 = int( (y_max-y_min) * 5/20.0 ) + #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + #########dilation_m1 = int( (y_max-y_min) * 2/20.0 ) + #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: + #########dilation_m1 = int( (y_max-y_min) * 1/20.0 ) + #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: + #########dilation_m1 = int( (x_max-x_min) * 5/20.0 ) + #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + #########dilation_m1 = int( (x_max-x_min) * 2/20.0 ) + #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: + #########dilation_m1 = int( (x_max-x_min) * 1/20.0 ) + #########else: + #########dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + + if (y_max-y_min) <= (x_max-x_min): + dilation_m1 = round(area / (x_max-x_min) * 0.35) else: - dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + dilation_m1 = round(area / (y_max-y_min) * 0.35) + if dilation_m1>12: dilation_m1 = 12 From 95effe54a0159811b80c7ca5bd9147d196ef5187 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 25 Sep 2024 20:00:53 +0200 Subject: [PATCH 035/107] 
updating textregions dilation --- qurator/eynollah/eynollah.py | 151 ++++++++++++++++++++++++++++++++--- 1 file changed, 139 insertions(+), 12 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 8c0979d..794ebe6 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2054,7 +2054,7 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -3846,18 +3846,22 @@ class Eynollah: return x_differential_new def dilate_textregions_contours(self,all_found_textline_polygons): + #print(all_found_textline_polygons) for j in range(len(all_found_textline_polygons)): con_ind = all_found_textline_polygons[j] - + area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) + con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.1) + con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.1) + x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - x_differential = gaussian_filter1d(x_differential, 3) - y_differential = gaussian_filter1d(y_differential, 3) + x_differential = gaussian_filter1d(x_differential, .5) + y_differential = gaussian_filter1d(y_differential, .5) x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) @@ -3873,23 +3877,54 @@ class Eynollah: inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) + + if (y_max-y_min) <= (x_max-x_min): + dilation_m1 = round(area / (x_max-x_min) * 0.12) + else: + dilation_m1 = round(area / (y_max-y_min) * 0.12) + + if dilation_m1>8: + dilation_m1 = 8 + if dilation_m1<5: + dilation_m1 = 5 + #print(dilation_m1, 'dilation_m1') + dilation_m2 = int(dilation_m1/2.) 
+1 + for i in range(len(x_differential)): if abs_diff[i]==0: - inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and abs_diff[i]>=3: if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) else: - inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) else: - inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + + ###for i in range(len(x_differential)): + ###if abs_diff[i]==0: + ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + + ###elif abs_diff[i]!=0 and abs_diff[i]>=3: + ###if abs(x_differential[i])>abs(y_differential[i]): + ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + ###else: + ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + ###else: + ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) ###inc_x =list(inc_x) ###inc_x.append(inc_x[0]) @@ -3908,6 +3943,98 @@ class Eynollah: con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) + + con_ind = con_ind.astype(np.int32) + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] + + results = np.array(results) + + #print(results,'results') + + results[results==0] = 1 + + + diff_result = np.diff(results) + + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] + + #print(area_scaled / area, "ratio") + #print(results,'results') + #if results[0]==1 and diff_result[-1]==-2: + ##indices_2 = indices_2[1:] + ##indices_m2 = indices_m2[1:] + + #con_scaled[:indices_m2[0]+1,0, 1] = con_scaled[indices_m2[-1],0, 1] + #con_scaled[:indices_m2[0]+1,0, 0] = con_scaled[indices_m2[-1],0, 0] + + + #con_scaled[indices_2[-1]+1:,0, 1] = con_scaled[indices_m2[-1],0, 1] + #con_scaled[indices_2[-1]+1:,0, 0] = con_scaled[indices_m2[-1],0, 0] + + #indices_2 = indices_2[:-1] + #indices_m2 = indices_m2[1:-1] + + if results[0]==1: + con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] + con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] + #indices_2 = indices_2[1:] + 
indices_m2 = indices_m2[1:]
+
+
+
+            if len(indices_2)>len(indices_m2):
+                con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1]
+                con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0]
+
+                indices_2 = indices_2[:-1]
+
+
+
+            #diff_neg_pos = np.array(indices_m2) - np.array(indices_2)
+
+
+            #print(diff_neg_pos,'diff')
+            ##print(indices_2, 'indices_2')
+            #indices_2 = np.array(indices_2)[diff_neg_pos>1]
+            #indices_m2 = np.array(indices_m2)[diff_neg_pos>1]
+
+            for ii in range(len(indices_2)):
+
+                #x_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 0]
+                #y_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 1]
+
+                #if x_inner[-1]>=x_inner[0]:
+                    #x_interest = np.min(x_inner)
+                #else:
+                    #x_interest = np.max(x_inner)
+
+                #if y_inner[-1]>=y_inner[0]:
+                    #y_interest = np.min(y_inner)
+                #else:
+                    #y_interest = np.max(y_inner)
+
+                con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1]
+                con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0]
+
+
+
+            #con_scaled[:,0, 1][results[:]>0] = con_ind[:,0,1][results[:]>0]
+            #con_scaled[:,0, 0][results[:]>0] = con_ind[:,0,0][results[:]>0]
+
+            #print(list(results), 'results')
+            #print(list(diff_result), 'diff_result')
+            #print(indices_2,'2')
+            #print(indices_m2,'-2')
+            #print(diff_neg_pos,'diff_neg_pos')
+
+            #con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1)
+            #con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1)
+
+            con_scaled[-1,0, 1] = con_scaled[0,0, 1]
+            con_scaled[-1,0, 0] = con_scaled[0,0, 0]
             all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1]
             all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0]
         return all_found_textline_polygons

From 133091137dc01f04eedf153119a04559a8f0633d Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Fri, 27 Sep 2024 13:57:01 +0200
Subject: [PATCH 036/107] dilation of textregions and marginals is accomplished

---
 qurator/eynollah/eynollah.py | 454 ++++++++++++++++++++++++-----------
 1 file changed, 314 insertions(+), 140 deletions(-)

diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index 794ebe6..2fe7325 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -252,7 +252,7 @@ class Eynollah:
         self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
         self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
         self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based"
-        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
+        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
         ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
         self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans"
         if self.textline_light:
@@ -1050,7 +1050,7 @@ class Eynollah:
         #del model
         #gc.collect()
         return prediction_true
-    def do_prediction_new_concept(self, patches, img, model, marginal_of_patch_percent=0.1):
+    def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False):
         self.logger.debug("enter do_prediction")
 
         img_height_model = model.layers[len(model.layers)
- 1].output_shape[1] @@ -1064,14 +1064,14 @@ class Eynollah: label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) - seg_not_base = label_p_pred[0,:,:,4] + #seg_not_base = label_p_pred[0,:,:,4] - seg_not_base[seg_not_base>0.4] =1 - seg_not_base[seg_not_base<1] =0 + #seg_not_base[seg_not_base>0.4] =1 + #seg_not_base[seg_not_base<1] =0 seg = np.argmax(label_p_pred, axis=3)[0] - seg[seg_not_base==1]=4 + #seg[seg_not_base==1]=4 seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) @@ -1099,6 +1099,16 @@ class Eynollah: nyf = img_h / float(height_mid) nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) for i in range(nxf): for j in range(nyf): @@ -1120,44 +1130,57 @@ class Eynollah: if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - img_height_model + + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - verbose=0) - seg = np.argmax(label_p_pred, axis=3)[0] + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + #img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + #label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), + #verbose=0) + #seg = np.argmax(label_p_pred, axis=3)[0] - seg_not_base = label_p_pred[0,:,:,4] - ##seg2 = -label_p_pred[0,:,:,2] + ######seg_not_base = label_p_pred[0,:,:,4] + ########seg2 = -label_p_pred[0,:,:,2] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 + ######seg_not_base[seg_not_base>0.03] =1 + ######seg_not_base[seg_not_base<1] =0 - seg_test = label_p_pred[0,:,:,1] - ##seg2 = -label_p_pred[0,:,:,2] + ######seg_test = label_p_pred[0,:,:,1] + ########seg2 = -label_p_pred[0,:,:,2] - seg_test[seg_test>0.75] =1 - seg_test[seg_test<1] =0 + ######seg_test[seg_test>0.75] =1 + ######seg_test[seg_test<1] =0 - seg_line = label_p_pred[0,:,:,3] - ##seg2 = -label_p_pred[0,:,:,2] + ######seg_line = label_p_pred[0,:,:,3] + ########seg2 = -label_p_pred[0,:,:,2] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 + ######seg_line[seg_line>0.1] =1 + ######seg_line[seg_line<1] =0 - seg_background = label_p_pred[0,:,:,0] - ##seg2 = -label_p_pred[0,:,:,2] + ######seg_background = label_p_pred[0,:,:,0] + ########seg2 = -label_p_pred[0,:,:,2] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 + ######seg_background[seg_background>0.25] =1 + ######seg_background[seg_background<1] =0 ##seg = seg+seg2 #seg = label_p_pred[0,:,:,2] #seg[seg>0.4] =1 @@ -1170,56 +1193,221 @@ class Eynollah: ##plt.show() #seg[seg==1]=0 #seg[seg_test==1]=1 - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 - seg[(seg_line==1) & (seg==0)]=3 - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - - if i == 0 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - 
mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i != 0 and i != nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color - elif i != 0 and i != nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + ######seg[seg_not_base==1]=4 + ######seg[seg_background==1]=0 + ######seg[(seg_line==1) & (seg==0)]=3 + 
#seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + + #if i == 0 and j == 0: + #seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + #seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + #elif i == nxf - 1 and j == nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] + #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg + #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color + #elif i == 0 and j == nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color + #elif i == nxf - 1 and j == 0: + #seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + #elif i == 0 and j != 0 and j != nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + #elif i == nxf - 1 and j != 0 and j != nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] + #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + #elif i != 0 and i != nxf - 1 and j == 0: + #seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg + #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + #elif i != 0 and i != nxf - 1 and j == nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color + #else: + #seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u 
- margin, index_x_d + margin : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + + + if batch_indexer == n_batch_inference: + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : 
seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + elif i==(nxf-1) and j==(nyf-1): + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : 
seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) prediction_true = prediction_true.astype(np.uint8) return prediction_true @@ -1963,7 +2151,7 @@ class Eynollah: #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 800#1000 + img_w_new = 1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -1971,17 +2159,17 @@ class Eynollah: img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 3: - img_w_new = 1600#2000 + img_w_new = 2000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 4: - img_w_new = 1900#2500 + img_w_new = 2500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 5: - img_w_new = 2300#3000 + img_w_new = 3000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) else: - img_w_new = 3000#4000 + img_w_new = 4000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) @@ -2025,17 +2213,17 @@ class Eynollah: if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region, n_batch_inference=1) else: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light, n_batch_inference=3) prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2, n_batch_inference=1) else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) @@ -2054,8 +2242,12 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - mask_texts_only = 
cv2.dilate(mask_texts_only, KERNEL, iterations=1) + ##if num_col_classifier == 1 or num_col_classifier == 2: + ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + + mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) + mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -3150,7 +3342,14 @@ class Eynollah: pixel_img = 4 min_area_mar = 0.00001 - polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) + if self.light_version: + marginal_mask = (text_regions_p[:,:]==pixel_img)*1 + marginal_mask = marginal_mask.astype('uint8') + marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2) + + polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar) + else: + polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) @@ -3241,7 +3440,15 @@ class Eynollah: pixel_img = 4 min_area_mar = 0.00001 - polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) + + if self.light_version: + marginal_mask = (text_regions_p[:,:]==pixel_img)*1 + marginal_mask = marginal_mask.astype('uint8') + marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2) + + polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar) + else: + polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) @@ -3850,18 +4057,19 @@ class Eynollah: for j in range(len(all_found_textline_polygons)): con_ind = all_found_textline_polygons[j] + #print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) - con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.1) - con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.1) + #con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.5) + #con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.5) x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - x_differential = gaussian_filter1d(x_differential, .5) - y_differential = gaussian_filter1d(y_differential, .5) + x_differential = gaussian_filter1d(x_differential, 0.1) + y_differential = gaussian_filter1d(y_differential, 0.1) x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) @@ -3885,8 +4093,8 @@ class Eynollah: if dilation_m1>8: dilation_m1 = 8 - if dilation_m1<5: - dilation_m1 = 5 + if dilation_m1<6: + dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') dilation_m2 = int(dilation_m1/2.) 
+1 @@ -4002,7 +4210,6 @@ class Eynollah: #indices_m2 = np.array(indices_m2)[diff_neg_pos>1] for ii in range(len(indices_2)): - #x_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 0] #y_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 1] @@ -4030,11 +4237,12 @@ class Eynollah: #print(indices_m2,'-2') #print(diff_neg_pos,'diff_neg_pos') - #con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1) - #con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1) + ##con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1) + ##con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1) - con_scaled[-1,0, 1] = con_scaled[0,0, 1] - con_scaled[-1,0, 0] = con_scaled[0,0, 0] + #con_scaled[-1,0, 1] = con_scaled[0,0, 1] + #con_scaled[-1,0, 0] = con_scaled[0,0, 0] + ##print(len(con_scaled[:,0,0]),'con_scaled[:,0,0]') all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons @@ -4045,7 +4253,7 @@ class Eynollah: for ij in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][ij] - + print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) @@ -4069,31 +4277,6 @@ class Eynollah: inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) - - - #print(y_max-y_min, x_max-x_min,(y_max-y_min)/(x_max-x_min), (x_max-x_min)/(y_max-y_min) ) - #print(area / (x_max-x_min)) - ##if (y_max-y_min)<40: - ##dilation_m1 = 5 - ##dilation_m2 = int(dilation_m1/2.) +1 - ##else: - ##dilation_m1 = 12 - ##dilation_m2 = int(dilation_m1/2.) +1 - - #########if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: - #########dilation_m1 = int( (y_max-y_min) * 5/20.0 ) - #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: - #########dilation_m1 = int( (y_max-y_min) * 2/20.0 ) - #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: - #########dilation_m1 = int( (y_max-y_min) * 1/20.0 ) - #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: - #########dilation_m1 = int( (x_max-x_min) * 5/20.0 ) - #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: - #########dilation_m1 = int( (x_max-x_min) * 2/20.0 ) - #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: - #########dilation_m1 = int( (x_max-x_min) * 1/20.0 ) - #########else: - #########dilation_m1 = int( (y_max-y_min) * 4/20.0 ) if (y_max-y_min) <= (x_max-x_min): dilation_m1 = round(area / (x_max-x_min) * 0.35) @@ -4126,11 +4309,6 @@ class Eynollah: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - ###inc_x =list(inc_x) - ###inc_x.append(inc_x[0]) - - ###inc_y =list(inc_y) - ###inc_y.append(inc_y[0]) inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -4146,11 +4324,6 @@ class Eynollah: all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons - - - - - def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): @@ -4403,12 +4576,12 @@ class Eynollah: t1 = time.time() if not 
self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.full_layout: if not self.light_version: img_bin_light = None polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) - polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 @@ -4537,9 +4710,10 @@ class Eynollah: #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) - txt_con_org = self.dilate_textregions_contours(txt_con_org) contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + #txt_con_org = self.dilate_textregions_contours(txt_con_org) + #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) #print("text region early 4 in %.1fs", time.time() - t0) From ad323162173f651e9c5f2cb28804c23a582432d5 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 27 Sep 2024 20:59:01 +0200 Subject: [PATCH 037/107] updating light version --- qurator/eynollah/eynollah.py | 46 +++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 2fe7325..72a72d9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -541,6 +541,7 @@ class 
Eynollah: img = self.imread() _, page_coord = self.early_page_for_num_of_column_classification(img) + if not self.dir_in: model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) if self.input_binary: @@ -611,6 +612,10 @@ class Eynollah: width_early = img.shape[1] t1 = time.time() _, page_coord = self.early_page_for_num_of_column_classification(img_bin) + + self.image_page_org_size = img[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3], :] + self.page_coord = page_coord + if not self.dir_in: model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) @@ -737,7 +742,7 @@ class Eynollah: def get_image_and_scales_after_enhancing(self, img_org, img_res): self.logger.debug("enter get_image_and_scales_after_enhancing") self.image = np.copy(img_res) - self.image = self.image.astype(np.uint8) + #self.image = self.image.astype(np.uint8) self.image_org = np.copy(img_org) self.height_org = self.image_org.shape[0] self.width_org = self.image_org.shape[1] @@ -1059,19 +1064,18 @@ class Eynollah: if not patches: img_h_page = img.shape[0] img_w_page = img.shape[1] - img = img / float(255.0) + img = img / 255.0 img = resize_image(img, img_height_model, img_width_model) label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) - - #seg_not_base = label_p_pred[0,:,:,4] - - #seg_not_base[seg_not_base>0.4] =1 - #seg_not_base[seg_not_base<1] =0 - seg = np.argmax(label_p_pred, axis=3)[0] - #seg[seg_not_base==1]=4 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[0,:,:,4] + seg_art[seg_art<0.1] =0 + seg_art[seg_art>0] =1 + seg[seg_art==1]=4 + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) @@ -2151,7 +2155,7 @@ class Eynollah: #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 1000 + img_w_new = 800 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -2206,29 +2210,39 @@ class Eynollah: textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) + + #print(self.image_org.shape) + + #plt.imshwo(self.image_page_org_size) + #plt.show() if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) - #print(img_resized.shape, num_col_classifier, "num_col_classifier") if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region, n_batch_inference=1) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = False) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light, n_batch_inference=3) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) ##model_region, session_region = 
self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2, n_batch_inference=1) + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=False) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) + #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() + prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) @@ -3195,7 +3209,7 @@ class Eynollah: scale = 1 if is_image_enhanced: if self.allow_enhancement: - img_res = img_res.astype(np.uint8) + #img_res = img_res.astype(np.uint8) self.get_image_and_scales(img_org, img_res, scale) if self.plotter: self.plotter.save_enhanced_image(img_res) From 1774076f4a9536ae68d9ab0a982bb84f65c8d858 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 30 Sep 2024 16:10:29 +0200 Subject: [PATCH 038/107] updating light version. Remove textlines or textregion contours inside a bigger one --- qurator/eynollah/eynollah.py | 124 ++++++++++++++++++++++++++++++++--- 1 file changed, 114 insertions(+), 10 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 72a72d9..cbc7b88 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylay12sp_0_2"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -1071,8 +1071,13 @@ class Eynollah: seg = np.argmax(label_p_pred, axis=3)[0] if thresholding_for_artificial_class_in_light_version: + #seg_text = label_p_pred[0,:,:,1] + #seg_text[seg_text<0.2] =0 + #seg_text[seg_text>0] =1 + #seg[seg_text==1]=1 + seg_art = label_p_pred[0,:,:,4] - seg_art[seg_art<0.1] =0 + seg_art[seg_art<0.2] =0 seg_art[seg_art>0] =1 seg[seg_art==1]=4 
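The thresholding introduced above keeps the argmax labelling of the region model but lets a sufficiently confident "artificial" channel (index 4, threshold 0.2 as of this revision) overrule it. A minimal, self-contained sketch of that idea, with a made-up prediction array standing in for label_p_pred:

import numpy as np

def merge_artificial_class(label_p_pred, art_channel=4, art_thresh=0.2):
    # label_p_pred: (1, H, W, C) softmax output of the region model
    seg = np.argmax(label_p_pred, axis=3)[0]             # plain argmax labels
    seg_art = label_p_pred[0, :, :, art_channel].copy()  # artificial-class scores
    seg_art[seg_art < art_thresh] = 0                    # keep confident pixels only
    seg_art[seg_art > 0] = 1
    seg[seg_art == 1] = art_channel                      # overrule the argmax there
    return seg

# toy prediction: one row, two pixels, five classes
pred = np.zeros((1, 1, 2, 5))
pred[0, 0, 0] = [0.40, 0.30, 0.00, 0.00, 0.30]  # argmax is 0, artificial score 0.30
pred[0, 0, 1] = [0.50, 0.30, 0.10, 0.05, 0.05]  # artificial score below threshold
print(merge_artificial_class(pred))              # [[4 0]]

Lowering or raising art_thresh is then the single knob that trades recall of the artificial class against bleeding into the surrounding classes.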
@@ -2159,7 +2164,7 @@ class Eynollah: img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: - img_w_new = 1300#1500 + img_w_new = 1500#1500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 3: @@ -2222,7 +2227,7 @@ class Eynollah: if num_col_classifier == 1 or num_col_classifier == 2: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = False) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) @@ -2232,7 +2237,7 @@ class Eynollah: else: if num_col_classifier == 1 or num_col_classifier == 2: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=False) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) @@ -2249,16 +2254,19 @@ class Eynollah: img_bin = resize_image(img_bin,img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] + mask_lines_only = (prediction_regions_org[:,:] ==3)*1 + + mask_texts_only = (prediction_regions_org[:,:] ==1)*1 mask_texts_only = mask_texts_only.astype('uint8') - ##if num_col_classifier == 1 or num_col_classifier == 2: - ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + #if num_col_classifier == 1 or num_col_classifier == 2: + #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) @@ -4110,6 +4118,7 @@ class Eynollah: if dilation_m1<6: dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') + dilation_m1 = 5 dilation_m2 = int(dilation_m1/2.) +1 for i in range(len(x_differential)): @@ -4267,7 +4276,6 @@ class Eynollah: for ij in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][ij] - print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) @@ -4303,7 +4311,7 @@ class Eynollah: if dilation_m1<4: dilation_m1 = 4 #print(dilation_m1, 'dilation_m1') - dilation_m2 = int(dilation_m1/2.) +1 + dilation_m2 = int(dilation_m1/2.) 
+1 for i in range(len(x_differential)): if abs_diff[i]==0: @@ -4339,6 +4347,100 @@ class Eynollah: all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons + def filter_contours_inside_a_bigger_one(self,contours, image, marginal_cnts=None, type_contour="textregion"): + if type_contour=="textregion": + areas = [cv2.contourArea(contours[j]) for j in range(len(contours))] + area_tot = image.shape[0]*image.shape[1] + + M_main = [cv2.moments(contours[j]) for j in range(len(contours))] + cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + + areas_ratio = np.array(areas)/ area_tot + contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] + contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] + + #contours_> = [contours[ind] for ind in contours_index_big] + indexes_to_be_removed = [] + for ind_small in contours_index_small: + results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big ] + if marginal_cnts: + results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in range(len(marginal_cnts)) ] + results_marginal = np.array(results_marginal) + + if np.any(results_marginal==1): + indexes_to_be_removed.append(ind_small) + + results = np.array(results) + + if np.any(results==1): + indexes_to_be_removed.append(ind_small) + + + if len(indexes_to_be_removed)>0: + indexes_to_be_removed = np.unique(indexes_to_be_removed) + for ind in indexes_to_be_removed: + contours.pop(ind) + return contours + + + else: + contours_txtline_of_all_textregions = [] + + for jj in range(len(contours)): + contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] + + M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] + cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + + areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] + area_tot_tot = image.shape[0]*image.shape[1] + + areas_ratio_tot = np.array(areas_tot)/ area_tot_tot + + contours_index_big_tot = [ind for ind in range(len(contours_txtline_of_all_textregions)) if areas_ratio_tot[ind] >= 1e-2] + + + for jj in range(len(contours)): + contours_in = contours[jj] + #print(len(contours_in)) + areas = [cv2.contourArea(con_ind) for con_ind in contours_in] + area_tot = image.shape[0]*image.shape[1] + + M_main = [cv2.moments(contours_in[j]) for j in range(len(contours_in))] + cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + + areas_ratio = np.array(areas)/ area_tot + + if len(areas_ratio)>=1: + #print(np.max(areas_ratio), np.min(areas_ratio)) + contours_index_small = [ind for ind in range(len(contours_in)) if areas_ratio[ind] < 1e-2] + #contours_index_big = [ind for ind in range(len(contours_in)) if areas_ratio[ind] >= 1e-3] + + if len(contours_index_small)>0: + indexes_to_be_removed = [] + for ind_small in contours_index_small: + results = 
[cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big_tot ] + + results = np.array(results) + + if np.any(results==1): + indexes_to_be_removed.append(ind_small) + + + if len(indexes_to_be_removed)>0: + indexes_to_be_removed = np.unique(indexes_to_be_removed) + + for ind in indexes_to_be_removed: + contours[jj].pop(ind) + + return contours + + + + def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for i in range(len(all_found_textline_polygons[j])): @@ -4725,6 +4827,7 @@ class Eynollah: #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) @@ -4742,6 +4845,7 @@ class Eynollah: #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals) else: From ab63d5ba408a3dfe42ee897b5e6976d4fc501bdd Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 30 Sep 2024 21:28:39 +0200 Subject: [PATCH 039/107] updating light version features --- qurator/eynollah/eynollah.py | 105 +++++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 42 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index cbc7b88..61289fa 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2225,10 +2225,13 @@ class Eynollah: if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page + if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + else: + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) 
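The filter_contours_inside_a_bigger_one helper introduced above decides containment purely from centroids: a small region is dropped as soon as its centroid lies strictly inside any sufficiently big region (or inside a marginal). A reduced sketch of the core test; the two square contours and the 5% area ratio are invented for illustration:

import cv2
import numpy as np

def drop_contours_inside_bigger(contours, image_area, min_ratio=0.05):
    moments = [cv2.moments(c) for c in contours]
    cx = [m["m10"] / (m["m00"] + 1e-32) for m in moments]
    cy = [m["m01"] / (m["m00"] + 1e-32) for m in moments]
    ratios = np.array([cv2.contourArea(c) for c in contours]) / image_area

    small = [i for i in range(len(contours)) if ratios[i] < min_ratio]
    big = [i for i in range(len(contours)) if ratios[i] >= min_ratio]

    to_remove = []
    for i in small:
        # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside
        hits = [cv2.pointPolygonTest(contours[j], (cx[i], cy[i]), False) for j in big]
        if np.any(np.array(hits) == 1):
            to_remove.append(i)

    # pop from the back so earlier indices stay valid
    for i in sorted(set(to_remove), reverse=True):
        contours.pop(i)
    return contours

big_sq = np.array([[[0, 0]], [[100, 0]], [[100, 100]], [[0, 100]]], dtype=np.int32)
small_sq = np.array([[[40, 40]], [[50, 40]], [[50, 50]], [[40, 50]]], dtype=np.int32)
print(len(drop_contours_inside_bigger([big_sq, small_sq], 100 * 100)))  # 1

The descending order before popping matters; it is the same reason the hunks below sort indexes_to_be_removed with np.sort(...)[::-1], since popping an early index first would shift every index behind it.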
prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) @@ -2236,9 +2239,12 @@ class Eynollah: ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page + if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) + else: + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) @@ -4356,6 +4362,8 @@ class Eynollah: cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + + areas_ratio = np.array(areas)/ area_tot contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] @@ -4379,64 +4387,75 @@ class Eynollah: if len(indexes_to_be_removed)>0: indexes_to_be_removed = np.unique(indexes_to_be_removed) + indexes_to_be_removed = np.sort(indexes_to_be_removed)[::-1] for ind in indexes_to_be_removed: contours.pop(ind) + return contours else: contours_txtline_of_all_textregions = [] + indexes_of_textline_tot = [] + index_textline_inside_textregion = [] for jj in range(len(contours)): contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] + ind_ins = np.zeros( len(contours[jj]) ) + jj + list_ind_ins = list(ind_ins) + + ind_textline_inside_tr = np.array (range(len(contours[jj])) ) + + list_ind_textline_inside_tr = list(ind_textline_inside_tr) + + index_textline_inside_textregion = index_textline_inside_textregion + list_ind_textline_inside_tr + + indexes_of_textline_tot = indexes_of_textline_tot + list_ind_ins + + M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] area_tot_tot = image.shape[0]*image.shape[1] - areas_ratio_tot = np.array(areas_tot)/ area_tot_tot - - 
contours_index_big_tot = [ind for ind in range(len(contours_txtline_of_all_textregions)) if areas_ratio_tot[ind] >= 1e-2] - - - for jj in range(len(contours)): - contours_in = contours[jj] - #print(len(contours_in)) - areas = [cv2.contourArea(con_ind) for con_ind in contours_in] - area_tot = image.shape[0]*image.shape[1] + textregion_index_to_del = [] + textline_in_textregion_index_to_del = [] + for ij in range(len(contours_txtline_of_all_textregions)): - M_main = [cv2.moments(contours_in[j]) for j in range(len(contours_in))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + args_all = list(np.array(range(len(contours_txtline_of_all_textregions)))) - areas_ratio = np.array(areas)/ area_tot + args_all.pop(ij) - if len(areas_ratio)>=1: - #print(np.max(areas_ratio), np.min(areas_ratio)) - contours_index_small = [ind for ind in range(len(contours_in)) if areas_ratio[ind] < 1e-2] - #contours_index_big = [ind for ind in range(len(contours_in)) if areas_ratio[ind] >= 1e-3] - - if len(contours_index_small)>0: - indexes_to_be_removed = [] - for ind_small in contours_index_small: - results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big_tot ] - - results = np.array(results) + areas_without = np.array(areas_tot)[args_all] + area_of_con_interest = areas_tot[ij] + + args_with_bigger_area = np.array(args_all)[areas_without > area_of_con_interest] + + if len(args_with_bigger_area)>0: + results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) for ind in args_with_bigger_area ] + results = np.array(results) + if np.any(results==1): + #print(indexes_of_textline_tot[ij], index_textline_inside_textregion[ij]) + textregion_index_to_del.append(int(indexes_of_textline_tot[ij])) + textline_in_textregion_index_to_del.append(int(index_textline_inside_textregion[ij])) + #contours[int(indexes_of_textline_tot[ij])].pop(int(index_textline_inside_textregion[ij])) - if np.any(results==1): - indexes_to_be_removed.append(ind_small) - - - if len(indexes_to_be_removed)>0: - indexes_to_be_removed = np.unique(indexes_to_be_removed) - - for ind in indexes_to_be_removed: - contours[jj].pop(ind) - - return contours + uniqe_args_trs = np.unique(textregion_index_to_del) + + for ind_u_a_trs in uniqe_args_trs: + textline_in_textregion_index_to_del_ind = np.array(textline_in_textregion_index_to_del)[np.array(textregion_index_to_del)==ind_u_a_trs] + textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1] + + for ittrd in textline_in_textregion_index_to_del_ind: + contours[ind_u_a_trs].pop(ittrd) + + return contours + + @@ -4852,6 +4871,8 @@ class Eynollah: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + + 
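The rewritten textline branch above flattens all textlines into one list but keeps, for every entry, the pair (index of its text region, index inside that region). Deletions are then grouped per region and the inner indices popped in descending order, so removing one line never invalidates an index that is still pending. A toy version of that bookkeeping, with invented data:

import numpy as np

regions = [["a", "b", "c"], ["d", "e"]]
# (text-region index, textline index) pairs flagged for deletion
to_del = [(0, 0), (0, 2), (1, 1)]

for reg in np.unique([r for r, _ in to_del]):
    lines = sorted((l for r, l in to_del if r == reg), reverse=True)
    for l in lines:            # pop 2 before 0, never the other way round
        regions[reg].pop(l)

print(regions)  # [['b'], ['d']]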
#all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) From 543ed4bc38b94acf53f48a9224b97322cade0e5b Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 2 Oct 2024 14:09:13 +0200 Subject: [PATCH 040/107] -light version needs -tll to be enabled, otherwise the process will end. --- qurator/eynollah/cli.py | 3 ++ qurator/eynollah/eynollah.py | 63 +++++++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index b293403..4c762a8 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -227,6 +227,9 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s if textline_light and not light_version: print('Error: You used -tll to enable light textline detection but -light is not enabled') sys.exit(1) + if light_version and not textline_light: + print('Error: You used -light without -tll. The light version needs light textline detection to be enabled.') + sys.exit(1) eynollah = Eynollah( image_filename=image, dir_out=out, diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 61289fa..6b8193c 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylay12sp_0_2"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlyla_12_0_2_con_18_22"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -1055,6 +1055,35 @@ class Eynollah: #del model #gc.collect() return prediction_true + def do_padding_with_scale(self,img, scale): + h_n = int(img.shape[0]*scale) + w_n = int(img.shape[1]*scale) + + channel0_avg = int( np.mean(img[:,:,0]) ) + channel1_avg = int( np.mean(img[:,:,1]) ) + channel2_avg = int( np.mean(img[:,:,2]) ) + + h_diff = img.shape[0] - h_n + w_diff = img.shape[1] - w_n + + h_start = int(h_diff / 2.) + w_start = int(w_diff / 2.)
+ + img_res = resize_image(img, h_n, w_n) + #label_res = resize_image(label, h_n, w_n) + + img_scaled_padded = np.copy(img) + + #label_scaled_padded = np.zeros(label.shape) + + img_scaled_padded[:,:,0] = channel0_avg + img_scaled_padded[:,:,1] = channel1_avg + img_scaled_padded[:,:,2] = channel2_avg + + img_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = img_res[:,:,:] + #label_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = label_res[:,:,:] + + return img_scaled_padded#, label_scaled_padded def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction") @@ -4349,6 +4378,38 @@ class Eynollah: con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + con_ind = con_ind.astype(np.int32) + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] + + results = np.array(results) + + results[results==0] = 1 + + + diff_result = np.diff(results) + + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] + + if results[0]==1: + con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] + con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] + indices_m2 = indices_m2[1:] + + + + if len(indices_2)>len(indices_m2): + con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] + con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] + indices_2 = indices_2[:-1] + + + for ii in range(len(indices_2)): + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] + all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons From 1da4b7f589af94beea75157b80c0a7ecb6a213de Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 7 Oct 2024 10:55:10 +0200 Subject: [PATCH 041/107] updating light version --- qurator/eynollah/eynollah.py | 41 ++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 6b8193c..2c14ab9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlyla_12_0_2_con_18_22"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" 
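The snap-back correction added above classifies every point of the dilated contour with cv2.pointPolygonTest against the original contour (+1 inside, -1 outside, with 0 mapped to 1) and then finds each inside-run from np.diff of that sequence: a +2 step marks an entry, a -2 step an exit, and the run in between is overwritten. The run-finding trick in isolation, on a hand-made sequence:

import numpy as np

results = np.array([-1, -1, 1, 1, 1, -1, -1, 1, 1, -1])  # +1: inside, -1: outside
diff = np.diff(results)
starts = np.where(diff == 2)[0]   # index just before an inside-run begins
ends = np.where(diff == -2)[0]    # last index of an inside-run
print(starts, ends)               # [1 6] [4 8]
for s, e in zip(starts, ends):
    results[s + 1:e + 1] = 0      # overwrite each inside-run
print(results)                    # [-1 -1  0  0  0 -1 -1  0  0 -1]

The extra branches in the patch (results[0]==1 and len(indices_2)>len(indices_m2)) cover runs that already start at the first point or never close before the contour wraps around.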
self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -2189,7 +2189,7 @@ class Eynollah: #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 800 + img_w_new = 1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -2299,9 +2299,9 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - #if num_col_classifier == 1 or num_col_classifier == 2: - #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + ##if num_col_classifier == 1 or num_col_classifier == 2: + ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) @@ -4153,7 +4153,7 @@ class Eynollah: if dilation_m1<6: dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') - dilation_m1 = 5 + dilation_m1 = 6 dilation_m2 = int(dilation_m1/2.) +1 for i in range(len(x_differential)): @@ -4657,6 +4657,31 @@ class Eynollah: all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons + + def delete_regions_without_textlines(self,slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con): + slopes_rem = [] + all_found_textline_polygons_rem = [] + boxes_text_rem = [] + txt_con_org_rem = [] + contours_only_text_parent_rem = [] + index_by_text_par_con_rem = [] + + for i, ind_con in enumerate(all_found_textline_polygons): + if len(ind_con): + all_found_textline_polygons_rem.append(ind_con) + slopes_rem.append(slopes[i]) + boxes_text_rem.append(boxes_text[i]) + txt_con_org_rem.append(txt_con_org[i]) + contours_only_text_parent_rem.append(contours_only_text_parent[i]) + index_by_text_par_con_rem.append(index_by_text_par_con[i]) + + index_sort = np.argsort(index_by_text_par_con_rem) + indexes_new = np.array(range(len(index_by_text_par_con_rem))) + + index_by_text_par_con_rem_sort = [indexes_new[index_sort==j][0] for j in range(len(index_by_text_par_con_rem))] + + return slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, contours_only_text_parent_rem, index_by_text_par_con_rem_sort + def run(self): """ Get image and scales, then extract the page of scanned image @@ -4923,6 +4948,9 @@ class Eynollah: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) + + #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, 
all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") @@ -5121,6 +5149,7 @@ class Eynollah: all_found_textline_polygons=[ all_found_textline_polygons ] all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") order_text_new = [0] From 3ef4eac24ca5d876243c62860ad9d4fa05110081 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 17 Oct 2024 19:12:28 +0200 Subject: [PATCH 042/107] textlines of textregions are extracted in a faster way + early layout for all documents is done with the no-patches model on RGB input --- qurator/eynollah/eynollah.py | 120 +++++++++++++++++++--------- qurator/eynollah/utils/marginals.py | 65 ++------------- 2 files changed, 89 insertions(+), 96 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 2c14ab9..fd66b81 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -1710,6 +1710,36 @@ class Eynollah: self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 + def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + + polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) + + M_main_tot = [cv2.moments(polygons_of_textlines[j]) for j in range(len(polygons_of_textlines))] + cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + + args_textlines = np.array(range(len(polygons_of_textlines))) + all_found_textline_polygons = [] + slopes = [] + all_box_coord =[] + + for index, con_region_ind in enumerate(contours_par): + results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False) for ind in
args_textlines ] + results = np.array(results) + + indexes_in = args_textlines[results==1] + + textlines_ins = [polygons_of_textlines[ind] for ind in indexes_in] + + all_found_textline_polygons.append(textlines_ins) + slopes.append(0) + + _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) + + all_box_coord.append(crop_coor) + + return slopes, all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))) + def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new") if len(contours)>15: @@ -2099,14 +2129,14 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) @@ -2216,14 +2246,14 @@ class Eynollah: #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): + if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) else: prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - #print("inside bin ", time.time()-t_bin) + print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2236,7 +2266,7 @@ class Eynollah: else: img_bin = np.copy(img_resized) - #print("inside 1 ", time.time()-t_in) + print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) @@ -2246,14 +2276,15 @@ class Eynollah: #print(self.image_org.shape) + #cv2.imwrite('out_13.png', self.image_page_org_size) #plt.imshwo(self.image_page_org_size) #plt.show() if not skip_layout_and_reading_order: - #print("inside 2 ", time.time()-t_in) + 
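get_slopes_and_deskew_new_light2 above replaces per-region cropping and deskewing with one global pass: all textline contours are pulled from the mask once, and each line is attached to the text region whose polygon contains its centroid (the slope is simply reported as 0 in this fast path). A condensed sketch of the assignment step, with invented rectangles:

import cv2
import numpy as np

def assign_lines_to_regions(line_contours, region_contours):
    moments = [cv2.moments(c) for c in line_contours]
    cx = [m["m10"] / (m["m00"] + 1e-32) for m in moments]
    cy = [m["m01"] / (m["m00"] + 1e-32) for m in moments]
    per_region = []
    for region in region_contours:
        inside = [i for i in range(len(line_contours))
                  if cv2.pointPolygonTest(region, (cx[i], cy[i]), False) == 1]
        per_region.append([line_contours[i] for i in inside])
    return per_region

region = np.array([[[0, 0]], [[200, 0]], [[200, 100]], [[0, 100]]], dtype=np.int32)
line = np.array([[[10, 10]], [[190, 10]], [[190, 30]], [[10, 30]]], dtype=np.int32)
print(len(assign_lines_to_regions([line], [region])[0]))  # 1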
print("inside 2 ", time.time()-t_in) if not self.dir_in: - if num_col_classifier == 1 or num_col_classifier == 2: + if num_col_classifier == 1 or num_col_classifier >= 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) @@ -2267,7 +2298,7 @@ class Eynollah: ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: - if num_col_classifier == 1 or num_col_classifier == 2: + if num_col_classifier == 1 or num_col_classifier >= 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) else: @@ -2278,7 +2309,7 @@ class Eynollah: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - #print("inside 3 ", time.time()-t_in) + print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() @@ -2356,7 +2387,15 @@ class Eynollah: text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - #print("inside 4 ", time.time()-t_in) + + #plt.imshow(textline_mask_tot_ea) + #plt.show() + + textline_mask_tot_ea[(text_regions_p_true==0) | (text_regions_p_true==4) ] = 0 + + #plt.imshow(textline_mask_tot_ea) + #plt.show() + print("inside 4 ", time.time()-t_in) return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) @@ -3308,7 +3347,7 @@ class Eynollah: if self.tables: regions_without_separators[table_prediction==1] = 1 regions_without_separators = regions_without_separators.astype(np.uint8) - text_regions_p = get_marginals(rotate_image(regions_without_separators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, kernel=KERNEL) + text_regions_p = get_marginals(rotate_image(regions_without_separators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, light_version=self.light_version, kernel=KERNEL) except Exception as e: self.logger.error("exception %s", e) @@ -3319,6 +3358,7 @@ class Eynollah: def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts): self.logger.debug('enter run_boxes_no_full_layout') + t_0_box = time.time() if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) @@ -3328,6 +3368,7 @@ class Eynollah: if self.tables: regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( 
(text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) + print(time.time()-t_0_box,'time box in 1') if self.tables: regions_without_separators[table_prediction ==1 ] = 1 if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3340,7 +3381,7 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - + print(time.time()-t_0_box,'time box in 2') self.logger.info("num_col_classifier: %s", num_col_classifier) if num_col_classifier >= 3: @@ -3350,6 +3391,7 @@ class Eynollah: else: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + print(time.time()-t_0_box,'time box in 3') t1 = time.time() if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) @@ -3378,7 +3420,7 @@ class Eynollah: img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) - + print(time.time()-t_0_box,'time box in 4') self.logger.info("detecting boxes took %.1fs", time.time() - t1) if self.tables: @@ -3410,7 +3452,7 @@ class Eynollah: pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - + print(time.time()-t_0_box,'time box in 5') self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables @@ -3751,8 +3793,10 @@ class Eynollah: img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - - model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + if self.dir_in: + pass + else: + self.model_reading_order_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) height1 =672#448 width1 = 448#224 @@ -3793,7 +3837,7 @@ class Eynollah: img3 = img3.astype(np.uint16) - inference_bs = 4 + inference_bs = 3 input_1= np.zeros( (inference_bs, height1, width1,3)) starting_list_of_regions = [] starting_list_of_regions.append( list(range(labels_con.shape[2])) ) @@ -3835,7 +3879,7 @@ class Eynollah: batch_counter = batch_counter+1 if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): - y_pr=model_ro_machine.predict(input_1 , verbose=0) + y_pr=self.model_reading_order_machine.predict(input_1 , verbose=0) if batch_counter==inference_bs: iteration_batches = inference_bs @@ -4698,16 +4742,16 @@ class Eynollah: t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) - #print("text region early -11 in %.1fs", time.time() - t0) + print("text region early -11 in %.1fs", time.time() - t0) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) - #print("text region 
early -1 in %.1fs", time.time() - t0) + print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if not self.skip_layout_and_reading_order: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) + print("text region early -2 in %.1fs", time.time() - t0) if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: @@ -4720,17 +4764,17 @@ class Eynollah: textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) + slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea) + print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) + print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) + print("text region early -4 in %.1fs", time.time() - t0) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -4751,7 +4795,7 @@ class Eynollah: continue else: return pcgts - #print("text region early in %.1fs", time.time() - t0) + print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) @@ -4793,7 +4837,8 @@ class Eynollah: image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) + print("text region early 2 marginal in %.1fs", time.time() - t0) + ## birdan sora chock chakir t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) @@ -4807,7 +4852,7 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - #print("text region early 2 in %.1fs", time.time() - t0) + print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) @@ -4929,7 +4974,7 @@ class Eynollah: else: pass - #print("text region early 3 in %.1fs", time.time() - t0) + 
print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) @@ -4938,14 +4983,17 @@ class Eynollah: #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) + print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) + print("text region early 5 in %.1fs", time.time() - t0) + ## birdan sora chock chakir if not self.curved_line: if self.light_version: if self.textline_light: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) @@ -4974,7 +5022,7 @@ class Eynollah: all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - #print("text region early 6 in %.1fs", time.time() - t0) + print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) @@ -5134,7 +5182,7 @@ class Eynollah: self.logger.info("Job done in %.1fs", 
time.time() - t0) if not self.dir_in: return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) + print("text region early 7 in %.1fs", time.time() - t0) else: _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) diff --git a/qurator/eynollah/utils/marginals.py b/qurator/eynollah/utils/marginals.py index 7c43de6..984156f 100644 --- a/qurator/eynollah/utils/marginals.py +++ b/qurator/eynollah/utils/marginals.py @@ -8,7 +8,7 @@ from .contour import find_new_features_of_contours, return_contours_of_intereste from .resize import resize_image from .rotate import rotate_image -def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None): +def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_version=False, kernel=None): mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1])) mask_marginals=mask_marginals.astype(np.uint8) @@ -49,27 +49,14 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N if thickness_along_y_percent>=14: text_with_lines_y_rev=-1*text_with_lines_y[:] - #print(text_with_lines_y) - #print(text_with_lines_y_rev) - - - - - #plt.plot(text_with_lines_y) - #plt.show() - text_with_lines_y_rev=text_with_lines_y_rev-np.min(text_with_lines_y_rev) - #plt.plot(text_with_lines_y_rev) - #plt.show() sigma_gaus=1 region_sum_0= gaussian_filter1d(text_with_lines_y, sigma_gaus) region_sum_0_rev=gaussian_filter1d(text_with_lines_y_rev, sigma_gaus) - #plt.plot(region_sum_0_rev) - #plt.show() region_sum_0_updown=region_sum_0[len(region_sum_0)::-1] first_nonzero=(next((i for i, x in enumerate(region_sum_0) if x), None)) @@ -78,43 +65,17 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N last_nonzero=len(region_sum_0)-last_nonzero - ##img_sum_0_smooth_rev=-region_sum_0 - - mid_point=(last_nonzero+first_nonzero)/2. 
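The trimmed get_marginals above works on a projection profile: the text-plus-lines mask is collapsed to a 1-D curve, smoothed with gaussian_filter1d, inverted so that gaps become peaks, and find_peaks then yields candidate marginal boundaries on either side of the page's mid-point. The mechanics on an invented profile (two text columns with a gap between them):

import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

profile = np.array([0, 0, 5, 30, 32, 31, 2, 1, 33, 34, 30, 4, 0, 0], dtype=float)

smoothed = gaussian_filter1d(profile, 1)
inverted = -smoothed
inverted = inverted - np.min(inverted)   # valleys of the profile become peaks

first_nonzero = next(i for i, x in enumerate(smoothed) if x)
last_nonzero = len(smoothed) - next(i for i, x in enumerate(smoothed[::-1]) if x)
mid_point = (first_nonzero + last_nonzero) / 2.0

peaks, _ = find_peaks(inverted, height=0)
peaks = peaks[(peaks > first_nonzero) & (peaks < last_nonzero)]
# peaks left of mid_point are left-marginal candidates, the others right ones
print(mid_point, peaks)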
one_third_right=(last_nonzero-mid_point)/3.0 one_third_left=(mid_point-first_nonzero)/3.0 - #img_sum_0_smooth_rev=img_sum_0_smooth_rev-np.min(img_sum_0_smooth_rev) - - - - peaks, _ = find_peaks(text_with_lines_y_rev, height=0) - - peaks=np.array(peaks) - - - #print(region_sum_0[peaks]) - ##plt.plot(region_sum_0) - ##plt.plot(peaks,region_sum_0[peaks],'*') - ##plt.show() - #print(first_nonzero,last_nonzero,peaks) peaks=peaks[(peaks>first_nonzero) & ((peaksmid_point] @@ -137,9 +98,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N - - #print(point_left,point_right) - #print(text_regions.shape) if point_right>=mask_marginals.shape[1]: point_right=mask_marginals.shape[1]-1 @@ -148,10 +106,8 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N except: mask_marginals[:,:]=1 - #print(mask_marginals.shape,point_left,point_right,'nadosh') mask_marginals_rotated=rotate_image(mask_marginals,-slope_deskew) - #print(mask_marginals_rotated.shape,'nadosh') mask_marginals_rotated_sum=mask_marginals_rotated.sum(axis=0) mask_marginals_rotated_sum[mask_marginals_rotated_sum!=0]=1 @@ -168,11 +124,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N max_point_of_right_marginal=text_regions.shape[1]-1 - #print(np.min(index_x_interest) ,np.max(index_x_interest),'minmaxnew') - #print(mask_marginals_rotated.shape,text_regions.shape,'mask_marginals_rotated') - #plt.imshow(mask_marginals) - #plt.show() - #plt.imshow(mask_marginals_rotated) #plt.show() @@ -195,10 +146,9 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N x_min_marginals_right=[] for i in range(len(cx_text_only)): - x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i]) y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i]) - #print(x_width_mar,y_height_mar,y_height_mar/x_width_mar,'y_height_mar') + if x_width_mar>16 and y_height_mar/x_width_mar<18: marginlas_should_be_main_text.append(polygons_of_marginals[i]) if x_min_text_only[i]<(mid_point-one_third_left): @@ -220,18 +170,13 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N x_min_marginals_right=[text_regions.shape[1]-1] - - - #print(x_min_marginals_left[0],x_min_marginals_right[0],'margo') - - #print(marginlas_should_be_main_text,'marginlas_should_be_main_text') text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) - #print(np.unique(text_regions)) #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 #text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 - + + text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0 text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0 From f93fa12441104324ee8e7ced0488b44827704de3 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 18 Oct 2024 09:14:42 +0200 Subject: [PATCH 043/107] doing more multiprocessing in order to make the process faster --- qurator/eynollah/eynollah.py | 92 +++--- qurator/eynollah/utils/__init__.py | 93 +----- qurator/eynollah/utils/contour.py | 73 ++++- qurator/eynollah/utils/separate_lines.py | 386 +++++++++++++++++------ 4 files changed, 407 insertions(+), 237 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index fd66b81..79724cc 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ 
-2253,7 +2253,7 @@ class Eynollah: else: prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - print("inside bin ", time.time()-t_bin) + #print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2266,7 +2266,7 @@ class Eynollah: else: img_bin = np.copy(img_resized) - print("inside 1 ", time.time()-t_in) + #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) @@ -2281,7 +2281,7 @@ class Eynollah: #plt.imshwo(self.image_page_org_size) #plt.show() if not skip_layout_and_reading_order: - print("inside 2 ", time.time()-t_in) + #print("inside 2 ", time.time()-t_in) if not self.dir_in: if num_col_classifier == 1 or num_col_classifier >= 2: @@ -2309,7 +2309,7 @@ class Eynollah: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - print("inside 3 ", time.time()-t_in) + #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() @@ -2395,7 +2395,7 @@ class Eynollah: #plt.imshow(textline_mask_tot_ea) #plt.show() - print("inside 4 ", time.time()-t_in) + #print("inside 4 ", time.time()-t_in) return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) @@ -3368,7 +3368,7 @@ class Eynollah: if self.tables: regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) - print(time.time()-t_0_box,'time box in 1') + #print(time.time()-t_0_box,'time box in 1') if self.tables: regions_without_separators[table_prediction ==1 ] = 1 if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3381,7 +3381,7 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - print(time.time()-t_0_box,'time box in 2') + #print(time.time()-t_0_box,'time box in 2') self.logger.info("num_col_classifier: %s", num_col_classifier) if num_col_classifier >= 3: @@ -3391,36 +3391,41 @@ class Eynollah: else: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - print(time.time()-t_0_box,'time box in 3') + #print(time.time()-t_0_box,'time box in 3') t1 = time.time() if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes_d = None self.logger.debug("len(boxes): %s", len(boxes)) + #print(time.time()-t_0_box,'time box in 3.1') - text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, 
splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) - img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) + if self.tables: + text_regions_p_tables = np.copy(text_regions_p) + text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 + pixel_line = 3 + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) + #print(time.time()-t_0_box,'time box in 3.2') + img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) + #print(time.time()-t_0_box,'time box in 3.3') else: boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes = None self.logger.debug("len(boxes): %s", len(boxes_d)) - text_regions_p_tables = np.copy(text_regions_p_1_n) - text_regions_p_tables =np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 - - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) - - img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) - img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) - img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) - print(time.time()-t_0_box,'time box in 4') + if self.tables: + text_regions_p_tables = np.copy(text_regions_p_1_n) + text_regions_p_tables =np.round(text_regions_p_tables) + text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 + + pixel_line = 3 + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) + img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) + + img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) + img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) + img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + #print(time.time()-t_0_box,'time box in 4') self.logger.info("detecting boxes took %.1fs", time.time() - t1) if self.tables: @@ -3452,7 +3457,7 @@ class Eynollah: pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - print(time.time()-t_0_box,'time box in 5') + #print(time.time()-t_0_box,'time box in 5') self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, 
boxes, boxes_d, polygons_of_marginals, contours_tables @@ -4742,16 +4747,16 @@ class Eynollah: t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) - print("text region early -11 in %.1fs", time.time() - t0) + #print("text region early -11 in %.1fs", time.time() - t0) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) - print("text region early -1 in %.1fs", time.time() - t0) + #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if not self.skip_layout_and_reading_order: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - print("text region early -2 in %.1fs", time.time() - t0) + #print("text region early -2 in %.1fs", time.time() - t0) if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: @@ -4764,17 +4769,17 @@ class Eynollah: textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea) - print("text region early -2,5 in %.1fs", time.time() - t0) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) - print("text region early -3 in %.1fs", time.time() - t0) + #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - print("text region early -4 in %.1fs", time.time() - t0) + #print("text region early -4 in %.1fs", time.time() - t0) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -4795,7 +4800,7 @@ class Eynollah: continue else: return pcgts - print("text region early in %.1fs", time.time() - t0) + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) @@ -4837,7 +4842,7 @@ class Eynollah: image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) self.logger.info("detection of marginals took %.1fs", time.time() - t1) - print("text region early 2 marginal in %.1fs", time.time() - t0) + #print("text region early 2 marginal in %.1fs", time.time() - t0) ## birdan sora chock chakir t1 = time.time() if not self.full_layout: @@ -4852,7 +4857,7 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - print("text region early 2 in %.1fs", time.time() - t0) + #print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 0.000005 if np.abs(slope_deskew) >= 
SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) @@ -4974,19 +4979,20 @@ class Eynollah: else: pass - print("text region early 3 in %.1fs", time.time() - t0) + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + #print("text region early 3.5 in %.1fs", time.time() - t0) txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - print("text region early 4 in %.1fs", time.time() - t0) + #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - print("text region early 5 in %.1fs", time.time() - t0) + #print("text region early 5 in %.1fs", time.time() - t0) ## birdan sora chock chakir if not self.curved_line: if self.light_version: @@ -5022,7 +5028,7 @@ class Eynollah: all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - print("text region early 6 in %.1fs", time.time() - t0) + #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) @@ -5182,7 +5188,7 @@ class Eynollah: self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts - print("text region early 7 in %.1fs", time.time() - t0) + #print("text region early 7 in %.1fs", time.time() - t0) else: _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index 8705ecf..6219df2 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -7,7 +7,7 @@ import cv2 import imutils from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d - +import time from .is_nan import isNaN from .contour import (contours_in_same_horizon, find_new_features_of_contours, @@ -1342,7 +1342,7 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point): return peaks_neg_tot def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None): - + 
t_ins_c0 = time.time() separators_closeup=( (region_pre_p[:,:,:]==pixel_lines))*1 separators_closeup[0:110,:,:]=0 @@ -1356,84 +1356,47 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, separators_closeup_new=np.zeros((separators_closeup.shape[0] ,separators_closeup.shape[1] )) - - - - ##_,separators_closeup_n=self.combine_hor_lines_and_delete_cross_points_and_get_lines_features_back(region_pre_p[:,:,0]) separators_closeup_n=np.copy(separators_closeup) separators_closeup_n=separators_closeup_n.astype(np.uint8) - ##plt.imshow(separators_closeup_n[:,:,0]) - ##plt.show() separators_closeup_n_binary=np.zeros(( separators_closeup_n.shape[0],separators_closeup_n.shape[1]) ) separators_closeup_n_binary[:,:]=separators_closeup_n[:,:,0] separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]!=0]=1 - #separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]==0]=255 - #separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]==-255]=0 - - - #separators_closeup_n_binary=(separators_closeup_n_binary[:,:]==2)*1 - - #gray = cv2.cvtColor(separators_closeup_n, cv2.COLOR_BGR2GRAY) - - ### - - #print(separators_closeup_n_binary.shape) + gray_early=np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2) gray_early=gray_early.astype(np.uint8) - #print(gray_early.shape,'burda') imgray_e = cv2.cvtColor(gray_early, cv2.COLOR_BGR2GRAY) - #print('burda2') ret_e, thresh_e = cv2.threshold(imgray_e, 0, 255, 0) - #print('burda3') contours_line_e,hierarchy_e=cv2.findContours(thresh_e,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - #slope_lines_e,dist_x_e, x_min_main_e ,x_max_main_e ,cy_main_e,slope_lines_org_e,y_min_main_e, y_max_main_e, cx_main_e=self.find_features_of_lines(contours_line_e) - slope_linese,dist_xe, x_min_maine ,x_max_maine ,cy_maine,slope_lines_orge,y_min_maine, y_max_maine, cx_maine=find_features_of_lines(contours_line_e) dist_ye=y_max_maine-y_min_maine - #print(y_max_maine-y_min_maine,'y') - #print(dist_xe,'x') args_e=np.array(range(len(contours_line_e))) args_hor_e=args_e[(dist_ye<=50) & (dist_xe>=3*dist_ye)] - #print(args_hor_e,'jidi',len(args_hor_e),'jilva') cnts_hor_e=[] for ce in args_hor_e: cnts_hor_e.append(contours_line_e[ce]) - #print(len(slope_linese),'lieee') figs_e=np.zeros(thresh_e.shape) figs_e=cv2.fillPoly(figs_e,pts=cnts_hor_e,color=(1,1,1)) - #plt.imshow(figs_e) - #plt.show() - - ### - separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary,pts=cnts_hor_e,color=(0,0,0)) gray = cv2.bitwise_not(separators_closeup_n_binary) gray=gray.astype(np.uint8) - - #plt.imshow(gray) - #plt.show() - - bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, \ cv2.THRESH_BINARY, 15, -2) - ##plt.imshow(bw[:,:]) - ##plt.show() - + horizontal = np.copy(bw) vertical = np.copy(bw) @@ -1451,16 +1414,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, horizontal = cv2.dilate(horizontal,kernel,iterations = 2) horizontal = cv2.erode(horizontal,kernel,iterations = 2) - - ### - #print(np.unique(horizontal),'uni') horizontal=cv2.fillPoly(horizontal,pts=cnts_hor_e,color=(255,255,255)) - ### - - - - #plt.imshow(horizontal) - #plt.show() rows = vertical.shape[0] verticalsize = rows // 30 @@ -1471,35 +1425,21 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, vertical = cv2.dilate(vertical, verticalStructure) vertical = cv2.dilate(vertical,kernel,iterations = 1) - # Show extracted vertical lines 
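# ----------------------------------------------------------------------
# A minimal sketch of the morphology trick applied in this function: an
# opening (erode then dilate) with a long, thin structuring element keeps
# only strokes elongated in that direction, which is how the horizontal
# and vertical separator masks above are obtained. The //30 kernel ratio
# mirrors the code; everything else here is an illustrative stand-in.
import cv2
import numpy as np

def sketch_extract_separators(binary):
    # binary: uint8 mask, 255 on ink, 0 on background
    h_size = max(binary.shape[1] // 30, 1)
    v_size = max(binary.shape[0] // 30, 1)
    h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (h_size, 1))
    v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, v_size))
    horizontal = cv2.dilate(cv2.erode(binary, h_kernel), h_kernel)
    vertical = cv2.dilate(cv2.erode(binary, v_kernel), v_kernel)
    return horizontal, vertical

# Demo: one ruling line in each direction plus a text-sized blob; only the
# long strokes survive their respective opening.
img = np.zeros((90, 120), dtype=np.uint8)
img[45, 10:110] = 255    # horizontal separator
img[10:80, 60] = 255     # vertical separator
img[20:22, 20:22] = 255  # small blob, removed by both openings
h, v = sketch_extract_separators(img)
print(int(h.sum() // 255), int(v.sum() // 255))
# ----------------------------------------------------------------------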
horizontal,special_separators=combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(vertical,horizontal,num_col_classifier) - - #plt.imshow(horizontal) - #plt.show() - #print(vertical.shape,np.unique(vertical),'verticalvertical') separators_closeup_new[:,:][vertical[:,:]!=0]=1 separators_closeup_new[:,:][horizontal[:,:]!=0]=1 - ##plt.imshow(separators_closeup_new) - ##plt.show() - ##separators_closeup_n vertical=np.repeat(vertical[:, :, np.newaxis], 3, axis=2) vertical=vertical.astype(np.uint8) - ##plt.plot(vertical[:,:,0].sum(axis=0)) - ##plt.show() - - #plt.plot(vertical[:,:,0].sum(axis=1)) - #plt.show() - imgray = cv2.cvtColor(vertical, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_line_vers,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) slope_lines,dist_x, x_min_main ,x_max_main ,cy_main,slope_lines_org,y_min_main, y_max_main, cx_main=find_features_of_lines(contours_line_vers) - #print(slope_lines,'vertical') + args=np.array( range(len(slope_lines) )) args_ver=args[slope_lines==1] dist_x_ver=dist_x[slope_lines==1] @@ -1512,9 +1452,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, len_y=separators_closeup.shape[0]/3.0 - #plt.imshow(horizontal) - #plt.show() - horizontal=np.repeat(horizontal[:, :, np.newaxis], 3, axis=2) horizontal=horizontal.astype(np.uint8) imgray = cv2.cvtColor(horizontal, cv2.COLOR_BGR2GRAY) @@ -1582,8 +1519,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, matrix_of_lines_ch[len(cy_main_hor):,9]=1 - - if contours_h is not None: slope_lines_head,dist_x_head, x_min_main_head ,x_max_main_head ,cy_main_head,slope_lines_org_head,y_min_main_head, y_max_main_head, cx_main_head=find_features_of_lines(contours_h) matrix_l_n=np.zeros((matrix_of_lines_ch.shape[0]+len(cy_main_head),matrix_of_lines_ch.shape[1])) @@ -1629,8 +1564,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, args_big_parts=np.array(range(len(splitter_y_new_diff))) [ splitter_y_new_diff>22 ] - - regions_without_separators=return_regions_without_separators(region_pre_p) @@ -1640,19 +1573,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, peaks_neg_fin_fin=[] for itiles in args_big_parts: - - regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]):int(splitter_y_new[itiles+1]),:,0] - #image_page_background_zero_tile=image_page_background_zero[int(splitter_y_new[itiles]):int(splitter_y_new[itiles+1]),:] - - #print(regions_without_separators_tile.shape) - ##plt.imshow(regions_without_separators_tile) - ##plt.show() - - #num_col, peaks_neg_fin=self.find_num_col(regions_without_separators_tile,multiplier=6.0) - - #regions_without_separators_tile=cv2.erode(regions_without_separators_tile,kernel,iterations = 3) - # + try: num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0) except: @@ -1670,9 +1592,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, peaks_neg_fin=peaks_neg_fin[peaks_neg_fin<=(vertical.shape[1]-500)] peaks_neg_fin_fin=peaks_neg_fin[:] - #print(peaks_neg_fin_fin,'peaks_neg_fin_fintaza') - - return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n diff --git a/qurator/eynollah/utils/contour.py b/qurator/eynollah/utils/contour.py index 53b39b5..8a92ace 100644 --- a/qurator/eynollah/utils/contour.py +++ 
b/qurator/eynollah/utils/contour.py @@ -263,7 +263,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): return cnts_org -def get_textregion_contours_in_org_image_light(cnts, img, slope_first): +def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): h_o = img.shape[0] w_o = img.shape[1] @@ -278,14 +278,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first): img_copy = np.zeros(img.shape) img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1)) - # plt.imshow(img_copy) - # plt.show() - - # print(img.shape,'img') img_copy = rotation_image_new(img_copy, -slope_first) - ##print(img_copy.shape,'img_copy') - # plt.imshow(img_copy) - # plt.show() img_copy = img_copy.astype(np.uint8) imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) @@ -300,6 +293,70 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first): return cnts_org +def return_list_of_contours_with_desired_order(ls_cons, sorted_indexes): + return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] +def do_back_rotation_and_get_cnt_back(queue_of_all_params, contours_par_per_process,indexes_r_con_per_pro, img, slope_first): + contours_textregion_per_each_subprocess = [] + index_by_text_region_contours = [] + for mv in range(len(contours_par_per_process)): + img_copy = np.zeros(img.shape) + img_copy = cv2.fillPoly(img_copy, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) + + img_copy = rotation_image_new(img_copy, -slope_first) + + img_copy = img_copy.astype(np.uint8) + imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) + ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) + cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) + # print(np.shape(cont_int[0])) + contours_textregion_per_each_subprocess.append(cont_int[0]*6) + index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) + + queue_of_all_params.put([contours_textregion_per_each_subprocess, index_by_text_region_contours]) + +def get_textregion_contours_in_org_image_light(cnts, img, slope_first): + num_cores = cpu_count() + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(cnts), num_cores + 1) + indexes_by_text_con = np.array(range(len(cnts))) + + h_o = img.shape[0] + w_o = img.shape[1] + + img = cv2.resize(img, (int(img.shape[1]/6.), int(img.shape[0]/6.)), interpolation=cv2.INTER_NEAREST) + ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) + #cnts = cnts/2 + cnts = [(i/ 6).astype(np.int32) for i in cnts] + + for i in range(num_cores): + contours_par_per_process = cnts[int(nh[i]) : int(nh[i + 1])] + indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_back_rotation_and_get_cnt_back, args=(queue_of_all_params, contours_par_per_process, indexes_text_con_per_process, img, slope_first))) + + for i in range(num_cores): + processes[i].start() + + cnts_org = [] + all_index_text_con = [] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + contours_for_subprocess = list_all_par[0] + indexes_for_subprocess = list_all_par[1] + for j in range(len(contours_for_subprocess)): + cnts_org.append(contours_for_subprocess[j]) + all_index_text_con.append(indexes_for_subprocess[j]) + for i in range(num_cores): + processes[i].join() + + cnts_org = 
return_list_of_contours_with_desired_order(cnts_org, all_index_text_con) + + return cnts_org + def return_contours_of_interested_textline(region_pre_p, pixel): # pixels of images are identified by 5 diff --git a/qurator/eynollah/utils/separate_lines.py b/qurator/eynollah/utils/separate_lines.py index 1004a92..f8df33f 100644 --- a/qurator/eynollah/utils/separate_lines.py +++ b/qurator/eynollah/utils/separate_lines.py @@ -3,7 +3,8 @@ import cv2 from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d import os - +from multiprocessing import Process, Queue, cpu_count +from multiprocessing import Pool from .rotate import rotate_image from .contour import ( return_parent_contours, @@ -1569,8 +1570,21 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): # plt.show() return img_patch_ineterst_revised -def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): +def do_image_rotation(queue_of_all_params,angels_per_process, img_resized, sigma_des): + angels_per_each_subprocess = [] + for mv in range(len(angels_per_process)): + img_rot=rotate_image(img_resized,angels_per_process[mv]) + img_rot[img_rot!=0]=1 + try: + var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + except: + var_spectrum=0 + angels_per_each_subprocess.append(var_spectrum) + + queue_of_all_params.put([angels_per_each_subprocess]) +def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): + num_cores = cpu_count() if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) @@ -1603,22 +1617,44 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals #plt.imshow(img_resized) #plt.show() angels=np.array([-45, 0 , 45 , 90 , ])#np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) - + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - - for rot in angels: - img_rot=rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ##print(rot,var_spectrum,'var_spectrum') - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + ####plt.imshow(img_rot) + ####plt.show() + ###img_rot[img_rot!=0]=1 + ####neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####print(var_spectrum,'var_spectrum') + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + #####print(rot,var_spectrum,'var_spectrum') + ###except: + ###var_spectrum=0 + ###var_res.append(var_spectrum) + + + try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1628,17 +1664,38 @@ def 
return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals angels=np.linspace(ang_int-22.5,ang_int+22.5,n_tot_angles) + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + ##var_res=[] + ##for rot in angels: + ##img_rot=rotate_image(img_resized,rot) + ####plt.imshow(img_rot) + ####plt.show() + ##img_rot[img_rot!=0]=1 + ##try: + ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ##except: + ##var_spectrum=0 + ##var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1650,24 +1707,46 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals #plt.imshow(img_resized) #plt.show() angels=np.linspace(-12,12,n_tot_angles)#np.array([0 , 45 , 90 , -45]) + + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + + var_res=[] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() - var_res=[] + ##var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ##for rot in angels: + ##img_rot=rotate_image(img_resized,rot) + ###plt.imshow(img_rot) + ###plt.show() + ##img_rot[img_rot!=0]=1 + ###neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) + ###print(var_spectrum,'var_spectrum') + ##try: + ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 + ##except: + ##var_spectrum=0 - var_res.append(var_spectrum) + ##var_res.append(var_spectrum) if plotter: @@ -1681,17 +1760,38 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals early_slope_edge=11 if abs(ang_int)>early_slope_edge and ang_int<0: angels=np.linspace(-90,-12,n_tot_angles) + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + 
processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + ##var_res=[] + ##for rot in angels: + ##img_rot=rotate_image(img_resized,rot) + ####plt.imshow(img_rot) + ####plt.show() + ##img_rot[img_rot!=0]=1 + ##try: + ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ##except: + ##var_spectrum=0 + ##var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1701,18 +1801,41 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals elif abs(ang_int)>early_slope_edge and ang_int>0: angels=np.linspace(90,12,n_tot_angles) + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(indexer,'indexer') - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + + ###var_res=[] + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ###img_rot[img_rot!=0]=1 + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####print(indexer,'indexer') + ###except: + ###var_spectrum=0 + ###var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1720,20 +1843,42 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals ang_int=0 else: angels=np.linspace(-25,25,int(n_tot_angles/2.)+10) - var_res=[] indexer=0 - for rot in angels: - img_rot=rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, 
args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + + var_res=[] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + ####var_res=[] + + ####for rot in angels: + ####img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ####img_rot[img_rot!=0]=1 + #####neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) + #####print(var_spectrum,'var_spectrum') + ####try: + ####var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####except: + ####var_spectrum=0 + ####var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1750,19 +1895,40 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals if abs(ang_int)>early_slope_edge and ang_int<0: angels=np.linspace(-90,-25,int(n_tot_angles/2.)+10) - + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + ###var_res=[] + + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ###img_rot[img_rot!=0]=1 + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ###except: + ###var_spectrum=0 + ###var_res.append(var_spectrum) try: var_res=np.array(var_res) @@ -1773,22 +1939,44 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals elif abs(ang_int)>early_slope_edge and ang_int>0: angels=np.linspace(90,25,int(n_tot_angles/2.)+10) - + indexer=0 + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() - indexer=0 - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(indexer,'indexer') - except: - var_spectrum=0 + ###var_res=[] - 
var_res.append(var_spectrum) + + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ###img_rot[img_rot!=0]=1 + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####print(indexer,'indexer') + ###except: + ###var_spectrum=0 + + ###var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] From 70772d41042df2415a0918d99f51cb183db36fe5 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 21 Oct 2024 23:46:38 +0200 Subject: [PATCH 044/107] binarization as a standalone command --- qurator/eynollah/cli.py | 33 +++ qurator/eynollah/eynollah.py | 5 +- qurator/eynollah/sbb_binarize.py | 383 +++++++++++++++++++++++++++++++ 3 files changed, 418 insertions(+), 3 deletions(-) create mode 100644 qurator/eynollah/sbb_binarize.py diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 4c762a8..0daf0c9 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -2,6 +2,7 @@ import sys import click from ocrd_utils import initLogging, setOverrideLogLevel from qurator.eynollah.eynollah import Eynollah +from qurator.eynollah.sbb_binarize import SbbBinarizer @click.group() def main(): @@ -48,6 +49,38 @@ def main(): def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size): xml_files_ind = os.listdir(dir_xml) +@main.command() +@click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') + +@click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction') + +@click.argument('input_image') + +@click.argument('output_image') +@click.option( + "--dir_in", + "-di", + help="directory of images", + type=click.Path(exists=True, file_okay=False), +) +@click.option( + "--dir_out", + "-do", + help="directory where the binarized images will be written", + type=click.Path(exists=True, file_okay=False), +) + +def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out): + if not dir_out and (dir_in): + print("Error: You used -di but did not set -do") + sys.exit(1) + elif dir_out and not (dir_in): + print("Error: You used -do to write out binarized images but have not set -di") + sys.exit(1) + SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, dir_out=dir_out) + + + @main.command() @click.option( diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 79724cc..e587ff3 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -240,7 +240,6 @@ class Eynollah: pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') self.dir_models = dir_models - self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425" self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" @@ -4769,9 +4768,9 @@ class Eynollah: textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + slope_deskew, 
slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea) #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ diff --git a/qurator/eynollah/sbb_binarize.py b/qurator/eynollah/sbb_binarize.py new file mode 100644 index 0000000..36e9ab0 --- /dev/null +++ b/qurator/eynollah/sbb_binarize.py @@ -0,0 +1,383 @@ +""" +Tool to load model and binarize a given image. +""" + +import sys +from glob import glob +from os import environ, devnull +from os.path import join +from warnings import catch_warnings, simplefilter +import os + +import numpy as np +from PIL import Image +import cv2 +environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +stderr = sys.stderr +sys.stderr = open(devnull, 'w') +import tensorflow as tf +from tensorflow.keras.models import load_model +from tensorflow.python.keras import backend as tensorflow_backend +sys.stderr = stderr + + +import logging + +def resize_image(img_in, input_height, input_width): + return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) + +class SbbBinarizer: + + def __init__(self, model_dir, logger=None): + self.model_dir = model_dir + self.log = logger if logger else logging.getLogger('SbbBinarizer') + + self.start_new_session() + + self.model_files = glob(self.model_dir+"/*/", recursive = True) + + self.models = [] + for model_file in self.model_files: + self.models.append(self.load_model(model_file)) + + def start_new_session(self): + config = tf.compat.v1.ConfigProto() + config.gpu_options.allow_growth = True + + self.session = tf.compat.v1.Session(config=config) # tf.InteractiveSession() + tensorflow_backend.set_session(self.session) + + def end_session(self): + tensorflow_backend.clear_session() + self.session.close() + del self.session + + def load_model(self, model_name): + model = load_model(join(self.model_dir, model_name), compile=False) + model_height = model.layers[len(model.layers)-1].output_shape[1] + model_width = model.layers[len(model.layers)-1].output_shape[2] + n_classes = model.layers[len(model.layers)-1].output_shape[3] + return model, model_height, model_width, n_classes + + def predict(self, model_in, img, use_patches, n_batch_inference=5): + tensorflow_backend.set_session(self.session) + model, model_height, model_width, n_classes = model_in + + img_org_h = img.shape[0] + img_org_w = img.shape[1] + + if img.shape[0] < model_height and img.shape[1] >= model_width: + img_padded = np.zeros(( model_height, img.shape[1], img.shape[2] )) + + index_start_h = int( abs( img.shape[0] - model_height) /2.) + index_start_w = 0 + + img_padded [ index_start_h: index_start_h+img.shape[0], :, : ] = img[:,:,:] + + elif img.shape[0] >= model_height and img.shape[1] < model_width: + img_padded = np.zeros(( img.shape[0], model_width, img.shape[2] )) + + index_start_h = 0 + index_start_w = int( abs( img.shape[1] - model_width) /2.) + + img_padded [ :, index_start_w: index_start_w+img.shape[1], : ] = img[:,:,:] + + + elif img.shape[0] < model_height and img.shape[1] < model_width: + img_padded = np.zeros(( model_height, model_width, img.shape[2] )) + + index_start_h = int( abs( img.shape[0] - model_height) /2.) + index_start_w = int( abs( img.shape[1] - model_width) /2.) 
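# ----------------------------------------------------------------------
# A compact sketch of the geometry this predict() method handles before
# inference: images smaller than the model input are centred on a zero
# canvas (as in the branches above), and larger images are covered with
# overlapping tiles whose `margin` of context is cropped away again when
# the predictions are stitched. Helper names and demo sizes are
# illustrative assumptions, not the module's API.
import numpy as np

def sketch_center_pad(img, model_h, model_w):
    h, w = img.shape[:2]
    canvas = np.zeros((max(h, model_h), max(w, model_w), img.shape[2]), dtype=img.dtype)
    off_h, off_w = (canvas.shape[0] - h) // 2, (canvas.shape[1] - w) // 2
    canvas[off_h:off_h + h, off_w:off_w + w] = img
    return canvas, off_h, off_w  # offsets let the caller crop the result back

def sketch_tile_coords(img_h, img_w, model_h, model_w, margin_ratio=0.1):
    # assumes img_h >= model_h and img_w >= model_w (guaranteed by padding)
    margin = int(margin_ratio * model_w)
    step_h, step_w = model_h - 2 * margin, model_w - 2 * margin
    tiles = []
    for j in range(int(np.ceil(img_h / step_h))):
        for i in range(int(np.ceil(img_w / step_w))):
            y0 = min(j * step_h, img_h - model_h)  # clamp the last row/column
            x0 = min(i * step_w, img_w - model_w)
            tiles.append((y0, y0 + model_h, x0, x0 + model_w))
    return tiles

canvas, oh, ow = sketch_center_pad(np.ones((100, 700, 3), np.uint8), 448, 448)
print(canvas.shape, oh, ow)                          # (448, 700, 3) 174 0
print(len(sketch_tile_coords(1200, 900, 448, 448)))  # 12 overlapping tiles
# ----------------------------------------------------------------------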
+ + img_padded [ index_start_h: index_start_h+img.shape[0], index_start_w: index_start_w+img.shape[1], : ] = img[:,:,:] + + else: + index_start_h = 0 + index_start_w = 0 + img_padded = np.copy(img) + + + img = np.copy(img_padded) + + + + if use_patches: + + margin = int(0.1 * model_width) + + width_mid = model_width - 2 * margin + height_mid = model_height - 2 * margin + + + img = img / float(255.0) + + img_h = img.shape[0] + img_w = img.shape[1] + + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + else: + nxf = int(nxf) + + if nyf > int(nyf): + nyf = int(nyf) + 1 + else: + nyf = int(nyf) + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) + + for i in range(nxf): + for j in range(nyf): + + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + model_width + elif i > 0: + index_x_d = i * width_mid + index_x_u = index_x_d + model_width + + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + model_height + elif j > 0: + index_y_d = j * height_mid + index_y_u = index_y_d + model_height + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - model_width + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - model_height + + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + + + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + + + if batch_indexer == n_batch_inference: + + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + #print(seg.shape, len(seg), len(list_i_s)) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : 
index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) + + elif i==(nxf-1) and j==(nyf-1): + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + #print(seg.shape, len(seg), len(list_i_s)) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = 
seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) + + + + prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:] + prediction_true = prediction_true.astype(np.uint8) + + else: + img_h_page = img.shape[0] + img_w_page = img.shape[1] + img = img / float(255.0) + img = resize_image(img, model_height, model_width) + + label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + + seg = np.argmax(label_p_pred, axis=3)[0] + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + prediction_true = resize_image(seg_color, img_h_page, img_w_page) + prediction_true = prediction_true.astype(np.uint8) + return prediction_true[:,:,0] + + def run(self, image=None, image_path=None, save=None, use_patches=False, dir_in=None, dir_out=None): + print(dir_in,'dir_in') + if not dir_in: + if (image is not None and image_path is not None) or \ + (image is None and image_path is None): + raise ValueError("Must pass either a opencv2 image or an image_path") + if image_path is not None: + image = cv2.imread(image_path) + img_last = 0 + for n, (model, model_file) in enumerate(zip(self.models, self.model_files)): + self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) + + res = self.predict(model, image, use_patches) + + img_fin = np.zeros((res.shape[0], res.shape[1], 3)) + res[:, :][res[:, :] == 0] = 2 + res = res - 1 + res = res * 255 + img_fin[:, :, 0] = res + img_fin[:, :, 1] = res + img_fin[:, :, 2] = res + + img_fin = img_fin.astype(np.uint8) + img_fin = (res[:, :] == 0) * 255 + img_last = img_last + img_fin + + kernel = np.ones((5, 5), np.uint8) + img_last[:, :][img_last[:, :] > 0] = 255 + img_last = (img_last[:, :] == 0) * 255 + if save: + cv2.imwrite(save, img_last) + return img_last + else: + ls_imgs = os.listdir(dir_in) + for image_name in ls_imgs: + image_stem = image_name.split('.')[0] + print(image_name,'image_name') + image = cv2.imread(os.path.join(dir_in,image_name) ) + img_last = 0 + for n, (model, model_file) in enumerate(zip(self.models, self.model_files)): + self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) + + res = self.predict(model, image, use_patches) + + img_fin = np.zeros((res.shape[0], res.shape[1], 3)) + res[:, :][res[:, :] == 0] = 2 + res = res - 1 + res = res * 255 + img_fin[:, :, 0] = res + img_fin[:, :, 1] = res + img_fin[:, :, 2] = res + + img_fin = img_fin.astype(np.uint8) + img_fin = (res[:, :] == 0) * 255 + 
img_last = img_last + img_fin
+
+                kernel = np.ones((5, 5), np.uint8)
+                img_last[:, :][img_last[:, :] > 0] = 255
+                img_last = (img_last[:, :] == 0) * 255
+
+                cv2.imwrite(os.path.join(dir_out,image_stem+'.png'), img_last)

From 328d33e3dc294b4d93fcdca833ed679ee0169f9f Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Wed, 23 Oct 2024 16:55:41 +0200
Subject: Temporary commit – textline prediction without patches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 qurator/eynollah/eynollah.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index e587ff3..6ee3dc7 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -2120,7 +2120,7 @@ class Eynollah:
         else:
             thresholding_for_artificial_class_in_light_version = False
         if not self.dir_in:
-            model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np)
+            model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir)
         #img = img.astype(np.uint8)
         img_org = np.copy(img)
         img_h = img_org.shape[0]
@@ -3311,7 +3311,8 @@ class Eynollah:
         scaler_h_textline = 1#1.3  # 1.2#1.2
         scaler_w_textline = 1#1.3  # 0.9#1
         #print(image_page.shape)
-        textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier)
+        patches = False
+        textline_mask_tot_ea, _ = self.textline_contours(image_page, patches, scaler_h_textline, scaler_w_textline, num_col_classifier)
         if self.textline_light:
             textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16)

From 82281bd6cfa218e7e434fe8da535fae394d5f59c Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Fri, 25 Oct 2024 19:42:48 +0200
Subject: [PATCH 046/107] fixing a bug occurring with reading order + Slro
 option with no patch textline model and thresholding artificial class

---
 qurator/eynollah/eynollah.py       | 79 +++++++++++++++++-------------
 qurator/eynollah/utils/__init__.py | 21 ++++----
 qurator/eynollah/utils/xml.py      |  2 +-
 3 files changed, 58 insertions(+), 44 deletions(-)

diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index e587ff3..03252fb 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -245,7 +245,7 @@ class Eynollah:
         self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425"
         self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425"
         self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425"
-        self.model_region_dir_fully_np = dir_models + "/eynollah-full-regions-1column_20210425"
+        self.model_region_dir_fully_np = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425"
         #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425"
         self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425"
         self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
@@ -253,11 +253,11 @@ class Eynollah:
         self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based"
         self.model_region_dir_p_1_2_sp_np = dir_models +
"/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# + self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: - self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" + self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" if self.ocr: self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" @@ -816,6 +816,14 @@ class Eynollah: verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[0,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) @@ -1546,7 +1554,7 @@ class Eynollah: pass else: img = otsu_copy_binary(img) - img = img.astype(np.uint8) + #img = img.astype(np.uint8) prediction_regions2 = None else: if cols == 1: @@ -1605,9 +1613,12 @@ class Eynollah: img = img.astype(np.uint8) marginal_of_patch_percent = 0.1 - + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) + + ##prediction_regions = self.do_prediction(False, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) + prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions @@ -2148,7 +2159,7 @@ class Eynollah: if not thresholding_for_artificial_class_in_light_version: textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') - textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) + #textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 @@ -2245,26 +2256,27 @@ class Eynollah: #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): - if not self.dir_in: - 
model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - - #print("inside bin ", time.time()-t_bin) - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - prediction_bin = prediction_bin.astype(np.uint16) - #img= np.copy(prediction_bin) - img_bin = np.copy(prediction_bin) - else: - img_bin = np.copy(img_resized) - + ###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): + ###if not self.dir_in: + ###model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + ###prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + ###else: + ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + + ####print("inside bin ", time.time()-t_bin) + ###prediction_bin=prediction_bin[:,:,0] + ###prediction_bin = (prediction_bin[:,:]==0)*1 + ###prediction_bin = prediction_bin*255 + + ###prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + ###prediction_bin = prediction_bin.astype(np.uint16) + ####img= np.copy(prediction_bin) + ###img_bin = np.copy(prediction_bin) + ###else: + ###img_bin = np.copy(img_resized) + + img_bin = np.copy(img_resized) #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) @@ -3311,7 +3323,8 @@ class Eynollah: scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) - textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) + patches = False + textline_mask_tot_ea, _ = self.textline_contours(image_page, patches, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3564,9 +3577,9 @@ class Eynollah: image_page = image_page.astype(np.uint8) #print("full inside 1", time.time()- t_full0) if self.light_version: - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, True, cols=num_col_classifier) + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, False, cols=num_col_classifier) else: - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, False, cols=num_col_classifier) #print("full inside 2", time.time()- t_full0) # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model @@ -3590,7 +3603,7 @@ class Eynollah: regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model - regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) + ##regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 @@ -4768,9 +4781,9 @@ class Eynollah: 
textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )
 
-            slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew)
+            slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew)
         else:
-            slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea)
+            slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
         #print("text region early -2,5 in %.1fs", time.time() - t0)
         #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
         num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \
diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py
index 6219df2..e7cbbea 100644
--- a/qurator/eynollah/utils/__init__.py
+++ b/qurator/eynollah/utils/__init__.py
@@ -1204,17 +1204,12 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
         top = peaks_neg_new[i]
         down = peaks_neg_new[i + 1]
 
-        # print(top,down,'topdown')
-
         indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
         cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
         cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
         types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
         index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
 
-        # print(top,down)
-        # print(cys_in,'cyyyins')
-        # print(indexes_in,'indexes')
         sorted_inside = np.argsort(cxs_in)
 
         ind_in_int = indexes_in[sorted_inside]
@@ -1228,11 +1223,17 @@
     ##matrix_of_orders[:len_main,4]=final_indexers_sorted[:]
 
-    # print(peaks_neg_new,'peaks')
-    # print(final_indexers_sorted,'indexsorted')
-    # print(final_types,'types')
-    # print(final_index_type,'final_index_type')
-
+    # This fix is applied when the sum of the lengths of contours and contours_h does not match final_indexers_sorted. However, this is not the optimal solution.
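+    # Worked example with hypothetical values: if len(cy_main) + len(cy_header) == 4 but
+    # final_indexers_sorted == [0, 2], the set difference below yields {1, 3}; those missed
+    # region indices are appended so every region keeps a slot in the final reading order.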
+ if (len(cy_main)+len(cy_header) ) == len(final_index_type): + pass + else: + indexes_missed = set(list( np.array( range((len(cy_main)+len(cy_header) ) )) )) - set(final_indexers_sorted) + for ind_missed in indexes_missed: + final_indexers_sorted.append(ind_missed) + final_types.append(1) + final_index_type.append(ind_missed) + + return final_indexers_sorted, matrix_of_orders, final_types, final_index_type def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(img_p_in_ver, img_in_hor,num_col_classifier): diff --git a/qurator/eynollah/utils/xml.py b/qurator/eynollah/utils/xml.py index 0386b25..bd95702 100644 --- a/qurator/eynollah/utils/xml.py +++ b/qurator/eynollah/utils/xml.py @@ -72,7 +72,7 @@ def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region index_of_types_2 = index_of_types[kind_of_texts == 2] indexes_sorted_2 = indexes_sorted[kind_of_texts == 2] - + counter = EynollahIdCounter(region_idx=ref_point) for idx_textregion, _ in enumerate(found_polygons_text_region): id_of_texts.append(counter.next_region_id) From 90ee2d61dc1d2ce05724d6d0f11c200ba1709108 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 28 Oct 2024 20:56:06 +0100 Subject: [PATCH 047/107] textline segmentation is masked with drop capitals --- qurator/eynollah/eynollah.py | 223 +++++++++++++++++++++-------------- 1 file changed, 135 insertions(+), 88 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 1cb00c7..d0a8299 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -245,7 +245,7 @@ class Eynollah: self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" - self.model_region_dir_fully_np = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" @@ -253,11 +253,11 @@ class Eynollah: self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - 
self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: - self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" if self.ocr: self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" @@ -502,7 +502,8 @@ class Eynollah: if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) num_column_is_classified = False - elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + elif img_h_new >= 8000: img_new = np.copy(img) num_column_is_classified = False else: @@ -523,7 +524,8 @@ class Eynollah: if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) num_column_is_classified = False - elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + elif img_h_new >= 8000: img_new = np.copy(img) num_column_is_classified = False else: @@ -3323,7 +3325,7 @@ class Eynollah: scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) - patches = False + patches = True textline_mask_tot_ea, _ = self.textline_contours(image_page, patches, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3634,6 +3636,7 @@ class Eynollah: regions_without_separators = (text_regions_p[:, :] == 1) * 1 img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) + self.logger.debug('exit run_boxes_full_layout') #print("full inside 3", time.time()- t_full0) return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables @@ -4169,7 +4172,123 @@ class Eynollah: x_differential_new[split_masked[i]:split_masked[i+1]] = -1*np.array(x_differential)[split_masked[i]:split_masked[i+1]] return x_differential_new - + def dilate_textregions_contours_textline_version(self,all_found_textline_polygons): + #print(all_found_textline_polygons) + + for j in range(len(all_found_textline_polygons)): + for ij in range(len(all_found_textline_polygons[j])): + + con_ind = all_found_textline_polygons[j][ij] + area = cv2.contourArea(con_ind) + con_ind = con_ind.astype(np.float) + + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) + + + x_differential = gaussian_filter1d(x_differential, 0.1) + y_differential = gaussian_filter1d(y_differential, 0.1) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( 
con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] + + abs_diff=abs(abs(x_differential)- abs(y_differential) ) + + inc_x = np.zeros(len(x_differential)+1) + inc_y = np.zeros(len(x_differential)+1) + + + if (y_max-y_min) <= (x_max-x_min): + dilation_m1 = round(area / (x_max-x_min) * 0.12) + else: + dilation_m1 = round(area / (y_max-y_min) * 0.12) + + if dilation_m1>8: + dilation_m1 = 8 + if dilation_m1<6: + dilation_m1 = 6 + #print(dilation_m1, 'dilation_m1') + dilation_m1 = 6 + dilation_m2 = int(dilation_m1/2.) +1 + + for i in range(len(x_differential)): + if abs_diff[i]==0: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + + elif abs_diff[i]!=0 and abs_diff[i]>=3: + if abs(x_differential[i])>abs(y_differential[i]): + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + else: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + else: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + + + inc_x[0] = inc_x[-1] + inc_y[0] = inc_y[-1] + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] + con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] + + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) + + con_ind = con_ind.astype(np.int32) + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] + + results = np.array(results) + + #print(results,'results') + + results[results==0] = 1 + + + diff_result = np.diff(results) + + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] + + + if results[0]==1: + con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] + con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] + #indices_2 = indices_2[1:] + indices_m2 = indices_m2[1:] + + + + if len(indices_2)>len(indices_m2): + con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] + con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] + + indices_2 = indices_2[:-1] + + + for ii in range(len(indices_2)): + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] + + + all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] + return all_found_textline_polygons def dilate_textregions_contours(self,all_found_textline_polygons): #print(all_found_textline_polygons) for j in range(len(all_found_textline_polygons)): @@ -4179,9 +4298,6 @@ class Eynollah: area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) - #con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.5) - 
#con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.5) - x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) @@ -4235,29 +4351,6 @@ class Eynollah: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - ###for i in range(len(x_differential)): - ###if abs_diff[i]==0: - ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) - ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) - ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) - - ###elif abs_diff[i]!=0 and abs_diff[i]>=3: - ###if abs(x_differential[i])>abs(y_differential[i]): - ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) - ###else: - ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) - ###else: - ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) - - ###inc_x =list(inc_x) - ###inc_x.append(inc_x[0]) - - ###inc_y =list(inc_y) - ###inc_y.append(inc_y[0]) inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -4288,21 +4381,6 @@ class Eynollah: indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - #print(area_scaled / area, "ratio") - #print(results,'results') - #if results[0]==1 and diff_result[-1]==-2: - ##indices_2 = indices_2[1:] - ##indices_m2 = indices_m2[1:] - - #con_scaled[:indices_m2[0]+1,0, 1] = con_scaled[indices_m2[-1],0, 1] - #con_scaled[:indices_m2[0]+1,0, 0] = con_scaled[indices_m2[-1],0, 0] - - - #con_scaled[indices_2[-1]+1:,0, 1] = con_scaled[indices_m2[-1],0, 1] - #con_scaled[indices_2[-1]+1:,0, 0] = con_scaled[indices_m2[-1],0, 0] - - #indices_2 = indices_2[:-1] - #indices_m2 = indices_m2[1:-1] if results[0]==1: con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] @@ -4318,50 +4396,12 @@ class Eynollah: indices_2 = indices_2[:-1] - - - #diff_neg_pos = np.array(indices_m2) - np.array(indices_2) - - - #print(diff_neg_pos,'diff') - ##print(indices_2, 'indices_2') - #indices_2 = np.array(indices_2)[diff_neg_pos>1] - #indices_m2 = np.array(indices_m2)[diff_neg_pos>1] for ii in range(len(indices_2)): - #x_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 0] - #y_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 1] - - #if x_inner[-1]>=x_inner[0]: - #x_interest = np.min(x_inner) - #else: - #x_interest = np.max(x_inner) - - #if y_inner[-1]>=y_inner[0]: - #y_interest = np.min(y_inner) - #else: - #y_interest = np.max(y_inner) - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - #con_scaled[:,0, 1][results[:]>0] = con_ind[:,0,1][results[:]>0] - #con_scaled[:,0, 0][results[:]>0] = con_ind[:,0,0][results[:]>0] - - #print(list(results), 'results') - #print(list(diff_result), 'diff_result') - #print(indices_2,'2') - #print(indices_m2,'-2') - #print(diff_neg_pos,'diff_neg_pos') - - ##con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1) - ##con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1) - - #con_scaled[-1,0, 1] = con_scaled[0,0, 1] - #con_scaled[-1,0, 0] = con_scaled[0,0, 0] - ##print(len(con_scaled[:,0,0]),'con_scaled[:,0,0]') 
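+            # pointPolygonTest gives +1/0/-1 for inside/on/outside the original contour; after
+            # mapping 0 to 1, a diff of +2 marks where the offset contour re-enters the original
+            # polygon and -2 where it leaves again. Each such re-entrant run was collapsed above
+            # onto the point where it starts, so the dilated contour cannot fold back into itself
+            # before being written back below.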
all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons @@ -4865,6 +4905,12 @@ class Eynollah: img_bin_light = None polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + + if self.light_version: + drop_label_in_full_layout = 4 + textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 + + text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 @@ -5018,7 +5064,8 @@ class Eynollah: #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals) From 438df5228705e93f52d43a17a9284cc199fb97f4 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 30 Oct 2024 00:52:09 +0100 Subject: [PATCH 048/107] updating --- qurator/eynollah/eynollah.py | 8 +++++--- qurator/eynollah/utils/__init__.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index d0a8299..543ed92 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1726,6 +1726,7 @@ class Eynollah: polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) + M_main_tot = [cv2.moments(polygons_of_textlines[j]) for j in range(len(polygons_of_textlines))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] @@ -3605,7 +3606,7 @@ class Eynollah: regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model - ##regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 @@ -4901,6 +4902,7 @@ class Eynollah: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, 
regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.full_layout: + cv2.imwrite('dewar_page.png', image_page) if not self.light_version: img_bin_light = None polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) @@ -5067,7 +5069,7 @@ class Eynollah: #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals) + all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) @@ -5261,7 +5263,7 @@ class Eynollah: all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index e7cbbea..29f80b4 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -792,7 +792,7 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) - if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.8: + if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.4: layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label else: From e796a99c5cae651ae1601f2033feecd695b382f2 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 30 Oct 2024 15:02:50 +0100 Subject: [PATCH 049/107] updating inference for early layout in the case of documents with number of columns bigger than 2 --- qurator/eynollah/eynollah.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 543ed92..0a1c2b1 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2296,9 +2296,8 @@ class Eynollah: #plt.show() if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) - if not self.dir_in: - if num_col_classifier == 1 or num_col_classifier >= 2: + if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if 
self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) @@ -2307,12 +2306,12 @@ class Eynollah: prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: - if num_col_classifier == 1 or num_col_classifier >= 2: + if num_col_classifier == 1 or num_col_classifier == 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) else: @@ -2320,7 +2319,7 @@ class Eynollah: prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), self.model_region_1_2, n_batch_inference=2) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) From 751b0102f7787f2ab8a45e3ecc4604e7c107e1e6 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 5 Nov 2024 19:50:18 +0100 Subject: [PATCH 050/107] updating early layout inference for light version --- qurator/eynollah/eynollah.py | 37 ++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 0a1c2b1..9095c15 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -245,7 +245,7 @@ class Eynollah: self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" - self.model_region_dir_fully_np = dir_models + 
"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" @@ -253,7 +253,7 @@ class Eynollah: self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: @@ -743,7 +743,7 @@ class Eynollah: def get_image_and_scales_after_enhancing(self, img_org, img_res): self.logger.debug("enter get_image_and_scales_after_enhancing") self.image = np.copy(img_res) - #self.image = self.image.astype(np.uint8) + self.image = self.image.astype(np.uint8) self.image_org = np.copy(img_org) self.height_org = self.image_org.shape[0] self.width_org = self.image_org.shape[1] @@ -1298,20 +1298,25 @@ class Eynollah: seg = np.argmax(label_p_pred, axis=3) if thresholding_for_some_classes_in_light_version: - seg_not_base = label_p_pred[:,:,:,4] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 + + seg_art = label_p_pred[:,:,:,4] + seg_art[seg_art<0.2] =0 + seg_art[seg_art>0] =1 + ###seg[seg_art==1]=4 + ##seg_not_base = label_p_pred[:,:,:,4] + ##seg_not_base[seg_not_base>0.03] =1 + ##seg_not_base[seg_not_base<1] =0 seg_line = label_p_pred[:,:,:,3] seg_line[seg_line>0.1] =1 seg_line[seg_line<1] =0 - seg_background = label_p_pred[:,:,:,0] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 + ##seg_background = label_p_pred[:,:,:,0] + ##seg_background[seg_background>0.25] =1 + ##seg_background[seg_background<1] =0 - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 + seg[seg_art==1]=4 + ##seg[seg_background==1]=0 seg[(seg_line==1) & (seg==0)]=3 if thresholding_for_artificial_class_in_light_version: seg_art = label_p_pred[:,:,:,2] @@ -2300,26 +2305,26 @@ class Eynollah: if num_col_classifier == 1 or num_col_classifier 
== 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_some_classes_in_light_version=True) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), self.model_region_1_2, n_batch_inference=2) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), self.model_region_1_2, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) @@ -4595,7 +4600,7 @@ class Eynollah: areas_without = np.array(areas_tot)[args_all] area_of_con_interest = areas_tot[ij] - args_with_bigger_area = 
np.array(args_all)[areas_without > area_of_con_interest]
+                args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest]
 
                 if len(args_with_bigger_area)>0:
                     results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) for ind in args_with_bigger_area ]

From 8409de0e58457f2ae4661a42ce8942e96794f2e8 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Sun, 10 Nov 2024 19:34:43 +0100
Subject: [PATCH 051/107] sbb_binarization is integrated into eynollah and
 works within the OCR-D framework - in OCR-D it binarizes individual images,
 and standalone it can now also process a whole directory. For eynollah in
 the OCR-D framework, the light version is added as the default parameter.

---
 pyproject.toml                           |   1 +
 src/eynollah/eynollah.py                 |   1 -
 src/eynollah/ocrd-tool-binarization.json |  47 +++++++
 src/eynollah/ocrd-tool.json              |  10 ++
 src/eynollah/ocrd_cli_binarization.py    | 158 +++++++++++++++++++++++
 src/eynollah/processor.py                |   2 +
 6 files changed, 218 insertions(+), 1 deletion(-)
 create mode 100644 src/eynollah/ocrd-tool-binarization.json
 create mode 100644 src/eynollah/ocrd_cli_binarization.py

diff --git a/pyproject.toml b/pyproject.toml
index 67a420d..b056cb7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,6 +28,7 @@ classifiers = [
 [project.scripts]
 eynollah = "eynollah.cli:main"
 ocrd-eynollah-segment = "eynollah.ocrd_cli:main"
+ocrd-sbb-binarize = "eynollah.ocrd_cli_binarization:cli"
 
 [project.urls]
 Homepage = "https://github.com/qurator-spk/eynollah"
diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 0d0d683..29d2788 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -4964,7 +4964,6 @@ class Eynollah:
             polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts)
             ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
         if self.full_layout:
-            cv2.imwrite('dewar_page.png', image_page)
             if not self.light_version:
                 img_bin_light = None
                 polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light)
diff --git a/src/eynollah/ocrd-tool-binarization.json b/src/eynollah/ocrd-tool-binarization.json
new file mode 100644
index 0000000..1711e89
--- /dev/null
+++ b/src/eynollah/ocrd-tool-binarization.json
@@ -0,0 +1,47 @@
+{
+  "version": "0.1.0",
+  "git_url": "https://github.com/qurator-spk/sbb_binarization",
+  "tools": {
+    "ocrd-sbb-binarize": {
+      "executable": "ocrd-sbb-binarize",
+      "description": "Pixelwise binarization with selectional auto-encoders in Keras",
+      "categories": ["Image preprocessing"],
+      "steps": ["preprocessing/optimization/binarization"],
+      "input_file_grp": [],
+      "output_file_grp": [],
+      "parameters": {
+        "operation_level": {
+          "type": "string",
+          "enum": ["page", "region"],
+          "default": "page",
+          "description": "PAGE XML hierarchy level to operate on"
+        },
+        "model": {
+          "description": "Directory containing HDF5 or SavedModel/ProtoBuf models.
Can be an absolute path or a path relative to the OCR-D resource location, the current working directory or the $SBB_BINARIZE_DATA environment variable (if set)", + "type": "string", + "format": "uri", + "content-type": "text/directory", + "required": true + } + }, + "resources": [ + { + "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2020_01_16.zip", + "name": "default", + "type": "archive", + "path_in_archive": "saved_model_2020_01_16", + "size": 563147331, + "description": "default models provided by github.com/qurator-spk (SavedModel format)" + }, + { + "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip", + "name": "default-2021-03-09", + "type": "archive", + "path_in_archive": ".", + "size": 133230419, + "description": "updated default models provided by github.com/qurator-spk (SavedModel format)" + } + ] + } + } +} diff --git a/src/eynollah/ocrd-tool.json b/src/eynollah/ocrd-tool.json index b840005..9eb8932 100644 --- a/src/eynollah/ocrd-tool.json +++ b/src/eynollah/ocrd-tool.json @@ -28,6 +28,16 @@ "type": "boolean", "default": true, "description": "Try to detect all element subtypes, including drop-caps and headings" + }, + "light_version": { + "type": "boolean", + "default": true, + "description": "Try to detect all element subtypes in light version" + }, + "textline_light": { + "type": "boolean", + "default": true, + "description": "Light version need textline light" }, "tables": { "type": "boolean", diff --git a/src/eynollah/ocrd_cli_binarization.py b/src/eynollah/ocrd_cli_binarization.py new file mode 100644 index 0000000..6a8bbdc --- /dev/null +++ b/src/eynollah/ocrd_cli_binarization.py @@ -0,0 +1,158 @@ +from os import environ +from os.path import join +from pathlib import Path +from pkg_resources import resource_string +from json import loads + +from PIL import Image +import numpy as np +import cv2 +from click import command + +from ocrd_utils import ( + getLogger, + assert_file_grp_cardinality, + make_file_id, + MIMETYPE_PAGE +) +from ocrd import Processor +from ocrd_modelfactory import page_from_file +from ocrd_models.ocrd_page import AlternativeImageType, to_xml +from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor + +from .sbb_binarize import SbbBinarizer + +OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool-binarization.json').decode('utf8')) +TOOL = 'ocrd-sbb-binarize' + +def cv2pil(img): + return Image.fromarray(img.astype('uint8')) + +def pil2cv(img): + # from ocrd/workspace.py + color_conversion = cv2.COLOR_GRAY2BGR if img.mode in ('1', 'L') else cv2.COLOR_RGB2BGR + pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img) + return cv2.cvtColor(pil_as_np_array, color_conversion) + +class SbbBinarizeProcessor(Processor): + + def __init__(self, *args, **kwargs): + kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] + kwargs['version'] = OCRD_TOOL['version'] + super().__init__(*args, **kwargs) + if hasattr(self, 'output_file_grp'): + # processing context + self.setup() + + def setup(self): + """ + Set up the model prior to processing. 
+ """ + LOG = getLogger('processor.SbbBinarize.__init__') + if not 'model' in self.parameter: + raise ValueError("'model' parameter is required") + # resolve relative path via environment variable + model_path = Path(self.parameter['model']) + if not model_path.is_absolute(): + if 'SBB_BINARIZE_DATA' in environ and environ['SBB_BINARIZE_DATA']: + LOG.info("Environment variable SBB_BINARIZE_DATA is set to '%s'" \ + " - prepending to model value '%s'. If you don't want this mechanism," \ + " unset the SBB_BINARIZE_DATA environment variable.", + environ['SBB_BINARIZE_DATA'], model_path) + model_path = Path(environ['SBB_BINARIZE_DATA']).joinpath(model_path) + model_path = model_path.resolve() + if not model_path.is_dir(): + raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path) + # resolve relative path via OCR-D ResourceManager + model_path = self.resolve_resource(str(model_path)) + self.binarizer = SbbBinarizer(model_dir=model_path, logger=LOG) + + def process(self): + """ + Binarize images with sbb_binarization (based on selectional auto-encoders). + + For each page of the input file group, open and deserialize input PAGE-XML + and its respective images. Then iterate over the element hierarchy down to + the requested ``operation_level``. + + For each segment element, retrieve a raw (non-binarized) segment image + according to the layout annotation (from an existing ``AlternativeImage``, + or by cropping into the higher-level images, and deskewing when applicable). + + Pass the image to the binarizer (which runs in fixed-size windows/patches + across the image and stitches the results together). + + Serialize the resulting bilevel image as PNG file and add it to the output + file group (with file ID suffix ``.IMG-BIN``) along with the output PAGE-XML + (referencing it as new ``AlternativeImage`` for the segment element). + + Produce a new PAGE output file by serialising the resulting hierarchy. 
+ """ + LOG = getLogger('processor.SbbBinarize') + assert_file_grp_cardinality(self.input_file_grp, 1) + assert_file_grp_cardinality(self.output_file_grp, 1) + + oplevel = self.parameter['operation_level'] + + for n, input_file in enumerate(self.input_files): + file_id = make_file_id(input_file, self.output_file_grp) + page_id = input_file.pageId or input_file.ID + LOG.info("INPUT FILE %i / %s", n, page_id) + pcgts = page_from_file(self.workspace.download_file(input_file)) + self.add_metadata(pcgts) + pcgts.set_pcGtsId(file_id) + page = pcgts.get_Page() + page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized') + + if oplevel == 'page': + LOG.info("Binarizing on 'page' level in page '%s'", page_id) + bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image), use_patches=True)) + # update METS (add the image file): + bin_image_path = self.workspace.save_image_file(bin_image, + file_id + '.IMG-BIN', + page_id=input_file.pageId, + file_grp=self.output_file_grp) + page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comments='%s,binarized' % page_xywh['features'])) + + elif oplevel == 'region': + regions = page.get_AllRegions(['Text', 'Table'], depth=1) + if not regions: + LOG.warning("Page '%s' contains no text/table regions", page_id) + for region in regions: + region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') + region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=True)) + region_image_bin_path = self.workspace.save_image_file( + region_image_bin, + "%s_%s.IMG-BIN" % (file_id, region.id), + page_id=input_file.pageId, + file_grp=self.output_file_grp) + region.add_AlternativeImage( + AlternativeImageType(filename=region_image_bin_path, comments='%s,binarized' % region_xywh['features'])) + + elif oplevel == 'line': + region_line_tuples = [(r.id, r.get_TextLine()) for r in page.get_AllRegions(['Text'], depth=0)] + if not region_line_tuples: + LOG.warning("Page '%s' contains no text lines", page_id) + for region_id, line in region_line_tuples: + line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') + line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=True)) + line_image_bin_path = self.workspace.save_image_file( + line_image_bin, + "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), + page_id=input_file.pageId, + file_grp=self.output_file_grp) + line.add_AlternativeImage( + AlternativeImageType(filename=line_image_bin_path, comments='%s,binarized' % line_xywh['features'])) + + self.workspace.add_file( + ID=file_id, + file_grp=self.output_file_grp, + pageId=input_file.pageId, + mimetype=MIMETYPE_PAGE, + local_filename=join(self.output_file_grp, file_id + '.xml'), + content=to_xml(pcgts)) + +@command() +@ocrd_cli_options +def cli(*args, **kwargs): + return ocrd_cli_wrap_processor(SbbBinarizeProcessor, *args, **kwargs) diff --git a/src/eynollah/processor.py b/src/eynollah/processor.py index 1bd190e..ed510c8 100644 --- a/src/eynollah/processor.py +++ b/src/eynollah/processor.py @@ -49,6 +49,8 @@ class EynollahProcessor(Processor): 'curved_line': self.parameter['curved_line'], 'full_layout': self.parameter['full_layout'], 'allow_scaling': self.parameter['allow_scaling'], + 'light_version': self.parameter['light_version'], + 'textline_light': self.parameter['textline_light'], 'headers_off': self.parameter['headers_off'], 'tables': 
self.parameter['tables'], 'override_dpi': self.parameter['dpi'], From 1ae77e61c854b03c7de29eaf99592186dd19fc74 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Mon, 11 Nov 2024 14:11:36 +0100 Subject: [PATCH 052/107] Update requirements.txt --- requirements.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f01d319..f4ab5eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,10 @@ ocrd >= 2.23.3 numpy <1.24.0 scikit-learn >= 0.23.2 -tensorflow == 2.12.1 +tensorflow < 2.13 imutils >= 0.5.3 matplotlib setuptools >= 50 +transformers +torch +numba From 22b0b07a733052390ac0b00822f6e662686fbcc7 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 11 Nov 2024 19:01:40 +0100 Subject: [PATCH 053/107] drop capital and marginals extraction is updated --- src/eynollah/eynollah.py | 6 +- src/eynollah/utils/__init__.py | 20 +++++- src/eynollah/utils/marginals.py | 112 ++++++++++++++++++++------------ 3 files changed, 90 insertions(+), 48 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 29d2788..2c3965a 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -249,7 +249,7 @@ class Eynollah: self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" - self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1__4_3_091124"#"/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" @@ -258,7 +258,7 @@ class Eynollah: self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_lay_1__4_3_091124"#"/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: self.model_textline_dir = dir_models + 
"/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: @@ -3653,7 +3653,7 @@ class Eynollah: regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model - regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model, text_regions_p) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 29f80b4..d7f9ccd 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -775,7 +775,7 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): return layout_no_patch -def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label): +def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label, text_regions_p): drop_only = (layout_in_patch[:, :, 0] == drop_capital_label) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) @@ -791,12 +791,26 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) + mask_of_drop_cpaital_in_early_layout = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1])) - if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.4: + mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w] = text_regions_p[y : y + h, x : x + w] + + all_drop_capital_pixels_which_is_text_in_early_lo = np.sum( mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w]==1 ) + + mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w]=1 + all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1 ) + + percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels) + + + if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.6 and percent_text_to_all_in_drop>=0.3: layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label else: - layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = 1#drop_capital_label + layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = drop_capital_label + layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == 0] = drop_capital_label + layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == 4] = drop_capital_label# images + #layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = 1#drop_capital_label return layout_in_patch diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index 984156f..a29e50d 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -2,8 +2,6 @@ import numpy as np import cv2 from scipy.signal import 
find_peaks from scipy.ndimage import gaussian_filter1d - - from .contour import find_new_features_of_contours, return_contours_of_interested_region from .resize import resize_image from .rotate import rotate_image @@ -123,62 +121,92 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve if max_point_of_right_marginal>=text_regions.shape[1]: max_point_of_right_marginal=text_regions.shape[1]-1 + if light_version: + text_regions_org = np.copy(text_regions) + text_regions[text_regions[:,:]==1]=4 + + pixel_img=4 + min_area_text=0.00001 + + polygon_mask_marginals_rotated = return_contours_of_interested_region(mask_marginals,1,min_area_text) + + polygon_mask_marginals_rotated = polygon_mask_marginals_rotated[0] - #plt.imshow(mask_marginals_rotated) - #plt.show() + polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text) - text_regions[(mask_marginals_rotated[:,:]!=1) & (text_regions[:,:]==1)]=4 + cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contours(polygons_of_marginals) - #plt.imshow(text_regions) - #plt.show() + text_regions[(text_regions[:,:]==4)]=1 - pixel_img=4 - min_area_text=0.00001 - polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text) + marginlas_should_be_main_text=[] - cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contours(polygons_of_marginals) + x_min_marginals_left=[] + x_min_marginals_right=[] - text_regions[(text_regions[:,:]==4)]=1 + for i in range(len(cx_text_only)): + results = cv2.pointPolygonTest(polygon_mask_marginals_rotated, (cx_text_only[i], cy_text_only[i]), False) - marginlas_should_be_main_text=[] + if results == -1: + marginlas_should_be_main_text.append(polygons_of_marginals[i]) - x_min_marginals_left=[] - x_min_marginals_right=[] - for i in range(len(cx_text_only)): - x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i]) - y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i]) - if x_width_mar>16 and y_height_mar/x_width_mar<18: - marginlas_should_be_main_text.append(polygons_of_marginals[i]) - if x_min_text_only[i]<(mid_point-one_third_left): - x_min_marginals_left_new=x_min_text_only[i] - if len(x_min_marginals_left)==0: - x_min_marginals_left.append(x_min_marginals_left_new) - else: - x_min_marginals_left[0]=min(x_min_marginals_left[0],x_min_marginals_left_new) - else: - x_min_marginals_right_new=x_min_text_only[i] - if len(x_min_marginals_right)==0: - x_min_marginals_right.append(x_min_marginals_right_new) + text_regions_org=cv2.fillPoly(text_regions_org, pts =marginlas_should_be_main_text, color=(4,4)) + text_regions = np.copy(text_regions_org) + + + else: + + text_regions[(mask_marginals_rotated[:,:]!=1) & (text_regions[:,:]==1)]=4 + + pixel_img=4 + min_area_text=0.00001 + + polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text) + + cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contours(polygons_of_marginals) + + text_regions[(text_regions[:,:]==4)]=1 + + marginlas_should_be_main_text=[] + + x_min_marginals_left=[] + x_min_marginals_right=[] + + for i in range(len(cx_text_only)): + x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i]) + y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i]) + + if x_width_mar>16 and y_height_mar/x_width_mar<18: + 
marginlas_should_be_main_text.append(polygons_of_marginals[i]) + if x_min_text_only[i]<(mid_point-one_third_left): + x_min_marginals_left_new=x_min_text_only[i] + if len(x_min_marginals_left)==0: + x_min_marginals_left.append(x_min_marginals_left_new) + else: + x_min_marginals_left[0]=min(x_min_marginals_left[0],x_min_marginals_left_new) else: - x_min_marginals_right[0]=min(x_min_marginals_right[0],x_min_marginals_right_new) + x_min_marginals_right_new=x_min_text_only[i] + if len(x_min_marginals_right)==0: + x_min_marginals_right.append(x_min_marginals_right_new) + else: + x_min_marginals_right[0]=min(x_min_marginals_right[0],x_min_marginals_right_new) - if len(x_min_marginals_left)==0: - x_min_marginals_left=[0] - if len(x_min_marginals_right)==0: - x_min_marginals_right=[text_regions.shape[1]-1] + if len(x_min_marginals_left)==0: + x_min_marginals_left=[0] + if len(x_min_marginals_right)==0: + x_min_marginals_right=[text_regions.shape[1]-1] - text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) + text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) - #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 - #text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 - - - text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0 - text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0 + #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 + #text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 + + + text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0 + text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0 ###text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4 From f43c49c5086289b695322640693a2bf4f5cfa797 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 13 Nov 2024 11:53:56 +0100 Subject: [PATCH 054/107] textlines of drop capitals are connected to corresponding textline if possible otherwise they are inserted in corresponding textregion --- src/eynollah/eynollah.py | 2 +- src/eynollah/utils/drop_capitals.py | 89 ++++++++++++++++++++--------- src/eynollah/writer.py | 6 +- 3 files changed, 66 insertions(+), 31 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 2c3965a..d7e389d 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5176,7 +5176,7 @@ class Eynollah: pixel_img = 4 polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) pixel_lines = 6 if not 
self.reading_order_machine_based: diff --git a/src/eynollah/utils/drop_capitals.py b/src/eynollah/utils/drop_capitals.py index e12028f..67547d3 100644 --- a/src/eynollah/utils/drop_capitals.py +++ b/src/eynollah/utils/drop_capitals.py @@ -4,6 +4,7 @@ from .contour import ( find_new_features_of_contours, return_contours_of_image, return_parent_contours, + return_contours_of_interested_region, ) def adhere_drop_capital_region_into_corresponding_textline( @@ -17,6 +18,7 @@ def adhere_drop_capital_region_into_corresponding_textline( all_found_textline_polygons_h, kernel=None, curved_line=False, + textline_light=False, ): # print(np.shape(all_found_textline_polygons),np.shape(all_found_textline_polygons[3]),'all_found_textline_polygonsshape') # print(all_found_textline_polygons[3]) @@ -76,7 +78,7 @@ def adhere_drop_capital_region_into_corresponding_textline( # region_with_intersected_drop=region_with_intersected_drop/3 region_with_intersected_drop = region_with_intersected_drop.astype(np.uint8) # print(np.unique(img_con_all_copy[:,:,0])) - if curved_line: + if curved_line or textline_light: if len(region_with_intersected_drop) > 1: sum_pixels_of_intersection = [] @@ -114,12 +116,17 @@ def adhere_drop_capital_region_into_corresponding_textline( img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) - imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) + + #plt.imshow(img_textlines) + #plt.show() + + #imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) + #ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - # print(len(contours_combined),'len textlines mixed') areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) contours_biggest = contours_combined[np.argmax(areas_cnt_text)] @@ -130,8 +137,13 @@ def adhere_drop_capital_region_into_corresponding_textline( # contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0] # contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) - - all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + + if len(contours_combined)==1: + all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + elif len(contours_combined)==2: + all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + else: + pass except: # print('gordun1') @@ -167,14 +179,13 @@ def adhere_drop_capital_region_into_corresponding_textline( img_textlines = img_textlines.astype(np.uint8) - # plt.imshow(img_textlines) - # plt.show() - imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) + ##imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) + ##ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + ##contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - # print(len(contours_combined),'len textlines 
mixed') areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) contours_biggest = contours_combined[np.argmax(areas_cnt_text)] @@ -186,7 +197,12 @@ def adhere_drop_capital_region_into_corresponding_textline( # print(np.shape(contours_biggest),'contours_biggest') # print(np.shape(all_found_textline_polygons[int(region_final)][arg_min])) ##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) - all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + if len(contours_combined)==1: + all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + elif len(contours_combined)==2: + all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + else: + pass except: pass @@ -215,10 +231,11 @@ def adhere_drop_capital_region_into_corresponding_textline( img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) - imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) + #imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) + #ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # print(len(contours_combined),'len textlines mixed') areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) @@ -231,7 +248,12 @@ def adhere_drop_capital_region_into_corresponding_textline( contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] # -all_box_coord[int(region_final)][0] ##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) - all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + if len(contours_combined)==1: + all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + elif len(contours_combined)==2: + all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + else: + pass # all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest except: @@ -320,10 +342,12 @@ def adhere_drop_capital_region_into_corresponding_textline( img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) - imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) + + #imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) + #ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # print(len(contours_combined),'len textlines mixed') areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) @@ -336,8 +360,12 @@ def adhere_drop_capital_region_into_corresponding_textline( contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0] contours_biggest = 
contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2]) - - all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + if len(contours_combined)==1: + all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + elif len(contours_combined)==2: + all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + else: + pass except: # print('gordun1') @@ -375,10 +403,12 @@ def adhere_drop_capital_region_into_corresponding_textline( img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) - imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) + + #imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) + #ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # print(len(contours_combined),'len textlines mixed') areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) @@ -391,7 +421,12 @@ def adhere_drop_capital_region_into_corresponding_textline( contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0] contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2]) - all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + if len(contours_combined)==1: + all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + elif len(contours_combined)==2: + all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + else: + pass # all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest except: diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 96441c6..496b3db 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -285,9 +285,9 @@ class EynollahXmlWriter(): dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))) page.add_TextRegion(dropcapital) - all_box_coord_drop = None - slopes_drop = None - self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None) + ###all_box_coord_drop = None + ###slopes_drop = None + ###self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None) for mm in range(len(found_polygons_text_region_img)): page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) From ce5b6112960f67d7819b11a9b346da0d8f5fdb4d Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 14 Nov 2024 17:18:07 +0100 Subject: [PATCH 055/107] tests are passed - new models by the way should be uploaded --- tests/test_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_run.py b/tests/test_run.py index 2596dad..cdb715a 100644 --- a/tests/test_run.py +++ 
b/tests/test_run.py @@ -2,7 +2,7 @@ from os import environ from pathlib import Path from ocrd_utils import pushd_popd from tests.base import CapturingTestCase as TestCase, main -from eynollah.cli import main as eynollah_cli +from eynollah.cli import layout as eynollah_cli testdir = Path(__file__).parent.resolve() From 5fa8ca46a47be5349ad91ae0f4121cdf661bf466 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 14 Nov 2024 17:35:00 +0100 Subject: [PATCH 056/107] updating requirements --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index f4ab5eb..02450aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,6 @@ tensorflow < 2.13 imutils >= 0.5.3 matplotlib setuptools >= 50 -transformers -torch -numba +transformers <= 4.30.2 +torch <= 2.0.1 +numba <= 0.58.1 From d9f79c3404fb6372031625d357fed5727fa6ec51 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 18 Nov 2024 10:15:19 +0100 Subject: [PATCH 057/107] fixing IndexError by reading order detection --- src/eynollah/eynollah.py | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d7e389d..4f9eaa6 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -2678,17 +2678,29 @@ class Eynollah: try: arg_text_con = [] for ii in range(len(cx_text_only)): + check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: arg_text_con.append(jj) + check_if_textregion_located_in_a_box = True break + if not check_if_textregion_located_in_a_box: + dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + ind_min = np.argmin(dists_tr_from_box) + arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) arg_text_con_h = [] for ii in range(len(cx_text_only_h)): + check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if (x_min_text_only_h[ii] + 80) >= boxes[jj][0] and (x_min_text_only_h[ii] + 80) < boxes[jj][1] and y_cor_x_min_main_h[ii] >= boxes[jj][2] and y_cor_x_min_main_h[ii] < boxes[jj][3]: arg_text_con_h.append(jj) + check_if_textregion_located_in_a_box = True break + if not check_if_textregion_located_in_a_box: + dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + ind_min = np.argmin(dists_tr_from_box) + arg_text_con_h.append(ind_min) args_contours_h = np.array(range(len(arg_text_con_h))) order_by_con_head = np.zeros(len(arg_text_con_h)) @@ -2742,15 +2754,22 @@ class Eynollah: order_text_new = [] for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - + except Exception as why: self.logger.error(why) arg_text_con = [] for ii in range(len(cx_text_only)): + check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) + check_if_textregion_located_in_a_box = True break + + if not check_if_textregion_located_in_a_box: 
+ dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + ind_min = np.argmin(dists_tr_from_box) + arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) order_by_con_main = np.zeros(len(arg_text_con)) @@ -2759,10 +2778,16 @@ class Eynollah: arg_text_con_h = [] for ii in range(len(cx_text_only_h)): + check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if cx_text_only_h[ii] >= boxes[jj][0] and cx_text_only_h[ii] < boxes[jj][1] and cy_text_only_h[ii] >= boxes[jj][2] and cy_text_only_h[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located arg_text_con_h.append(jj) + check_if_textregion_located_in_a_box = True break + if not check_if_textregion_located_in_a_box: + dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + ind_min = np.argmin(dists_tr_from_box) + arg_text_con_h.append(ind_min) args_contours_h = np.array(range(len(arg_text_con_h))) order_by_con_head = np.zeros(len(arg_text_con_h)) @@ -2814,6 +2839,7 @@ class Eynollah: order_text_new = [] for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) + return order_text_new, id_of_texts_tot def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): @@ -2823,10 +2849,16 @@ class Eynollah: try: arg_text_con = [] for ii in range(len(cx_text_only)): + check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: arg_text_con.append(jj) + check_if_textregion_located_in_a_box = True break + if not check_if_textregion_located_in_a_box: + dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + ind_min = np.argmin(dists_tr_from_box) + arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) order_by_con_main = np.zeros(len(arg_text_con)) @@ -2868,10 +2900,16 @@ class Eynollah: self.logger.error(why) arg_text_con = [] for ii in range(len(cx_text_only)): + check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) + check_if_textregion_located_in_a_box = True break + if not check_if_textregion_located_in_a_box: + dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + ind_min = np.argmin(dists_tr_from_box) + arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) order_by_con_main = np.zeros(len(arg_text_con)) From b622494f34f8366f21ded3f3c8b85b8519245fa7 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 21 Nov 2024 02:16:22 +0100 Subject: [PATCH 058/107] new table detection model is integrated --- src/eynollah/eynollah.py | 413 +++++++++++++++++++++++---------------- 1 file changed, 241 insertions(+), 172 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 
4f9eaa6..f2426f8 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -264,9 +264,13 @@ class Eynollah: else: self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" if self.ocr: - self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" + self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" - self.model_tables = dir_models + "/eynollah-tables_20210319" + if self.tables: + if self.light_version: + self.model_table_dir = dir_models + "/modelens_table_0t4_201124" + else: + self.model_table_dir = dir_models + "/eynollah-tables_20210319" self.models = {} @@ -290,6 +294,9 @@ class Eynollah: self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")#("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") + if self.tables: + self.model_table = self.our_load_model(self.model_table_dir) + self.ls_imgs = os.listdir(self.dir_in) @@ -325,9 +332,13 @@ class Eynollah: self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) - + if self.tables: + self.model_table = self.our_load_model(self.model_table_dir) + self.ls_imgs = os.listdir(self.dir_in) + + def _cache_images(self, image_filename=None, image_pil=None): ret = {} @@ -2326,8 +2337,23 @@ class Eynollah: ###img_bin = np.copy(prediction_bin) ###else: ###img_bin = np.copy(img_resized) - - img_bin = np.copy(img_resized) + if self.ocr and not self.input_binary: + if not self.dir_in: + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + else: + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + prediction_bin=prediction_bin[:,:,0] + prediction_bin = (prediction_bin[:,:]==0)*1 + prediction_bin = prediction_bin*255 + + prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + prediction_bin = prediction_bin.astype(np.uint16) + #img= np.copy(prediction_bin) + img_bin = np.copy(prediction_bin) + else: + img_bin = np.copy(img_resized) #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) @@ -3175,91 +3201,101 @@ class Eynollah: img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - model_region, session_region = self.start_new_session_and_model(self.model_tables) + + + if self.dir_in: + pass + else: + self.model_table, _ = self.start_new_session_and_model(self.model_table_dir) patches = False - if num_col_classifier < 4 and num_col_classifier > 2: - prediction_table = self.do_prediction(patches, img, model_region) - pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), model_region) - pre_updown = cv2.flip(pre_updown, -1) - - prediction_table[:,:,0][pre_updown[:,:,0]==1]=1 + if self.light_version: + prediction_table = self.do_prediction_new_concept(patches, img, self.model_table) prediction_table = prediction_table.astype(np.int16) + return prediction_table[:,:,0] + else: + if num_col_classifier < 4 and num_col_classifier > 2: + prediction_table = self.do_prediction(patches, img, self.model_table) + pre_updown = self.do_prediction(patches, 
cv2.flip(img[:,:,:], -1), self.model_table) + pre_updown = cv2.flip(pre_updown, -1) + + prediction_table[:,:,0][pre_updown[:,:,0]==1]=1 + prediction_table = prediction_table.astype(np.int16) + + elif num_col_classifier ==2: + height_ext = 0#int( img.shape[0]/4. ) + h_start = int(height_ext/2.) + width_ext = int( img.shape[1]/8. ) + w_start = int(width_ext/2.) - elif num_col_classifier ==2: - height_ext = 0#int( img.shape[0]/4. ) - h_start = int(height_ext/2.) - width_ext = int( img.shape[1]/8. ) - w_start = int(width_ext/2.) - - height_new = img.shape[0]+height_ext - width_new = img.shape[1]+width_ext - - img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 - img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] + height_new = img.shape[0]+height_ext + width_new = img.shape[1]+width_ext + + img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 + img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] - prediction_ext = self.do_prediction(patches, img_new, model_region) - pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), model_region) - pre_updown = cv2.flip(pre_updown, -1) - - prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - - prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 - prediction_table = prediction_table.astype(np.int16) + prediction_ext = self.do_prediction(patches, img_new, self.model_table) + pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) + pre_updown = cv2.flip(pre_updown, -1) + + prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + + prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 + prediction_table = prediction_table.astype(np.int16) - elif num_col_classifier ==1: - height_ext = 0# int( img.shape[0]/4. ) - h_start = int(height_ext/2.) - width_ext = int( img.shape[1]/4. ) - w_start = int(width_ext/2.) - - height_new = img.shape[0]+height_ext - width_new = img.shape[1]+width_ext + elif num_col_classifier ==1: + height_ext = 0# int( img.shape[0]/4. ) + h_start = int(height_ext/2.) + width_ext = int( img.shape[1]/4. ) + w_start = int(width_ext/2.) 
- img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 - img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] + height_new = img.shape[0]+height_ext + width_new = img.shape[1]+width_ext + + img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 + img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] - prediction_ext = self.do_prediction(patches, img_new, model_region) - pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), model_region) - pre_updown = cv2.flip(pre_updown, -1) - - prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - - prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 - prediction_table = prediction_table.astype(np.int16) + prediction_ext = self.do_prediction(patches, img_new, self.model_table) + pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) + pre_updown = cv2.flip(pre_updown, -1) + + prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + + prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 + prediction_table = prediction_table.astype(np.int16) - else: - prediction_table = np.zeros(img.shape) - img_w_half = int(img.shape[1]/2.) + else: + prediction_table = np.zeros(img.shape) + img_w_half = int(img.shape[1]/2.) - pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], model_region) - pre2 = self.do_prediction(patches, img[:,img_w_half:,:], model_region) - pre_full = self.do_prediction(patches, img[:,:,:], model_region) - pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), model_region) - pre_updown = cv2.flip(pre_updown, -1) - - prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4) - prediction_table_full_erode = cv2.dilate(prediction_table_full_erode, KERNEL, iterations=4) - - prediction_table_full_updown_erode = cv2.erode(pre_updown[:,:,0], KERNEL, iterations=4) - prediction_table_full_updown_erode = cv2.dilate(prediction_table_full_updown_erode, KERNEL, iterations=4) + pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.model_table) + pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.model_table) + pre_full = self.do_prediction(patches, img[:,:,:], self.model_table) + pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table) + pre_updown = cv2.flip(pre_updown, -1) + + prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4) + prediction_table_full_erode = cv2.dilate(prediction_table_full_erode, KERNEL, iterations=4) + + prediction_table_full_updown_erode = cv2.erode(pre_updown[:,:,0], KERNEL, iterations=4) + prediction_table_full_updown_erode = cv2.dilate(prediction_table_full_updown_erode, KERNEL, iterations=4) - prediction_table[:,0:img_w_half,:] = pre1[:,:,:] - prediction_table[:,img_w_half:,:] = pre2[:,:,:] - - prediction_table[:,:,0][prediction_table_full_erode[:,:]==1]=1 - prediction_table[:,:,0][prediction_table_full_updown_erode[:,:]==1]=1 - prediction_table = prediction_table.astype(np.int16) + prediction_table[:,0:img_w_half,:] = pre1[:,:,:] + prediction_table[:,img_w_half:,:] = pre2[:,:,:] + + prediction_table[:,:,0][prediction_table_full_erode[:,:]==1]=1 + 
prediction_table[:,:,0][prediction_table_full_updown_erode[:,:]==1]=1 + prediction_table = prediction_table.astype(np.int16) + + #prediction_table_erode = cv2.erode(prediction_table[:,:,0], self.kernel, iterations=6) + #prediction_table_erode = cv2.dilate(prediction_table_erode, self.kernel, iterations=6) - #prediction_table_erode = cv2.erode(prediction_table[:,:,0], self.kernel, iterations=6) - #prediction_table_erode = cv2.dilate(prediction_table_erode, self.kernel, iterations=6) - - prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) - prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) - return prediction_table_erode.astype(np.int16) + prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) + prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) + return prediction_table_erode.astype(np.int16) def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light): #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') @@ -3500,49 +3536,62 @@ class Eynollah: #print(time.time()-t_0_box,'time box in 3.1') if self.tables: - text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) - #print(time.time()-t_0_box,'time box in 3.2') - img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) - #print(time.time()-t_0_box,'time box in 3.3') + if self.light_version: + pass + else: + text_regions_p_tables = np.copy(text_regions_p) + text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 + pixel_line = 3 + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) + #print(time.time()-t_0_box,'time box in 3.2') + img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) + #print(time.time()-t_0_box,'time box in 3.3') else: boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes = None self.logger.debug("len(boxes): %s", len(boxes_d)) if self.tables: - text_regions_p_tables = np.copy(text_regions_p_1_n) - text_regions_p_tables =np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 - - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) - - img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) - img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) - img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - 
img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + if self.light_version: + pass + else: + text_regions_p_tables = np.copy(text_regions_p_1_n) + text_regions_p_tables =np.round(text_regions_p_tables) + text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 + + pixel_line = 3 + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) + img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) + + img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) + img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) + img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) #print(time.time()-t_0_box,'time box in 4') self.logger.info("detecting boxes took %.1fs", time.time() - t1) if self.tables: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - img_revised_tab = np.copy(img_revised_tab2[:,:,0]) - img_revised_tab[:,:][(text_regions_p[:,:] == 1) & (img_revised_tab[:,:] != 10)] = 1 + if self.light_version: + text_regions_p[:,:][table_prediction[:,:]==1] = 10 + img_revised_tab=text_regions_p[:,:] else: - img_revised_tab = np.copy(text_regions_p[:,:]) - img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 - img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 - - text_regions_p[:,:][text_regions_p[:,:]==10] = 0 - text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + img_revised_tab = np.copy(img_revised_tab2[:,:,0]) + img_revised_tab[:,:][(text_regions_p[:,:] == 1) & (img_revised_tab[:,:] != 10)] = 1 + else: + img_revised_tab = np.copy(text_regions_p[:,:]) + img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 + img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 + + text_regions_p[:,:][text_regions_p[:,:]==10] = 0 + text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 else: img_revised_tab=text_regions_p[:,:] #img_revised_tab = text_regions_p[:, :] - polygons_of_images = return_contours_of_interested_region(img_revised_tab, 2) + if self.light_version: + polygons_of_images = return_contours_of_interested_region(text_regions_p, 2) + else: + polygons_of_images = return_contours_of_interested_region(img_revised_tab, 2) pixel_img = 4 min_area_mar = 0.00001 @@ -3565,82 +3614,102 @@ class Eynollah: self.logger.debug('enter run_boxes_full_layout') t_full0 = time.time() if self.tables: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) - - text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) - textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) - table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) - - regions_without_separators_d=(text_regions_p_1_n[:,:] == 1)*1 - regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 + if self.light_version: + text_regions_p[:,:][table_prediction[:,:]==1] = 10 + 
img_revised_tab=text_regions_p[:,:] + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + + text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) + textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) + table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) + + regions_without_separators_d=(text_regions_p_1_n[:,:] == 1)*1 + regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 + else: + text_regions_p_1_n = None + textline_mask_tot_d = None + regions_without_separators_d = None + regions_without_separators = (text_regions_p[:,:] == 1)*1#( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions) + regions_without_separators[table_prediction == 1] = 1 + else: - text_regions_p_1_n = None - textline_mask_tot_d = None - regions_without_separators_d = None + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + + text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) + textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) + table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) + + regions_without_separators_d=(text_regions_p_1_n[:,:] == 1)*1 + regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 + else: + text_regions_p_1_n = None + textline_mask_tot_d = None + regions_without_separators_d = None + + regions_without_separators = (text_regions_p[:,:] == 1)*1#( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions) + regions_without_separators[table_prediction == 1] = 1 - regions_without_separators = (text_regions_p[:,:] == 1)*1#( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions) - regions_without_separators[table_prediction == 1] = 1 - - pixel_lines=3 - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, peaks_neg_fin, matrix_of_lines_ch, splitter_y_new, seperators_closeup_n = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),num_col_classifier, self.tables, pixel_lines) - - if num_col_classifier>=3: + pixel_lines=3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:,:], KERNEL, iterations=6) + num_col, peaks_neg_fin, matrix_of_lines_ch, splitter_y_new, seperators_closeup_n = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) if np.abs(slope_deskew) >= 
SLOPE_THRESHOLD: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:,:], KERNEL, iterations=6) - else: - pass - - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) - text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10 - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) - - img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier) - - else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - text_regions_p_tables = np.copy(text_regions_p_1_n) - text_regions_p_tables = np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10 - - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction_n, 10, num_col_classifier) - img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) + num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),num_col_classifier, self.tables, pixel_lines) + + if num_col_classifier>=3: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:,:], KERNEL, iterations=6) + + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:,:], KERNEL, iterations=6) + else: + pass + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + text_regions_p_tables = np.copy(text_regions_p) + text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10 + pixel_line = 3 + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) + + img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier) + + else: + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + text_regions_p_tables = np.copy(text_regions_p_1_n) + 
text_regions_p_tables = np.round(text_regions_p_tables) + text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10 + + pixel_line = 3 + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) + + img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction_n, 10, num_col_classifier) + img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) + - img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) - img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) + img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) + img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) - if np.abs(slope_deskew) < 0.13: - img_revised_tab = np.copy(img_revised_tab2[:,:,0]) - else: - img_revised_tab = np.copy(text_regions_p[:,:]) - img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 - img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 - - - ##img_revised_tab=img_revised_tab2[:,:,0] - #img_revised_tab=text_regions_p[:,:] - text_regions_p[:,:][text_regions_p[:,:]==10] = 0 - text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 - #img_revised_tab[img_revised_tab2[:,:,0]==10] =10 + if np.abs(slope_deskew) < 0.13: + img_revised_tab = np.copy(img_revised_tab2[:,:,0]) + else: + img_revised_tab = np.copy(text_regions_p[:,:]) + img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 + img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 + + + ##img_revised_tab=img_revised_tab2[:,:,0] + #img_revised_tab=text_regions_p[:,:] + text_regions_p[:,:][text_regions_p[:,:]==10] = 0 + text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 + #img_revised_tab[img_revised_tab2[:,:,0]==10] =10 pixel_img = 4 min_area_mar = 0.00001 From 1746920275a759e06efa5a862dc39b898e4db75c Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 21 Nov 2024 12:08:29 +0100 Subject: [PATCH 059/107] Update Makefile --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a3b7b95..454c75e 100644 --- a/Makefile +++ b/Makefile @@ -32,9 +32,9 @@ models_eynollah: models_eynollah.tar.gz models_eynollah.tar.gz: # wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' - # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed_savedmodel.tar.gz' + wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed_savedmodel.tar.gz' # wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz' - wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz' + # wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz' # Install with pip install: From 3000255a243105ed82ae5059117c43d6bc93f31d Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 22 Nov 2024 12:40:21 +0100 Subject: [PATCH 060/107] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 
454c75e..6089f6e 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ models_eynollah: models_eynollah.tar.gz models_eynollah.tar.gz: # wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' - wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed_savedmodel.tar.gz' + wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah.tar.gz' # wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz' # wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz' From 8014a9e416dd4bf80f4047d55644844d4d75293a Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 22 Nov 2024 19:47:06 +0100 Subject: [PATCH 061/107] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6089f6e..506fcf7 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ install-dev: pip install -e . smoke-test: - eynollah -i tests/resources/kant_aufklaerung_1784_0020.tif -o . -m $(PWD)/models_eynollah + eynollah layout -i tests/resources/kant_aufklaerung_1784_0020.tif -o . -m $(PWD)/models_eynollah # Run unit tests test: From 1083d1c7fb48d9182e6f635b6815dbc34b145e24 Mon Sep 17 00:00:00 2001 From: kba Date: Mon, 25 Nov 2024 19:32:42 +0100 Subject: [PATCH 062/107] gha: try to free disk space --- .github/workflows/test-eynollah.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index 3a33dcf..8a6941f 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -14,6 +14,12 @@ jobs: python-version: ['3.8', '3.9', '3.10', '3.11'] steps: + - name: clean up + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" - uses: actions/checkout@v4 - uses: actions/cache@v4 id: model_cache From 6aad006f4c556b33a1d23d83c20fe2ca112448bc Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 2 Dec 2024 12:43:57 +0100 Subject: [PATCH 063/107] filter textregions without textline --- src/eynollah/eynollah.py | 45 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index f2426f8..c28c441 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4772,8 +4772,45 @@ class Eynollah: - + def filter_contours_without_textline_inside(self,contours,text_con_org, contours_textline): + + ###contours_txtline_of_all_textregions = [] + + ###for jj in range(len(contours_textline)): + ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] + + ###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] + ###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32)) for j in range(len(M_main_textline))] + ###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32)) for j in range(len(M_main_textline))] + + ###M_main = [cv2.moments(contours[j]) for j in range(len(contours))] + ###cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + ###cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + + ###contours_with_textline = [] + ###for ind_tr, con_tr in 
enumerate(contours): + ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False) for index_textline_con in range(len(contours_txtline_of_all_textregions)) ] + + ###results = np.array(results) + ###if np.any(results==1): + ###contours_with_textline.append(con_tr) + + textregion_index_to_del = [] + for index_textregion, textlines_textregion in enumerate(contours_textline): + if len(textlines_textregion)==0: + textregion_index_to_del.append(index_textregion) + + uniqe_args_trs = np.unique(textregion_index_to_del) + uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1] + + + for ind_u_a_trs in uniqe_args_trs_sorted: + contours.pop(ind_u_a_trs) + contours_textline.pop(ind_u_a_trs) + text_con_org.pop(ind_u_a_trs) + + return contours, text_con_org, contours_textline def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): @@ -5239,6 +5276,8 @@ class Eynollah: all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) + contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons) + else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) @@ -5395,17 +5434,17 @@ class Eynollah: if self.textline_light: mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 img_croped = img_poly_on_img[y:y+h, x:x+w, :] + #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) ocr_textline_in_textregion.append(text_ocr) - ##cv2.imwrite(str(ind_tot)+'.png', img_croped) + ind_tot = ind_tot +1 ocr_all_textlines.append(ocr_textline_in_textregion) From 871d7bfc5a76d8a81b4aec0b4b7c701eeeb883f9 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 4 Dec 2024 16:41:00 +0100 Subject: [PATCH 064/107] fixed: machine-based reading order caused a tuple index out of range error when the number of text regions is one.
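The guard in the diff below boils down to one special case: with a single text region there is nothing to order, and the third axis of the label stack may not exist to index. A minimal sketch of the idea; `build_initial_region_index_list` and `num_regions` are illustrative names, not eynollah API:

def build_initial_region_index_list(num_regions):
    # guard the single-region case before touching labels_con.shape[2]
    if num_regions <= 1:
        return list(range(1))
    return list(range(num_regions))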
--- src/eynollah/eynollah.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index c28c441..e802e29 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4013,7 +4013,10 @@ class Eynollah: inference_bs = 3 input_1= np.zeros( (inference_bs, height1, width1,3)) starting_list_of_regions = [] - starting_list_of_regions.append( list(range(labels_con.shape[2])) ) + if len(co_text_all)<=1: + starting_list_of_regions.append( list(range(1)) ) + else: + starting_list_of_regions.append( list(range(labels_con.shape[2])) ) index_update = 0 index_selected = starting_list_of_regions[0] #print(labels_con.shape[2],"number of regions for reading order") From f765e2603b14186574ec86ff70a1767adfec867d Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 15:57:13 +0000 Subject: [PATCH 065/107] move Torch to optional dependencies (to avoid clash with TF over CuDNN) --- pyproject.toml | 4 ++++ requirements.txt | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b056cb7..61d488a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,10 @@ classifiers = [ "Topic :: Scientific/Engineering :: Image Processing", ] +[project.optional-dependencies] +OCR = ["torch <= 2.0.1", "transformers <= 4.30.2"] +plotting = ["matplotlib"] + [project.scripts] eynollah = "eynollah.cli:main" ocrd-eynollah-segment = "eynollah.ocrd_cli:main" diff --git a/requirements.txt b/requirements.txt index 02450aa..d72df29 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,8 +4,4 @@ numpy <1.24.0 scikit-learn >= 0.23.2 tensorflow < 2.13 imutils >= 0.5.3 -matplotlib -setuptools >= 50 -transformers <= 4.30.2 -torch <= 2.0.1 numba <= 0.58.1 From 7ae64f3717ac84f7aebc12c5933015d8bc4b8056 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 16:18:35 +0000 Subject: [PATCH 066/107] RO model: do not reload when in dir_in mode --- src/eynollah/eynollah.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index e802e29..2dd5505 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -255,7 +255,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_lay_1__4_3_091124"#"/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" @@ -289,7 +289,7 @@ class Eynollah: ###self.model_region_fl_new = 
self.our_load_model(self.model_region_dir_fully_new) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) - self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) + self.model_reading_order = self.our_load_model(self.model_reading_order_dir) if self.ocr: self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -331,7 +331,7 @@ class Eynollah: self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) - self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) + self.model_reading_order = self.our_load_model(self.model_reading_order_dir) if self.tables: self.model_table = self.our_load_model(self.model_table_dir) @@ -3804,7 +3804,7 @@ class Eynollah: model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model - def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + def do_order_of_regions_with_model(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] @@ -3818,7 +3818,8 @@ class Eynollah: img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + if not self.dir_in: + self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir) height1 =672#448 width1 = 448#224 @@ -3896,7 +3897,7 @@ class Eynollah: batch_counter = batch_counter+1 if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): - y_pr=model_ro_machine.predict(input_1 , verbose=0) + y_pr = self.model_reading_order.predict(input_1 , verbose=0) if batch_counter==inference_bs: iteration_batches = inference_bs @@ -3952,7 +3953,7 @@ class Eynollah: else: early_list_bigger_than_one = -20 return list_inp, early_list_bigger_than_one - def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + def do_order_of_regions_with_model_optimized_algorithm(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] @@ -3969,7 +3970,7 @@ class Eynollah: if self.dir_in: pass else: - self.model_reading_order_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir) height1 =672#448 width1 = 448#224 @@ -4055,7 +4056,7 @@ class Eynollah: batch_counter = batch_counter+1 if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): - y_pr=self.model_reading_order_machine.predict(input_1 , verbose=0) + y_pr = self.model_reading_order.predict(input_1 , verbose=0) if batch_counter==inference_bs: iteration_batches = inference_bs @@ -5362,7 +5363,7 @@ class Eynollah: if self.full_layout: if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = 
self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) @@ -5384,7 +5385,7 @@ class Eynollah: else: contours_only_text_parent_h = None if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) From 3b9a29bc5c187fe6ae4c41450a0095c3271ec703 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 18:19:54 +0000 Subject: [PATCH 067/107] simplify dir_in conditionals --- src/eynollah/eynollah.py | 78 +++++++++++++--------------------------- 1 file changed, 24 insertions(+), 54 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 2dd5505..c1e0f4d 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -274,7 +274,8 @@ class Eynollah: self.models = {} - if dir_in and light_version: + if dir_in: + # as in start_new_session: config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True session = tf.compat.v1.Session(config=config) @@ -283,62 +284,31 @@ class Eynollah: self.model_page = self.our_load_model(self.model_page_dir) self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) self.model_bin = self.our_load_model(self.model_dir_of_binarization) - self.model_textline = self.our_load_model(self.model_textline_dir) - self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) - self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np) - ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) - self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) - self.model_region_fl = self.our_load_model(self.model_region_dir_fully) - self.model_reading_order = self.our_load_model(self.model_reading_order_dir) - if self.ocr: - self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")#("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") - if self.tables: - self.model_table = self.our_load_model(self.model_table_dir) - - - self.ls_imgs = os.listdir(self.dir_in) - - if dir_in and self.extract_only_images: - config = tf.compat.v1.ConfigProto() - config.gpu_options.allow_growth = True - session = tf.compat.v1.Session(config=config) - set_session(session) - - self.model_page = self.our_load_model(self.model_page_dir) - self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) - self.model_bin = self.our_load_model(self.model_dir_of_binarization) - #self.model_textline = 
self.our_load_model(self.model_textline_dir) - self.model_region = self.our_load_model(self.model_region_dir_p_ens_light_only_images_extraction) - #self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) - #self.model_region_fl = self.our_load_model(self.model_region_dir_fully) - - self.ls_imgs = os.listdir(self.dir_in) - - if dir_in and not (light_version or self.extract_only_images): - config = tf.compat.v1.ConfigProto() - config.gpu_options.allow_growth = True - session = tf.compat.v1.Session(config=config) - set_session(session) + if self.extract_only_images: + self.model_region = self.our_load_model(self.model_region_dir_p_ens_light_only_images_extraction) + else: + self.model_textline = self.our_load_model(self.model_textline_dir) + if self.light_version: + self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) + self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np) + else: + self.model_region = self.our_load_model(self.model_region_dir_p_ens) + self.model_region_p2 = self.our_load_model(self.model_region_dir_p2) + self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) + ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) + self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) + self.model_region_fl = self.our_load_model(self.model_region_dir_fully) + if self.reading_order_machine_based: + self.model_reading_order = self.our_load_model(self.model_reading_order_dir) + if self.ocr: + self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")#("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") + if self.tables: + self.model_table = self.our_load_model(self.model_table_dir) - self.model_page = self.our_load_model(self.model_page_dir) - self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) - self.model_bin = self.our_load_model(self.model_dir_of_binarization) - self.model_textline = self.our_load_model(self.model_textline_dir) - self.model_region = self.our_load_model(self.model_region_dir_p_ens) - self.model_region_p2 = self.our_load_model(self.model_region_dir_p2) - self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) - self.model_region_fl = self.our_load_model(self.model_region_dir_fully) - self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) - self.model_reading_order = self.our_load_model(self.model_reading_order_dir) - if self.tables: - self.model_table = self.our_load_model(self.model_table_dir) - self.ls_imgs = os.listdir(self.dir_in) - - def _cache_images(self, image_filename=None, image_pil=None): ret = {} From 329fac23f67b2a46fe3c00b0340bd3a56143bed2 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 18:29:49 +0000 Subject: [PATCH 068/107] do not reload enhancement model in dir_in mode, simplify --- src/eynollah/eynollah.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index c1e0f4d..145f722 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -363,10 +363,11 @@ class Eynollah: def predict_enhancement(self, img): self.logger.debug("enter predict_enhancement") - model_enhancement, session_enhancement = 
self.start_new_session_and_model(self.model_dir_of_enhancement) + if not self.dir_in: + self.model_enhancement, _ = self.start_new_session_and_model(self.model_dir_of_enhancement) - img_height_model = model_enhancement.layers[len(model_enhancement.layers) - 1].output_shape[1] - img_width_model = model_enhancement.layers[len(model_enhancement.layers) - 1].output_shape[2] + img_height_model = self.model_enhancement.layers[len(self.model_enhancement.layers) - 1].output_shape[1] + img_width_model = self.model_enhancement.layers[len(self.model_enhancement.layers) - 1].output_shape[2] if img.shape[0] < img_height_model: img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST) @@ -409,9 +410,8 @@ class Eynollah: index_y_u = img_h index_y_d = img_h - img_height_model - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model_enhancement.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - verbose=0) + img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :] + label_p_pred = self.model_enhancement.predict(img_patch, verbose=0) seg = label_p_pred[0, :, :, :] seg = seg * 255 From 14beb46224b3f0660f44dafbf4bbe094b68d7274 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 21:07:26 +0000 Subject: [PATCH 069/107] simplify loading models w/o dir_in mode --- src/eynollah/eynollah.py | 203 +++++++++++++++------------------------ 1 file changed, 75 insertions(+), 128 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 145f722..d11531a 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -567,7 +567,8 @@ class Eynollah: _, page_coord = self.early_page_for_num_of_column_classification(img) if not self.dir_in: - model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) + self.model_classifier, _ = self.start_new_session_and_model(self.model_dir_of_col_classifier) + if self.input_binary: img_in = np.copy(img) img_in = img_in / 255.0 @@ -590,10 +591,7 @@ class Eynollah: img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] - if not self.dir_in: - label_p_pred = model_num_classifier.predict(img_in, verbose=0) - else: - label_p_pred = self.model_classifier.predict(img_in, verbose=0) + label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 @@ -613,12 +611,10 @@ class Eynollah: self.logger.info("Detected %s DPI", dpi) if self.input_binary: img = self.imread() - if self.dir_in: - prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) - else: - - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img, model_bin, n_batch_inference=5) + if not self.dir_in: + self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) + + prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 @@ -641,7 +637,7 @@ class Eynollah: self.page_coord = page_coord if not self.dir_in: - model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) + self.model_classifier, _ = self.start_new_session_and_model(self.model_dir_of_col_classifier) if self.num_col_upper and not self.num_col_lower: num_col = self.num_col_upper @@ -669,10 +665,7 @@ class Eynollah: 
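# Editor's sketch, not part of the patch: the dir_in hunks in patches 066-069
# converge on one loading pattern - in dir_in batch mode the models are
# preloaded once in the constructor, otherwise each model is loaded on demand
# right before its first prediction. A reduced, self-contained illustration;
# _stub_loader is hypothetical and merely stands in for
# start_new_session_and_model():
def _stub_loader(path):
    class _Model:
        def predict(self, batch, verbose=0):
            return [[0.5]]  # dummy prediction
    return _Model(), None  # (model, session) pair, like the real helper

class LazyModels:
    def __init__(self, dir_in=None):
        self.dir_in = dir_in
        self.model_classifier = None
        if dir_in:
            # batch mode: load everything up front, exactly once
            self.model_classifier, _ = _stub_loader("col_classifier")

    def classify(self, img_in):
        if not self.dir_in:
            # single-image mode: load on demand
            self.model_classifier, _ = _stub_loader("col_classifier")
        return self.model_classifier.predict(img_in, verbose=0)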
img_in[0, :, :, 2] = img_1ch[:, :] - if self.dir_in: - label_p_pred = self.model_classifier.predict(img_in, verbose=0) - else: - label_p_pred = model_num_classifier.predict(img_in, verbose=0) + label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): if self.input_binary: @@ -693,10 +686,7 @@ class Eynollah: img_in[0, :, :, 2] = img_1ch[:, :] - if self.dir_in: - label_p_pred = self.model_classifier.predict(img_in, verbose=0) - else: - label_p_pred = model_num_classifier.predict(img_in, verbose=0) + label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 if num_col > self.num_col_upper: @@ -1381,12 +1371,9 @@ class Eynollah: img = cv2.GaussianBlur(self.image, (5, 5), 0) if not self.dir_in: - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + self.model_page, _ = self.start_new_session_and_model(self.model_page_dir) - if not self.dir_in: - img_page_prediction = self.do_prediction(False, img, model_page) - else: - img_page_prediction = self.do_prediction(False, img, self.model_page) + img_page_prediction = self.do_prediction(False, img, self.model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) thresh = cv2.dilate(thresh, KERNEL, iterations=3) @@ -1429,13 +1416,10 @@ class Eynollah: else: img = self.imread() if not self.dir_in: - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + self.model_page, _ = self.start_new_session_and_model(self.model_page_dir) img = cv2.GaussianBlur(img, (5, 5), 0) - if self.dir_in: - img_page_prediction = self.do_prediction(False, img, self.model_page) - else: - img_page_prediction = self.do_prediction(False, img, model_page) + img_page_prediction = self.do_prediction(False, img, self.model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) @@ -1462,9 +1446,12 @@ class Eynollah: img_height_h = img.shape[0] img_width_h = img.shape[1] if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np) - else: - model_region = self.model_region_fl if patches else self.model_region_fl_np + if patches: + self.model_region_fl, _ = self.start_new_session_and_model(self.model_region_dir_fully) + else: + self.model_region_fl_np, _ = self.start_new_session_and_model(self.model_region_dir_fully_np) + + model_region = self.model_region_fl if patches else self.model_region_fl_np if not patches: if self.light_version: @@ -1546,9 +1533,12 @@ class Eynollah: img_height_h = img.shape[0] img_width_h = img.shape[1] if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np) - else: - model_region = self.model_region_fl if patches else self.model_region_fl_np + if patches: + self.model_region_fl, _ = self.start_new_session_and_model(self.model_region_dir_fully) + else: + self.model_region_fl_np, _ = self.start_new_session_and_model(self.model_region_dir_fully_np) + + model_region = self.model_region_fl if patches else self.model_region_fl_np if not patches: img = otsu_copy_binary(img) @@ -2049,26 +2039,18 @@ class Eynollah: else: thresholding_for_artificial_class_in_light_version = False if not self.dir_in: - 
model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir) + self.model_textline, _ = self.start_new_session_and_model(self.model_textline_dir) #img = img.astype(np.uint8) img_org = np.copy(img) img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) - - #if not thresholding_for_artificial_class_in_light_version: - #if num_col_classifier==1: - #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) - #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 - else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) - #if not thresholding_for_artificial_class_in_light_version: - #if num_col_classifier==1: - #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) - #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 + prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + #if not thresholding_for_artificial_class_in_light_version: + #if num_col_classifier==1: + #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline) + #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 prediction_textline = resize_image(prediction_textline, img_h, img_w) textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1 @@ -2092,10 +2074,7 @@ class Eynollah: if not thresholding_for_artificial_class_in_light_version: prediction_textline[:,:][old_art[:,:]==1]=2 - if not self.dir_in: - prediction_textline_longshot = self.do_prediction(False, img, model_textline) - else: - prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) + prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) return ((prediction_textline[:, :, 0]==1)*1).astype('uint8'), ((prediction_textline_longshot_true_size[:, :, 0]==1)*1).astype('uint8') @@ -2161,10 +2140,8 @@ class Eynollah: if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light_only_images_extraction) - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region) - else: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region) + self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light_only_images_extraction) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region) prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) @@ -2256,7 +2233,7 @@ class Eynollah: img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - #model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + #model_region, _ = 
self.start_new_session_and_model(self.model_region_dir_p_ens) #print(num_col_classifier,'num_col_classifier') @@ -2290,10 +2267,8 @@ class Eynollah: #img_bin = np.copy(img_resized) ###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): ###if not self.dir_in: - ###model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - ###prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) - ###else: - ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + ###self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) + ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) ####print("inside bin ", time.time()-t_bin) ###prediction_bin=prediction_bin[:,:,0] @@ -2309,10 +2284,8 @@ class Eynollah: ###img_bin = np.copy(img_resized) if self.ocr and not self.input_binary: if not self.dir_in: - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2341,30 +2314,27 @@ class Eynollah: if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) if not self.dir_in: - if num_col_classifier == 1 or num_col_classifier == 2: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) - else: - prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page + self.model_region_1_2, _ = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + ##self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + + if num_col_classifier == 1 or num_col_classifier == 2: + if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + prediction_regions_org = self.do_prediction_new_concept( + True, img_resized, self.model_region_1_2, n_batch_inference=1, + thresholding_for_some_classes_in_light_version=True) else: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) - ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - ##prediction_regions_org = 
self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + prediction_regions_page = self.do_prediction_new_concept( + False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, + thresholding_for_artificial_class_in_light_version=True) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - if num_col_classifier == 1 or num_col_classifier == 2: - if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_some_classes_in_light_version=True) - else: - prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page - else: - prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), self.model_region_1_2, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) - ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) + new_h = (900+ (num_col_classifier-3)*100) + img_resized = resize_image(img_bin, int(new_h * img_bin.shape[0] /img_bin.shape[1]), new_h) + prediction_regions_org = self.do_prediction_new_concept( + True, img_resized, self.model_region_1_2, n_batch_inference=2, + thresholding_for_some_classes_in_light_version=True) + ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) @@ -2466,16 +2436,13 @@ class Eynollah: img_width_h = img_org.shape[1] if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) ratio_y=1.3 ratio_x=1 img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - if not self.dir_in: - prediction_regions_org_y = self.do_prediction(True, img, model_region) - else: - prediction_regions_org_y = self.do_prediction(True, img, self.model_region) + prediction_regions_org_y = self.do_prediction(True, img, self.model_region) prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h ) #plt.imshow(prediction_regions_org_y[:,:,0]) @@ -2494,10 +2461,7 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1))) - if self.dir_in: - prediction_regions_org = self.do_prediction(True, img, self.model_region) - else: - prediction_regions_org = self.do_prediction(True, img, model_region) + prediction_regions_org = self.do_prediction(True, img, self.model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) 
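# Editor's sketch, not part of the patch: a recurring idiom in these hunks is
# to run the region model on a rescaled copy of the page, then resize the
# prediction back to the original geometry so downstream masks align
# pixel-for-pixel. Reduced illustration; predict_fn stands in for a bound
# self.do_prediction call and is an assumption, not eynollah API:
import cv2

def predict_at_scale(img, predict_fn, ratio_y=1.3, ratio_x=1.0):
    h, w = img.shape[:2]
    scaled = cv2.resize(img, (int(w * ratio_x), int(h * ratio_y)),
                        interpolation=cv2.INTER_NEAREST)
    pred = predict_fn(scaled)  # e.g. do_prediction(True, scaled, model)
    # map the label image back onto the original page geometry
    return cv2.resize(pred, (w, h), interpolation=cv2.INTER_NEAREST)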
prediction_regions_org=prediction_regions_org[:,:,0] @@ -2505,14 +2469,11 @@ class Eynollah: if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2) + self.model_region_p2, _ = self.start_new_session_and_model(self.model_region_dir_p2) img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) - if self.dir_in: - prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) - else: - prediction_regions_org2 = self.do_prediction(True, img, model_region, marginal_of_patch_percent=0.2) + prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) @@ -2544,10 +2505,8 @@ class Eynollah: prediction_bin = np.copy(img_org) else: if not self.dir_in: - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) + self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] @@ -2557,17 +2516,14 @@ class Eynollah: prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) ratio_y=1 ratio_x=1 img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - if not self.dir_in: - prediction_regions_org = self.do_prediction(True, img, model_region) - else: - prediction_regions_org = self.do_prediction(True, img, self.model_region) + prediction_regions_org = self.do_prediction(True, img, self.model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] @@ -2597,10 +2553,8 @@ class Eynollah: prediction_bin = np.copy(img_org) if not self.dir_in: - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) + self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] @@ -2612,7 +2566,7 @@ class Eynollah: if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) else: prediction_bin = np.copy(img_org) @@ -2621,17 +2575,14 @@ class Eynollah: img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - if not self.dir_in: - prediction_regions_org = self.do_prediction(True, img, model_region) - else: - prediction_regions_org = self.do_prediction(True, img, self.model_region) + 
prediction_regions_org = self.do_prediction(True, img, self.model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] #mask_lines_only=(prediction_regions_org[:,:]==3)*1 #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) - #prediction_regions_org = self.do_prediction(True, img, model_region) + #prediction_regions_org = self.do_prediction(True, img, self.model_region) #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) @@ -3173,9 +3124,7 @@ class Eynollah: - if self.dir_in: - pass - else: + if not self.dir_in: self.model_table, _ = self.start_new_session_and_model(self.model_table_dir) patches = False @@ -3937,9 +3886,7 @@ class Eynollah: img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - if self.dir_in: - pass - else: + if not self.dir_in: self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir) height1 =672#448 From 9f12fa241dfb09eff9119e940285e1271dfc700b Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 22:09:15 +0000 Subject: [PATCH 070/107] log-level: only set 'eynollah' logger level --- src/eynollah/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index bed0c03..5f4b5a4 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -1,6 +1,6 @@ import sys import click -from ocrd_utils import initLogging, setOverrideLogLevel +from ocrd_utils import initLogging, getLevelName, getLogger from eynollah.eynollah import Eynollah from eynollah.sbb_binarize import SbbBinarizer @@ -254,9 +254,9 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) ) def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): - if log_level: - setOverrideLogLevel(log_level) initLogging() + if log_level: + getLogger('eynollah').setLevel(getLevelName(log_level)) if not enable_plotting and (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement): print("Error: You used one of -sl, -sd, -sa, -sp, -si or -ae but did not enable plotting with -ep") sys.exit(1) From 5b82320707e92162220b819734705c40e77bce74 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 22:09:32 +0000 Subject: [PATCH 071/107] avoid indentation --- src/eynollah/eynollah.py | 899 ++++++++++++++++++++------------------- 1 file changed, 450 insertions(+), 449 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d11531a..4f1d8e3 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4926,496 +4926,497 @@ class Eynollah: if self.dir_in: self.writer.write_pagexml(pcgts) + continue else: return pcgts - else: - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - self.logger.info("Enhancing took %.1fs ", time.time() - t0) - #print("text region early -1 in %.1fs", time.time() - t0) - t1 = time.time() - if not self.skip_layout_and_reading_order: - if self.light_version: - text_regions_p_1 ,erosion_hurts, 
polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) - - if num_col_classifier == 1 or num_col_classifier ==2: - if num_col_classifier == 1: - img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: - img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) - else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) - #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) - textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - - t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - #self.logger.info('cont_page %s', cont_page) - - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t1) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts - #print("text region early in %.1fs", time.time() - t0) - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) - t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - t1 = time.time() - #plt.imshow(table_prediction) - #plt.show() - if self.light_version and num_col_classifier in (1,2): - org_h_l_m = textline_mask_tot_ea.shape[0] - org_w_l_m = textline_mask_tot_ea.shape[1] + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + self.logger.info("Enhancing took %.1fs ", time.time() - t0) + #print("text region early -1 in %.1fs", time.time() - t0) + t1 = time.time() + if not self.skip_layout_and_reading_order: + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = 
self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) + + if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: - img_w_new = 2000 + img_w_new = 1000 img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - + elif num_col_classifier == 2: - img_w_new = 2400 + img_w_new = 1300 img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - image_page = resize_image(image_page,img_h_new, img_w_new ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - mask_images = resize_image(mask_images,img_h_new, img_w_new ) - mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) - text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) - table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - - if self.light_version and num_col_classifier in (1,2): - image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) - text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) - textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) - text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) - table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) - - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) - ## birdan sora chock chakir + + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) + else: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.full_layout: - if not 
self.light_version: - img_bin_light = None - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - - if self.light_version: - drop_label_in_full_layout = 4 - textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 - - - text_only = ((img_revised_tab[:, :] == 1)) * 1 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - - #print("text region early 2 in %.1fs", time.time() - t0) - ###min_con_area = 0.000005 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t1) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts + #print("text region early in %.1fs", time.time() - t0) + t1 = time.time() + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + 
self.logger.info("deskewing took %.1fs", time.time() - t1) + t1 = time.time() + #plt.imshow(table_prediction) + #plt.show() + if self.light_version and num_col_classifier in (1,2): + org_h_l_m = textline_mask_tot_ea.shape[0] + org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1: + img_w_new = 2000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 2400 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + image_page = resize_image(image_page,img_h_new, img_w_new ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + mask_images = resize_image(mask_images,img_h_new, img_w_new ) + mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) + text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) + table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + if self.light_version and num_col_classifier in (1,2): + image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) + text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) + textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) + text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) + table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) + + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) + ## birdan sora chock chakir + t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + if self.full_layout: + if not self.light_version: + img_bin_light = None + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + if self.light_version: + drop_label_in_full_layout = 4 + textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 - areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d = 
np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) - - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big - - contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] - + + text_only = ((img_revised_tab[:, :] == 1)) * 1 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + #print("text region early 2 in %.1fs", time.time() - t0) + ###min_con_area = 0.000005 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + 
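# Editor's sketch, not part of the patch: the block being re-indented here
# matches each text region to its deskewed counterpart by rotating the region
# centroids with the deskew rotation matrix and picking the nearest deskewed
# centroid. Simplified illustration (the x_diff/y_diff offset calibration on
# the biggest region is omitted); match_after_deskew is a hypothetical name:
import math
import numpy as np
import cv2

def match_after_deskew(centroids, centroids_deskewed, angle, shape):
    h, w = shape
    M = cv2.getRotationMatrix2D((w / 2.0, h / 2.0), angle, 1.0)
    M22 = np.array(M)[:2, :2]  # rotation part only
    order = []
    for cx, cy in centroids:
        px, py = np.dot(M22, [cx, cy])
        dists = [math.hypot(px - dx, py - dy) for dx, dy in centroids_deskewed]
        order.append(int(np.argmin(dists)))
    return order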
areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + (h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 
2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] + else: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] + else: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + #try: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #except: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) + # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) + # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) + else: + pass - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + #print("text region early 3 in %.1fs", time.time() - t0) + if self.light_version: + contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + #print("text region early 3.5 in %.1fs", time.time() - t0) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + #txt_con_org = self.dilate_textregions_contours(txt_con_org) + 
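# note: in the light variant the region polygons are slightly dilated and contours nested inside a bigger one are dropped before mapping the regions back onto the original image
+            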
#contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent)
+        else:
+            txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first)
+        #print("text region early 4 in %.1fs", time.time() - t0)
+        boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
+        boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)
+        #print("text region early 5 in %.1fs", time.time() - t0)
+        ## from here on, processing takes a long time
+        if not self.curved_line:
+            if self.light_version:
+                if self.textline_light:
+                    #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
+
+                    slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
+                    slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew)
+
+                    #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con)
+
+                    #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
+                    #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
+                    #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
+                    all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons)
+                    all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
+                    all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals)
+
+                    contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons)
-            index_con_parents = np.argsort(areas_cnt_text_parent)
-
-            contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents)
-            #try:
-                #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents])
-            #except:
-                #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents])
-            #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents])
-            areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents)
-
-            cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest])
-            cx_bigest, cy_biggest, 
_, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) - # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) - # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) else: - pass - - #print("text region early 3 in %.1fs", time.time() - t0) - if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) - #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) - #txt_con_org = self.dilate_textregions_contours(txt_con_org) - #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + + #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) - ## birdan sora chock chakir - if not self.curved_line: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + + else: + + scale_param = 1 + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = 
self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + #print("text region early 6 in %.1fs", time.time() - t0) + if self.full_layout: + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) if self.light_version: - if self.textline_light: - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) - #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) - - contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons) - - else: - textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, 
image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - - #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - - scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - #print("text region early 6 in %.1fs", time.time() - t0) - if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - 
#contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + #takes long timee + contours_only_text_parent_d_ordered = None + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - #takes long timee - contours_only_text_parent_d_ordered = None - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - if self.plotter: - self.plotter.save_plot_of_layout(text_regions_p, image_page) - self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, 
contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) - pixel_lines = 6 - - if not self.reading_order_machine_based: - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + if self.plotter: + self.plotter.save_plot_of_layout(text_regions_p, image_page) + self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - if num_col_classifier >= 3: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + pixel_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) + pixel_lines = 6 - else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - - if not self.reading_order_machine_based: + if not self.reading_order_machine_based: + if not self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) else: - boxes_d, peaks_neg_tot_tables_d = 
return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if self.plotter: - self.plotter.write_images_into_directory(polygons_of_images, image_page) - t_order = time.time() - - if self.full_layout: - - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - else: + if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - - if self.ocr: - ocr_all_textlines = [] + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + + if not self.reading_order_machine_based: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - ocr_all_textlines = None - - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - - + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + + if self.plotter: + self.plotter.write_images_into_directory(polygons_of_images, image_page) + t_order = time.time() + + if self.full_layout: + + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_h = None - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - if np.abs(slope_deskew) < 
SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) - else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + + if self.ocr: + ocr_all_textlines = [] + else: + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts - if self.ocr: - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) - - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 - - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) - - ocr_textline_in_textregion.append(text_ocr) - - - ind_tot = 
ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) - - else: - ocr_all_textlines = None - #print(ocr_all_textlines) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) else: - _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) - - page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) - - - ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) - - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) - all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - - all_found_textline_polygons=[ all_found_textline_polygons ] - - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") - - - order_text_new = [0] - slopes =[0] - id_of_texts_tot =['region_0001'] - - polygons_of_images = [] - slopes_marginals = [] - polygons_of_marginals = [] - all_found_textline_polygons_marginals = [] - all_box_coord_marginals = [] - polygons_lines_xml = [] - contours_tables = [] - ocr_all_textlines = None - - pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + contours_only_text_parent_h = None + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + else: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + + + if self.ocr: + + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = 
VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) + + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + + ocr_textline_in_textregion.append(text_ocr) + + + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) + + else: + ocr_all_textlines = None + #print(ocr_all_textlines) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts - - if self.dir_in: - self.writer.write_pagexml(pcgts) - #self.logger.info("Job done in %.1fs", time.time() - t0) - print("Job done in %.1fs", time.time() - t0) + #print("text region early 7 in %.1fs", time.time() - t0) + else: + _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) + + page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + + + ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) + + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) + all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + all_found_textline_polygons=[ all_found_textline_polygons ] + + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) + 
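# note: with layout analysis skipped, all detected textlines are wrapped into a single pseudo-region ('region_0001' below) that receives the trivial reading order [0]
+            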
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline")
+
+
+            order_text_new = [0]
+            slopes =[0]
+            id_of_texts_tot =['region_0001']
+
+            polygons_of_images = []
+            slopes_marginals = []
+            polygons_of_marginals = []
+            all_found_textline_polygons_marginals = []
+            all_box_coord_marginals = []
+            polygons_lines_xml = []
+            contours_tables = []
+            ocr_all_textlines = None
+
+            pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines)
+            if not self.dir_in:
+                return pcgts
+
+            if self.dir_in:
+                self.writer.write_pagexml(pcgts)
+                #self.logger.info("Job done in %.1fs", time.time() - t0)
+                print("Job done in %.1fs" % (time.time() - t0))
 
         if self.dir_in:
             self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)

From cd4e426977193e34452f76abf5af8b7af222d8b0 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky
Date: Wed, 4 Dec 2024 22:11:34 +0000
Subject: [PATCH 072/107] avoid indentation (skip_layout_and_reading_order)

---
 src/eynollah/eynollah.py | 824 ++++++++++++++++++++-------------------
 1 file changed, 413 insertions(+), 411 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 4f1d8e3..2772bd4 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -4934,489 +4934,491 @@ class Eynollah:
         self.logger.info("Enhancing took %.1fs ", time.time() - t0)
         #print("text region early -1 in %.1fs", time.time() - t0)
         t1 = time.time()
-        if not self.skip_layout_and_reading_order:
-            if self.light_version:
-                text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
-                #print("text region early -2 in %.1fs", time.time() - t0)
-
-                if num_col_classifier == 1 or num_col_classifier ==2:
-                    if num_col_classifier == 1:
-                        img_w_new = 1000
-                        img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new)
-
-                    elif num_col_classifier == 2:
-                        img_w_new = 1300
-                        img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new)
-
-                    textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )
-
-                    slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew)
-                else:
-                    slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
-                #print("text region early -2,5 in %.1fs", time.time() - t0)
-                #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
-                num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \
-                    self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, 
num_column_is_classified, erosion_hurts, img_bin_light) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) - textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) + all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - #self.logger.info('cont_page %s', cont_page) + all_found_textline_polygons=[ all_found_textline_polygons ] - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t1) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts - #print("text region early in %.1fs", time.time() - t0) - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") - t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - t1 = time.time() - #plt.imshow(table_prediction) - #plt.show() - if self.light_version and num_col_classifier in (1,2): - org_h_l_m = textline_mask_tot_ea.shape[0] - org_w_l_m = textline_mask_tot_ea.shape[1] - if num_col_classifier == 1: - img_w_new = 2000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - elif num_col_classifier == 2: - img_w_new = 2400 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + order_text_new = [0] + slopes =[0] + id_of_texts_tot =['region_0001'] - image_page = resize_image(image_page,img_h_new, img_w_new ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - mask_images = resize_image(mask_images,img_h_new, img_w_new ) - mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) - text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) - table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - - if self.light_version and num_col_classifier in (1,2): - image_page = 
resize_image(image_page,org_h_l_m, org_w_l_m ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) - text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) - textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) - text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) - table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) - - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) - ## birdan sora chock chakir - t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.full_layout: - if not self.light_version: - img_bin_light = None - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + polygons_of_images = [] + slopes_marginals = [] + polygons_of_marginals = [] + all_found_textline_polygons_marginals = [] + all_box_coord_marginals = [] + polygons_lines_xml = [] + contours_tables = [] + ocr_all_textlines = None - if self.light_version: - drop_label_in_full_layout = 4 - textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 + pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + if self.dir_in: + continue + else: + return pcgts + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) - text_only = ((img_revised_tab[:, :] == 1)) * 1 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + if num_col_classifier == 1 or num_col_classifier ==2: + if num_col_classifier == 1: + img_w_new = 1000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - #print("text region early 2 in %.1fs", time.time() - t0) - ###min_con_area = 0.000005 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text 
%s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) - - areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) - - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big - - 
contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] + elif num_col_classifier == 2: + img_w_new = 1300 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) else: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) + else: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + t1 = time.time() + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t1) + if self.dir_in: + 
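# note: in directory mode one PAGE-XML is written per input file and the loop continues; in single-image mode the pcgts object is returned to the caller instead
+                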
self.writer.write_pagexml(pcgts) + continue + else: + return pcgts + #print("text region early in %.1fs", time.time() - t0) + t1 = time.time() + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) - index_con_parents = np.argsort(areas_cnt_text_parent) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) + t1 = time.time() + #plt.imshow(table_prediction) + #plt.show() + if self.light_version and num_col_classifier in (1,2): + org_h_l_m = textline_mask_tot_ea.shape[0] + org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1: + img_w_new = 2000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 2400 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + image_page = resize_image(image_page,img_h_new, img_w_new ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + mask_images = resize_image(mask_images,img_h_new, img_w_new ) + mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) + text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) + table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + if self.light_version and num_col_classifier in (1,2): + image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) + text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) + textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) + text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) + table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) + + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) + ## birdan sora chock chakir + t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + if self.full_layout: + if not self.light_version: + img_bin_light = None + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + if self.light_version: + drop_label_in_full_layout = 4 + 
textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 + + + text_only = ((img_revised_tab[:, :] == 1)) * 1 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + #print("text region early 2 in %.1fs", time.time() - t0) + ###min_con_area = 0.000005 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) #try: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) #except: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) - # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) - # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) + #contours_only_text_parent_d = 
list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + (h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() else: - pass + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] - #print("text region early 3 in %.1fs", time.time() - t0) - if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) - #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) - #txt_con_org = self.dilate_textregions_contours(txt_con_org) - #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) - ## birdan sora chock chakir - if not self.curved_line: - if self.light_version: - if self.textline_light: - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, 
contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] + else: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + #try: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #except: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) + # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) + # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) + else: + pass - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + #print("text region early 3 in %.1fs", time.time() - t0) + if self.light_version: + contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + #print("text region early 3.5 in %.1fs", time.time() - t0) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + #txt_con_org = self.dilate_textregions_contours(txt_con_org) + #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + else: + txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) + boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) + boxes_marginals, _ 
= get_text_region_boxes_by_given_contours(polygons_of_marginals) + #print("text region early 5 in %.1fs", time.time() - t0) + ## birdan sora chock chakir + if not self.curved_line: + if self.light_version: + if self.textline_light: + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) - #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons) + #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) + #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + all_found_textline_polygons_marginals = 
self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) - else: - textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons) - #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = 
small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - #print("text region early 6 in %.1fs", time.time() - t0) - if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - #takes long timee - contours_only_text_parent_d_ordered = None - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - - if self.plotter: - self.plotter.save_plot_of_layout(text_regions_p, image_page) - self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = 
adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) - pixel_lines = 6 + else: - if not self.reading_order_machine_based: - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + scale_param = 1 + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + #print("text region early 6 in %.1fs", time.time() - t0) + if self.full_layout: + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + text_regions_p, contours_only_text_parent, 
contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + #takes long timee + contours_only_text_parent_d_ordered = None + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - if num_col_classifier >= 3: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + if self.plotter: + self.plotter.save_plot_of_layout(text_regions_p, image_page) + self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + pixel_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) + pixel_lines = 6 - if not self.reading_order_machine_based: + if not self.reading_order_machine_based: + if not self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - - if self.plotter: - self.plotter.write_images_into_directory(polygons_of_images, image_page) - t_order = time.time() - - if self.full_layout: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = 
find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - else: + if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - if self.ocr: - ocr_all_textlines = [] + if not self.reading_order_machine_based: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - ocr_all_textlines = None + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts + if self.plotter: + self.plotter.write_images_into_directory(polygons_of_images, image_page) + t_order = time.time() + if self.full_layout: + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_h = None - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = 
self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) - else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - - - if self.ocr: - - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) - - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) - - ocr_textline_in_textregion.append(text_ocr) + if self.ocr: + ocr_all_textlines = [] + else: + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, 
polygons_lines_xml, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts - ind_tot = ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) - else: - ocr_all_textlines = None - #print(ocr_all_textlines) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) else: - _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) + contours_only_text_parent_h = None + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + else: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + if self.ocr: - ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + 
img_poly_on_img = np.copy(img_bin_light) - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) - all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - all_found_textline_polygons=[ all_found_textline_polygons ] + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + ocr_textline_in_textregion.append(text_ocr) - order_text_new = [0] - slopes =[0] - id_of_texts_tot =['region_0001'] - polygons_of_images = [] - slopes_marginals = [] - polygons_of_marginals = [] - all_found_textline_polygons_marginals = [] - all_box_coord_marginals = [] - polygons_lines_xml = [] - contours_tables = [] - ocr_all_textlines = None + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) - pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + else: + ocr_all_textlines = None + #print(ocr_all_textlines) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) if self.dir_in: self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) - print("Job done in %.1fs" % time.time() - t0) + print("Job done in %.1fs" % (time.time() - t0)) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) From a520bd1f771b9263ed865e879248b38692cdef86 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 4 Dec 2024 22:49:34 +0000 Subject: [PATCH 073/107] wrap extremely long lines --- src/eynollah/eynollah.py | 95 ++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 38 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 2772bd4..769093d 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4935,7 +4935,8 @@ class Eynollah: #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if self.skip_layout_and_reading_order: - _ ,_, _, textline_mask_tot_ea, img_bin_light = 
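Both the removed and the re-added code above rely on the same trick for re-associating text regions across deskewing: each region centroid is rotated with the 2x2 part of the cv2.getRotationMatrix2D affine matrix, shifted by an offset measured on a reference region, and then matched to the nearest centroid among the regions found in the deskewed image. The following is a minimal standalone sketch of that idea, assuming only numpy and OpenCV; the function name and the toy centroid arrays are hypothetical and not part of eynollah's API:

    import math
    import numpy as np
    import cv2

    def match_regions_across_rotation(cx, cy, cx_d, cy_d, slope_deskew, page_shape):
        # 2x2 rotation part of the affine deskew matrix (M_22 in the hunk above).
        h, w = page_shape
        M = cv2.getRotationMatrix2D((w / 2.0, h / 2.0), slope_deskew, 1.0)
        M_22 = np.array(M)[:2, :2]
        # Offset between the rotated and the deskewed coordinate frames; the
        # patch estimates it from the largest region, here simply from the first.
        p0 = np.dot(M_22, [cx[0], cy[0]])
        x_diff, y_diff = p0[0] - cx_d[0], p0[1] - cy_d[0]
        order = []
        for i in range(len(cx)):
            p = np.dot(M_22, [cx[i], cy[i]])
            p[0] -= x_diff
            p[1] -= y_diff
            # index of the nearest deskewed centroid for region i
            dists = [math.sqrt((p[0] - cx_d[j]) ** 2 + (p[1] - cy_d[j]) ** 2)
                     for j in range(len(cx_d))]
            order.append(int(np.argmin(dists)))
        return order

    # Toy example: two regions whose centroids moved slightly after deskewing.
    print(match_regions_across_rotation([100.0, 400.0], [50.0, 300.0],
                                        [98.0, 396.0], [54.0, 306.0], 1.5, (1000, 800)))
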
self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) + _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, + skip_layout_and_reading_order=self.skip_layout_and_reading_order) page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) @@ -4964,7 +4965,10 @@ class Eynollah: contours_tables = [] ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) if self.dir_in: continue else: @@ -5005,6 +5009,8 @@ class Eynollah: self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) self.logger.info("Graphics detection took %.1fs ", time.time() - t1) #self.logger.info('cont_page %s', cont_page) + #plt.imshow(table_prediction) + #plt.show() if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") @@ -5016,19 +5022,16 @@ class Eynollah: continue else: return pcgts + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) self.logger.info("textline detection took %.1fs", time.time() - t1) - t1 = time.time() slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) self.logger.info("deskewing took %.1fs", time.time() - t1) - t1 = time.time() - #plt.imshow(table_prediction) - #plt.show() - if self.light_version and num_col_classifier in (1,2): + elif num_col_classifier in (1,2): org_h_l_m = textline_mask_tot_ea.shape[0] org_w_l_m = textline_mask_tot_ea.shape[1] if num_col_classifier == 1: @@ -5062,14 +5065,13 @@ class Eynollah: ## birdan sora chock chakir t1 = time.time() if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ + self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.full_layout: - if not self.light_version: - img_bin_light = None - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, 
text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + else: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ + self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.light_version: drop_label_in_full_layout = 4 textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 @@ -5219,14 +5221,19 @@ class Eynollah: if not self.curved_line: if self.light_version: if self.textline_light: - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ + # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) + #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, 
polygons_of_marginals, _ = \ + # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) @@ -5237,22 +5244,28 @@ class Eynollah: else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: - scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + textline_mask_tot_ea_erode = 
cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ + self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ + self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: @@ -5261,17 +5274,17 @@ class Eynollah: #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) #except: #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: #takes long timee contours_only_text_parent_d_ordered = None - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, 
contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + if self.light_version: + fun = check_any_text_region_in_model_one_is_main_or_header_light + else: + fun = check_any_text_region_in_model_one_is_main_or_header + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ + all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \ + fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) if self.plotter: self.plotter.save_plot_of_layout(text_regions_p, image_page) @@ -5279,7 +5292,9 @@ class Eynollah: pixel_img = 4 polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, + all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, + kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) pixel_lines = 6 if not self.reading_order_machine_based: @@ -5303,7 +5318,6 @@ class Eynollah: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - if not self.reading_order_machine_based: if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: @@ -5329,7 +5343,10 @@ class Eynollah: else: ocr_all_textlines = None - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, + cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts @@ -5409,7 +5426,9 @@ class Eynollah: 
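# (editor's aside) the logger calls in this hunk use logging's lazy %-style
# formatting: the timing value is passed as a separate argument and only
# interpolated if the record is actually emitted, unlike an eager f-string
# or print(), e.g.
#     self.logger.info("detection of reading order took %.1fs", time.time() - t_order)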
ocr_all_textlines = None #print(ocr_all_textlines) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts From 3d88b207fc15c73b44675eb9e454840531095825 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 5 Dec 2024 09:39:55 +0000 Subject: [PATCH 074/107] run: log instead of print --- src/eynollah/eynollah.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 769093d..6333a7f 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4906,7 +4906,7 @@ class Eynollah: self.ls_imgs = [1] for img_name in self.ls_imgs: - print(img_name) + self.logger.info(img_name) t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) @@ -5436,8 +5436,8 @@ class Eynollah: if self.dir_in: self.writer.write_pagexml(pcgts) - #self.logger.info("Job done in %.1fs", time.time() - t0) - print("Job done in %.1fs" % (time.time() - t0)) + self.logger.info("Job done in %.1fs", time.time() - t0) + #print("Job done in %.1fs" % (time.time() - t0)) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) From aaea2ef4637b19a2731119f763085ee1eda12249 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 5 Dec 2024 09:40:02 +0000 Subject: [PATCH 075/107] simplify --- src/eynollah/eynollah.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 6333a7f..a3e6f9e 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -832,8 +832,7 @@ class Eynollah: img = img / float(255.0) img = resize_image(img, img_height_model, img_width_model) - label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), - verbose=0) + label_p_pred = model.predict(img[np.newaxis], verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] @@ -1082,6 +1081,7 @@ class Eynollah: #del model #gc.collect() return prediction_true + def do_padding_with_scale(self,img, scale): h_n = int(img.shape[0]*scale) w_n = int(img.shape[1]*scale) @@ -2032,22 +2032,20 @@ class Eynollah: all_box_coord_per_process.append(crop_coor) queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) - def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier=None): + def textline_contours(self, img, use_patches, scaler_h, scaler_w, num_col_classifier=None): self.logger.debug('enter textline_contours') - if self.textline_light: - 
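# (editor's note on this "simplify" hunk) the flag below only ever mirrored
# self.textline_light, so the patch drops the local variable and passes
# thresholding_for_artificial_class_in_light_version=self.textline_light
# directly in the do_prediction call further down, then tests
# self.textline_light at the later branch points as well.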
thresholding_for_artificial_class_in_light_version = True#False - else: - thresholding_for_artificial_class_in_light_version = False if not self.dir_in: self.model_textline, _ = self.start_new_session_and_model(self.model_textline_dir) + #img = img.astype(np.uint8) img_org = np.copy(img) img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) - #if not thresholding_for_artificial_class_in_light_version: + prediction_textline = self.do_prediction(use_patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, + thresholding_for_artificial_class_in_light_version=self.textline_light) + #if not self.textline_light: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline) #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 @@ -2057,7 +2055,7 @@ class Eynollah: old_art = np.copy(textline_mask_tot_ea_art) - if not thresholding_for_artificial_class_in_light_version: + if not self.textline_light: textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') #textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) @@ -2066,12 +2064,12 @@ class Eynollah: textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1 textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8') - if not thresholding_for_artificial_class_in_light_version: + if not self.textline_light: textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1 - if not thresholding_for_artificial_class_in_light_version: + if not self.textline_light: prediction_textline[:,:][old_art[:,:]==1]=2 prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) @@ -3366,8 +3364,7 @@ class Eynollah: scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) - patches = True - textline_mask_tot_ea, _ = self.textline_contours(image_page, patches, scaler_h_textline, scaler_w_textline, num_col_classifier) + textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) From 055463d23a3ef6b3bbdf2581740c5d0dab3d501a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 5 Dec 2024 09:43:30 +0000 Subject: [PATCH 076/107] avoid indentation --- src/eynollah/eynollah.py | 453 +++++++++++++++++++-------------------- 1 file changed, 226 insertions(+), 227 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index a3e6f9e..4cf9e81 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -846,238 +846,237 @@ class Eynollah: seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) + return prediction_true + if img.shape[0] < img_height_model: + img = resize_image(img, img_height_model, img.shape[1]) - else: - if img.shape[0] < img_height_model: - img = resize_image(img, img_height_model, img.shape[1]) + if img.shape[1] < img_width_model: + img = 
resize_image(img, img.shape[0], img_width_model) - if img.shape[1] < img_width_model: - img = resize_image(img, img.shape[0], img_width_model) + self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) + margin = int(marginal_of_patch_percent * img_height_model) + width_mid = img_width_model - 2 * margin + height_mid = img_height_model - 2 * margin + img = img / float(255.0) + #img = img.astype(np.float16) + img_h = img.shape[0] + img_w = img.shape[1] + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) + nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) - margin = int(marginal_of_patch_percent * img_height_model) - width_mid = img_width_model - 2 * margin - height_mid = img_height_model - 2 * margin - img = img / float(255.0) - #img = img.astype(np.float16) - img_h = img.shape[0] - img_w = img.shape[1] - prediction_true = np.zeros((img_h, img_w, 3)) - mask_true = np.zeros((img_h, img_w)) - nxf = img_w / float(width_mid) - nyf = img_h / float(height_mid) - nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) - nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - for i in range(nxf): - for j in range(nyf): - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - else: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - if j == 0: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - else: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - if index_x_u > img_w: - index_x_u = img_w - index_x_d = img_w - img_width_model - if index_y_u > img_h: - index_y_u = img_h - index_y_d = img_h - img_height_model - - list_i_s.append(i) - list_j_s.append(j) - list_x_u.append(index_x_u) - list_x_d.append(index_x_d) - list_y_d.append(index_y_d) - list_y_u.append(index_y_u) - + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] - img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - - batch_indexer = batch_indexer + 1 - - if batch_indexer == n_batch_inference: - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - - if thresholding_for_some_classes_in_light_version: - seg_not_base = label_p_pred[:,:,:,4] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg_background = label_p_pred[:,:,:,0] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 - - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 - seg[(seg_line==1) & (seg==0)]=3 - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - 
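# (editor's aside on the stitching logic around this point) every window is
# predicted with a `margin` overlap on all sides; when results are written
# back into prediction_true, interior windows keep only their central region,
# while windows on the image border also keep their outer edge -- that is
# what the nine i_batch/j_batch cases below distinguish.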
index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - - elif i==(nxf-1) and j==(nyf-1): - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - if thresholding_for_some_classes_in_light_version: - seg_not_base = label_p_pred[:,:,:,4] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg_background = label_p_pred[:,:,:,0] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 - - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 - seg[(seg_line==1) & (seg==0)]=3 - - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = 
seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - - prediction_true = prediction_true.astype(np.uint8) + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + for i in range(nxf): + for j in range(nyf): + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + else: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + else: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - img_width_model + if 
index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - img_height_model + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + + + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + if batch_indexer == n_batch_inference: + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + 
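# (editor's aside -- a hypothetical refactor, not part of this patch series:
#  the nine crop/paste cases in this stitching block reduce to generic slice
#  arithmetic along each axis)
#     top    = 0 if j_batch == 0 else margin
#     bottom = seg_color.shape[0] - (0 if j_batch == nyf - 1 else margin)
#     left   = 0 if i_batch == 0 else margin
#     right  = seg_color.shape[1] - (0 if i_batch == nxf - 1 else margin)
#     prediction_true[index_y_d_in + top : index_y_d_in + bottom,
#                     index_x_d_in + left : index_x_d_in + right] = \
#         seg_color[top:bottom, left:right]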
prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + elif i==(nxf-1) and j==(nyf-1): + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - 
margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + prediction_true = prediction_true.astype(np.uint8) #del model #gc.collect() return prediction_true From c3163caefdb9a0843cc3e3f1408ff874fc4ce46e Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 5 Dec 2024 14:28:17 +0000 Subject: [PATCH 077/107] avoid indentation --- src/eynollah/eynollah.py | 427 +++++++++++++++++++-------------------- 1 file changed, 213 insertions(+), 214 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 4cf9e81..d483cac 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1140,227 +1140,226 @@ class Eynollah: seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) + return prediction_true + if img.shape[0] < img_height_model: + img = resize_image(img, img_height_model, img.shape[1]) - else: - if img.shape[0] < img_height_model: - img = resize_image(img, img_height_model, img.shape[1]) + if img.shape[1] < img_width_model: + img = resize_image(img, img.shape[0], img_width_model) - if img.shape[1] < img_width_model: - img = resize_image(img, img.shape[0], img_width_model) + self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) + margin = int(marginal_of_patch_percent * img_height_model) + width_mid = img_width_model - 2 * margin + height_mid = img_height_model - 2 * margin + img = img / float(255.0) + img = img.astype(np.float16) + img_h = img.shape[0] + img_w = img.shape[1] + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) + nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) - margin = int(marginal_of_patch_percent * img_height_model) - width_mid = img_width_model - 2 * margin - height_mid = img_height_model - 2 * margin - img = img / float(255.0) - img = img.astype(np.float16) - img_h = img.shape[0] - img_w = img.shape[1] - prediction_true = np.zeros((img_h, img_w, 3)) - mask_true = np.zeros((img_h, img_w)) - nxf = img_w / float(width_mid) - nyf = img_h / float(height_mid) - nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) - nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - - for i in range(nxf): - for j in range(nyf): - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - 
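# (editor's aside) this index arithmetic walks an nxf-by-nyf grid of
# model-sized windows with stride width_mid = img_width_model - 2*margin
# (and likewise height_mid for the rows); a window that would overrun the
# image is snapped back so the last row/column stays flush with the border.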
else: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - if j == 0: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - else: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - if index_x_u > img_w: - index_x_u = img_w - index_x_d = img_w - img_width_model - if index_y_u > img_h: - index_y_u = img_h - index_y_d = img_h - img_height_model - - - list_i_s.append(i) - list_j_s.append(j) - list_x_u.append(index_x_u) - list_x_d.append(index_x_d) - list_y_d.append(index_y_d) - list_y_u.append(index_y_u) - + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] - img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - - batch_indexer = batch_indexer + 1 + batch_indexer = 0 + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - if batch_indexer == n_batch_inference: - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - - if thresholding_for_some_classes_in_light_version: - seg_art = label_p_pred[:,:,:,4] - seg_art[seg_art<0.2] =0 - seg_art[seg_art>0] =1 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg[seg_art==1]=4 - seg[(seg_line==1) & (seg==0)]=3 - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - 
margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - - elif i==(nxf-1) and j==(nyf-1): - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - if thresholding_for_some_classes_in_light_version: - seg_art = label_p_pred[:,:,:,4] - seg_art[seg_art<0.2] =0 - seg_art[seg_art>0] =1 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg[seg_art==1]=4 - seg[(seg_line==1) & (seg==0)]=3 - - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 
0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + for i in range(nxf): + for j in range(nyf): + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + else: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + else: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - img_width_model + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - img_height_model - prediction_true = prediction_true.astype(np.uint8) + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + + + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + if batch_indexer == n_batch_inference: + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + if thresholding_for_some_classes_in_light_version: + seg_art = label_p_pred[:,:,:,4] + seg_art[seg_art<0.2] =0 + seg_art[seg_art>0] =1 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg[seg_art==1]=4 + seg[(seg_line==1) & (seg==0)]=3 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, 
index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + elif i==(nxf-1) and j==(nyf-1): + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_art = label_p_pred[:,:,:,4] + seg_art[seg_art<0.2] =0 + seg_art[seg_art>0] =1 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg[seg_art==1]=4 + seg[(seg_line==1) & (seg==0)]=3 + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + 
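# (editor's aside on the per-class thresholds earlier in this hunk) argmax
# alone lets dominant classes swallow rare ones, so selected probability
# channels are re-thresholded afterwards: separator-line probability above
# 0.1 claims pixels that argmax left as background (label 3), and
# artificial-text probability above 0.2 overrides the result with label 2.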
prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + prediction_true = prediction_true.astype(np.uint8) return prediction_true def extract_page(self): From ad748d003978b643dab6ec482542b03fdb3dc1e4 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 9 Dec 2024 10:55:41 +0000 Subject: [PATCH 078/107] do_prediction: avoid code duplication --- src/eynollah/eynollah.py | 169 +++------------------------------------ 1 file changed, 9 insertions(+), 160 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d483cac..50f0f34 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -912,7 +912,10 @@ class Eynollah: batch_indexer = batch_indexer + 1 - if batch_indexer == n_batch_inference: + if (batch_indexer == n_batch_inference or + # last batch + i == nxf - 1 and j == nyf - 1): + self.logger.debug("predicting patches on %s", str(img_patch.shape)) label_p_pred = model.predict(img_patch,verbose=0) seg = np.argmax(label_p_pred, axis=3) @@ -994,88 +997,6 @@ class Eynollah: img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - elif i==(nxf-1) and j==(nyf-1): - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - if thresholding_for_some_classes_in_light_version: - seg_not_base = label_p_pred[:,:,:,4] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg_background = label_p_pred[:,:,:,0] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] 
=0 - - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 - seg[(seg_line==1) & (seg==0)]=3 - - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - prediction_true = prediction_true.astype(np.uint8) #del model #gc.collect() @@ -1111,7 +1032,7 @@ class Eynollah: return img_scaled_padded#, label_scaled_padded def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, 
marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): - self.logger.debug("enter do_prediction") + self.logger.debug("enter do_prediction_new_concept") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] img_width_model = model.layers[len(model.layers) - 1].output_shape[2] @@ -1207,7 +1128,10 @@ class Eynollah: batch_indexer = batch_indexer + 1 - if batch_indexer == n_batch_inference: + if (batch_indexer == n_batch_inference or + # last batch + i == nxf - 1 and j == nyf - 1): + self.logger.debug("predicting patches on %s", str(img_patch.shape)) label_p_pred = model.predict(img_patch,verbose=0) seg = np.argmax(label_p_pred, axis=3) @@ -1284,81 +1208,6 @@ class Eynollah: img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - elif i==(nxf-1) and j==(nyf-1): - label_p_pred = model.predict(img_patch,verbose=0) - - seg = np.argmax(label_p_pred, axis=3) - if thresholding_for_some_classes_in_light_version: - seg_art = label_p_pred[:,:,:,4] - seg_art[seg_art<0.2] =0 - seg_art[seg_art>0] =1 - - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - seg[seg_art==1]=4 - seg[(seg_line==1) & (seg==0)]=3 - - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 - - indexer_inside_batch = 0 - for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] - seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - - index_y_u_in = list_y_u[indexer_inside_batch] - index_y_d_in = list_y_d[indexer_inside_batch] - - index_x_u_in = list_x_u[indexer_inside_batch] - index_x_d_in = list_x_d[indexer_inside_batch] - - if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color - elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] 
- margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - list_i_s = [] - list_j_s = [] - list_x_u = [] - list_x_d = [] - list_y_u = [] - list_y_d = [] - - batch_indexer = 0 - img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) - prediction_true = prediction_true.astype(np.uint8) return prediction_true From d68017037ce0f42dc036b30e1de36d8c49b73429 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 9 Dec 2024 11:27:11 +0000 Subject: [PATCH 079/107] do_prediction: trigger GC to avoid CUDA OOM --- src/eynollah/eynollah.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 50f0f34..90824c8 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -999,7 +999,7 @@ class Eynollah: prediction_true = prediction_true.astype(np.uint8) #del model - #gc.collect() + gc.collect() return prediction_true def do_padding_with_scale(self,img, scale): @@ -1209,6 +1209,7 @@ class Eynollah: img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) prediction_true = prediction_true.astype(np.uint8) + gc.collect() return prediction_true def extract_page(self): From 6fe02df97394edcc741243a9f3d635e4b2b472e2 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 9 Dec 2024 16:35:31 +0000 Subject: [PATCH 080/107] do_image_rotation: fix f93fa12 (do return results) --- src/eynollah/utils/separate_lines.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index f8df33f..a57acbb 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1573,13 +1573,14 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): def do_image_rotation(queue_of_all_params,angels_per_process, img_resized, sigma_des): angels_per_each_subprocess = [] for mv in range(len(angels_per_process)): + print(f"rotating image by {angels_per_process[mv]}") img_rot=rotate_image(img_resized,angels_per_process[mv]) img_rot[img_rot!=0]=1 try: var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) except: var_spectrum=0 - angels_per_each_subprocess.append(var_spectrum) + angels_per_each_subprocess.append(var_spectrum) queue_of_all_params.put([angels_per_each_subprocess]) From 54cb15056b352fa8f04212071851f4e66b5931bb Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 9 Dec 2024 16:37:34 +0000 Subject: [PATCH 081/107] do_image_rotation / return_deskew_slop: avoid code duplication, simplify via mp.Pool --- src/eynollah/utils/separate_lines.py | 418 +++------------------------ 1 file changed, 45 insertions(+), 373 deletions(-) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index a57acbb..36a1b01 100644 --- a/src/eynollah/utils/separate_lines.py +++ 
b/src/eynollah/utils/separate_lines.py @@ -1,3 +1,4 @@ +from functools import partial import numpy as np import cv2 from scipy.signal import find_peaks @@ -1570,19 +1571,15 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): # plt.show() return img_patch_ineterst_revised -def do_image_rotation(queue_of_all_params,angels_per_process, img_resized, sigma_des): - angels_per_each_subprocess = [] - for mv in range(len(angels_per_process)): - print(f"rotating image by {angels_per_process[mv]}") - img_rot=rotate_image(img_resized,angels_per_process[mv]) - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - angels_per_each_subprocess.append(var_spectrum) - - queue_of_all_params.put([angels_per_each_subprocess]) +def do_image_rotation(angle, img, sigma_des): + print(f"rotating image by {angle}") + img_rot = rotate_image(img, angle) + img_rot[img_rot!=0] = 1 + try: + var = find_num_col_deskew(img_rot, sigma_des, 20.3) + except: + var = 0 + return var def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): num_cores = cpu_count() @@ -1613,376 +1610,51 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals #plt.imshow(img_resized) #plt.show() - if main_page and img_patch_org.shape[1]>img_patch_org.shape[0]: - + if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: #plt.imshow(img_resized) #plt.show() - angels=np.array([-45, 0 , 45 , 90 , ])#np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - ###for rot in angels: - ###img_rot=rotate_image(img_resized,rot) - ####plt.imshow(img_rot) - ####plt.show() - ###img_rot[img_rot!=0]=1 - ####neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - ####print(var_spectrum,'var_spectrum') - ###try: - ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - #####print(rot,var_spectrum,'var_spectrum') - ###except: - ###var_spectrum=0 - ###var_res.append(var_spectrum) - - - - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 + angles = np.array([-45, 0, 45, 90,]) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) - - angels=np.linspace(ang_int-22.5,ang_int+22.5,n_tot_angles) - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) + angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in 
range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - ##var_res=[] - ##for rot in angels: - ##img_rot=rotate_image(img_resized,rot) - ####plt.imshow(img_rot) - ####plt.show() - ##img_rot[img_rot!=0]=1 - ##try: - ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ##except: - ##var_spectrum=0 - ##var_res.append(var_spectrum) - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - elif main_page and img_patch_org.shape[1]<=img_patch_org.shape[0]: - + elif main_page: #plt.imshow(img_resized) #plt.show() - angels=np.linspace(-12,12,n_tot_angles)#np.array([0 , 45 , 90 , -45]) - - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - - ##var_res=[] - - ##for rot in angels: - ##img_rot=rotate_image(img_resized,rot) - ###plt.imshow(img_rot) - ###plt.show() - ##img_rot[img_rot!=0]=1 - ###neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - ###print(var_spectrum,'var_spectrum') - ##try: - ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - - ##except: - ##var_spectrum=0 - - ##var_res.append(var_spectrum) - - - if plotter: - plotter.save_plot_of_rotation_angle(angels, var_res) - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 + angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) early_slope_edge=11 - if abs(ang_int)>early_slope_edge and ang_int<0: - angels=np.linspace(-90,-12,n_tot_angles) - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - ##var_res=[] - ##for rot in angels: - ##img_rot=rotate_image(img_resized,rot) - ####plt.imshow(img_rot) - ####plt.show() - ##img_rot[img_rot!=0]=1 - ##try: - ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ##except: - ##var_spectrum=0 - ##var_res.append(var_spectrum) - try: - var_res=np.array(var_res) - 
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - elif abs(ang_int)>early_slope_edge and ang_int>0: - - angels=np.linspace(90,12,n_tot_angles) - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - - ###var_res=[] - ###for rot in angels: - ###img_rot=rotate_image(img_resized,rot) - #####plt.imshow(img_rot) - #####plt.show() - ###img_rot[img_rot!=0]=1 - ###try: - ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ####print(indexer,'indexer') - ###except: - ###var_spectrum=0 - ###var_res.append(var_spectrum) - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - else: - angels=np.linspace(-25,25,int(n_tot_angles/2.)+10) - indexer=0 - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - ####var_res=[] - - ####for rot in angels: - ####img_rot=rotate_image(img_resized,rot) - #####plt.imshow(img_rot) - #####plt.show() - ####img_rot[img_rot!=0]=1 - #####neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #####print(var_spectrum,'var_spectrum') - ####try: - ####var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ####except: - ####var_spectrum=0 - ####var_res.append(var_spectrum) - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 + if abs(angle) > early_slope_edge: + if angle < 0: + angles = np.linspace(-90, -12, n_tot_angles) + else: + angles = np.linspace(90, 12, n_tot_angles) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) - #plt.plot(var_res) - #plt.show() - ##plt.plot(mom3_res) - ##plt.show() - #print(ang_int,'ang_int111') + else: + angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) early_slope_edge=22 - if abs(ang_int)>early_slope_edge and ang_int<0: - - angels=np.linspace(-90,-25,int(n_tot_angles/2.)+10) - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 
1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - ###var_res=[] - - ###for rot in angels: - ###img_rot=rotate_image(img_resized,rot) - #####plt.imshow(img_rot) - #####plt.show() - ###img_rot[img_rot!=0]=1 - ###try: - ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ###except: - ###var_spectrum=0 - ###var_res.append(var_spectrum) - - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - elif abs(ang_int)>early_slope_edge and ang_int>0: - - angels=np.linspace(90,25,int(n_tot_angles/2.)+10) - indexer=0 - - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(angels), num_cores + 1) - - for i in range(num_cores): - angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) - - for i in range(num_cores): - processes[i].start() - - var_res=[] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - angles_for_subprocess = list_all_par[0] - for j in range(len(angles_for_subprocess)): - var_res.append(angles_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - - ###var_res=[] + if abs(angle) > early_slope_edge: + if angle < 0: + angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) + else: + angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) + angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) - - ###for rot in angels: - ###img_rot=rotate_image(img_resized,rot) - #####plt.imshow(img_rot) - #####plt.show() - ###img_rot[img_rot!=0]=1 - ###try: - ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ####print(indexer,'indexer') - ###except: - ###var_spectrum=0 - - ###var_res.append(var_spectrum) - try: - var_res=np.array(var_res) - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - return ang_int + return angle +def get_smallest_skew(img, sigma_des, angles, num_cores=1, plotter=None): + with Pool(processes=num_cores) as pool: + results = pool.map(partial(do_image_rotation, img=img, sigma_des=sigma_des), angles) + if plotter: + plotter.save_plot_of_rotation_angle(angles, results) + try: + var_res = np.array(results) + angle = angles[np.argmax(var_res)] + except: + angle = 0 + return angle From 5e0c1da7111690bd4898d928bfcde0c1de39c3b9 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 00:18:58 +0000 Subject: [PATCH 082/107] simplify --- src/eynollah/eynollah.py | 87 ++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 58 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 90824c8..3b43f7b 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -823,8 +823,8 @@ class Eynollah: def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, 
thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction") - img_height_model = model.layers[len(model.layers) - 1].output_shape[1] - img_width_model = model.layers[len(model.layers) - 1].output_shape[2] + img_height_model = model.layers[-1].output_shape[1] + img_width_model = model.layers[-1].output_shape[2] if not patches: img_h_page = img.shape[0] @@ -1034,8 +1034,8 @@ class Eynollah: def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction_new_concept") - img_height_model = model.layers[len(model.layers) - 1].output_shape[1] - img_width_model = model.layers[len(model.layers) - 1].output_shape[2] + img_height_model = model.layers[-1].output_shape[1] + img_width_model = model.layers[-1].output_shape[2] if not patches: img_h_page = img.shape[0] @@ -1043,7 +1043,7 @@ class Eynollah: img = img / 255.0 img = resize_image(img, img_height_model, img_width_model) - label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) + label_p_pred = model.predict(img[np.newaxis], verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] if thresholding_for_artificial_class_in_light_version: @@ -4928,31 +4928,31 @@ class Eynollah: #print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 0.000005 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = 
contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) @@ -5018,35 +5018,6 @@ class Eynollah: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] - else: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - #try: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - #except: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) - # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) - # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) - else: - pass #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: From 21efea87116aeb7c89bea31a0227f614f19c9c6b Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 18:36:57 +0000 Subject: [PATCH 083/107] no del on function argument --- src/eynollah/utils/contour.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/eynollah/utils/contour.py 
b/src/eynollah/utils/contour.py index 8a92ace..c5d56b8 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -39,7 +39,6 @@ def get_text_region_boxes_by_given_contours(contours): boxes.append([x, y, w, h]) contours_new.append(contours[jj]) - del contours return boxes, contours_new def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): From 25e967397d753a0fdfd1c4c9181cfc93f94414b7 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 11:24:56 +0000 Subject: [PATCH 084/107] exit early if no text regions found (to avoid segfault) --- src/eynollah/eynollah.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 3b43f7b..d6ba8a9 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5019,6 +5019,20 @@ class Eynollah: contours_only_text_parent_d = [] contours_only_text_parent = [] + if not len(contours_only_text_parent): + # stop early + empty_marginals = [[]] * len(polygons_of_marginals) + if self.full_layout: + pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) + else: + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) + self.logger.info("Job done in %.1fs", time.time() - t0) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) @@ -5164,10 +5178,12 @@ class Eynollah: all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: return pcgts - else: contours_only_text_parent_h = None if self.reading_order_machine_based: From 68456ea0022b47f50fb7e2614358879b6484d0b0 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 11:30:38 +0000 Subject: [PATCH 085/107] do_work_of_slopes_new*, do_back_rotation_and_get_cnt_back, do_work_of_contours_in_image: use mp.Pool, simplify --- src/eynollah/eynollah.py | 454 +++++---------------------- src/eynollah/utils/contour.py | 176 +++-------- src/eynollah/utils/separate_lines.py | 207 +++++++++++- 3 files changed, 324 insertions(+), 513 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d6ba8a9..ae292c6 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -11,8 +11,9 @@ import os import sys import time import warnings +from functools import partial from pathlib import Path -from multiprocessing import Process, Queue, cpu_count +from multiprocessing import Pool, cpu_count import gc from ocrd_utils import getLogger import cv2 @@ -60,14 +61,20 @@ from .utils.contour import ( from .utils.rotate import ( rotate_image, rotation_not_90_func, - rotation_not_90_func_full_layout) + rotation_not_90_func_full_layout +) from .utils.separate_lines import ( textline_contours_postprocessing, separate_lines_new2, - return_deskew_slop) + 
return_deskew_slop, + do_work_of_slopes_new, + do_work_of_slopes_new_curved, + do_work_of_slopes_new_light, +) from .utils.drop_capitals import ( adhere_drop_capital_region_into_corresponding_textline, - filter_small_drop_capitals_from_no_patch_layout) + filter_small_drop_capitals_from_no_patch_layout +) from .utils.marginals import get_marginals from .utils.resize import resize_image from .utils import ( @@ -82,7 +89,8 @@ from .utils import ( small_textlines_to_parent_adherence2, order_of_regions, find_number_of_columns_in_document, - return_boxes_of_images_by_order_of_reading_new) + return_boxes_of_images_by_order_of_reading_new +) from .utils.pil_cv2 import check_dpi, pil2cv from .utils.xml import order_and_id_of_texts from .plot import EynollahPlotter @@ -1504,381 +1512,73 @@ class Eynollah: all_box_coord.append(crop_coor) - return slopes, all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))) + return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): - self.logger.debug("enter get_slopes_and_deskew_new") + if not len(contours): + return [], [], [], [], [], [], [] + self.logger.debug("enter get_slopes_and_deskew_new_light") if len(contours)>15: num_cores = cpu_count() else: num_cores = 1 - queue_of_all_params = Queue() - - processes = [] - nh = np.linspace(0, len(boxes), num_cores + 1) - indexes_by_text_con = np.array(range(len(contours_par))) - for i in range(num_cores): - boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] - contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] - contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - - processes.append(Process(target=self.do_work_of_slopes_new_light, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, indexes_text_con_per_process, image_page_rotated, slope_deskew))) - for i in range(num_cores): - processes[i].start() - - slopes = [] - all_found_textline_polygons = [] - all_found_text_regions = [] - all_found_text_regions_par = [] - boxes = [] - all_box_coord = [] - all_index_text_con = [] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - slopes_for_sub_process = list_all_par[0] - polys_for_sub_process = list_all_par[1] - boxes_for_sub_process = list_all_par[2] - contours_for_subprocess = list_all_par[3] - contours_par_for_subprocess = list_all_par[4] - boxes_coord_for_subprocess = list_all_par[5] - indexes_for_subprocess = list_all_par[6] - for j in range(len(slopes_for_sub_process)): - slopes.append(slopes_for_sub_process[j]) - all_found_textline_polygons.append(polys_for_sub_process[j]) - boxes.append(boxes_for_sub_process[j]) - all_found_text_regions.append(contours_for_subprocess[j]) - all_found_text_regions_par.append(contours_par_for_subprocess[j]) - all_box_coord.append(boxes_coord_for_subprocess[j]) - all_index_text_con.append(indexes_for_subprocess[j]) - for i in range(num_cores): - processes[i].join() - self.logger.debug('slopes %s', slopes) - self.logger.debug("exit get_slopes_and_deskew_new") - return slopes, all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con + with Pool(processes=num_cores) as pool: + results = 
pool.starmap( + partial(do_work_of_slopes_new_light, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + slope_deskew=slope_deskew, + logger=self.logger, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + plotter=self.plotter,), + zip(boxes, contours, contours_par, range(len(contours_par)))) + #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + self.logger.debug("exit get_slopes_and_deskew_new_light") + return tuple(zip(*results)) def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + if not len(contours): + return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new") num_cores = cpu_count() - queue_of_all_params = Queue() - - processes = [] - nh = np.linspace(0, len(boxes), num_cores + 1) - indexes_by_text_con = np.array(range(len(contours_par))) - for i in range(num_cores): - boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] - contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] - contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - - processes.append(Process(target=self.do_work_of_slopes_new, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, indexes_text_con_per_process, image_page_rotated, slope_deskew))) - for i in range(num_cores): - processes[i].start() - - slopes = [] - all_found_textline_polygons = [] - all_found_text_regions = [] - all_found_text_regions_par = [] - boxes = [] - all_box_coord = [] - all_index_text_con = [] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - slopes_for_sub_process = list_all_par[0] - polys_for_sub_process = list_all_par[1] - boxes_for_sub_process = list_all_par[2] - contours_for_subprocess = list_all_par[3] - contours_par_for_subprocess = list_all_par[4] - boxes_coord_for_subprocess = list_all_par[5] - indexes_for_subprocess = list_all_par[6] - for j in range(len(slopes_for_sub_process)): - slopes.append(slopes_for_sub_process[j]) - all_found_textline_polygons.append(polys_for_sub_process[j]) - boxes.append(boxes_for_sub_process[j]) - all_found_text_regions.append(contours_for_subprocess[j]) - all_found_text_regions_par.append(contours_par_for_subprocess[j]) - all_box_coord.append(boxes_coord_for_subprocess[j]) - all_index_text_con.append(indexes_for_subprocess[j]) - for i in range(num_cores): - processes[i].join() - self.logger.debug('slopes %s', slopes) + with Pool(processes=num_cores) as pool: + results = pool.starmap( + partial(do_work_of_slopes_new, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + slope_deskew=slope_deskew, + logger=self.logger, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + plotter=self.plotter,), + zip(boxes, contours, contours_par, range(len(contours_par)))) + #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new") - return slopes, all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con + return tuple(zip(*results)) def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew): + if not len(contours): + return [], [], [], [], [], [], [] 
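[Editor's note: the pattern these refactors converge on is worth spelling out once. Each get_slopes_and_deskew_new* variant now binds the arguments shared by every region with functools.partial, fans the per-region work out over Pool.starmap, and transposes the resulting list of per-region tuples into per-field tuples with zip(*results). The following is a minimal, self-contained sketch of that idiom; the worker function and its result fields are illustrative stand-ins, not code from this patch.

from functools import partial
from multiprocessing import Pool, cpu_count

def work_one_region(box, contour, index, slope_deskew=0.0):
    # stand-in for do_work_of_slopes_new*: do all the work for one
    # region and return a single tuple of result fields
    slope = slope_deskew  # a real worker would estimate this per region
    return contour, box, slope

if __name__ == '__main__':  # guard required for the 'spawn' start method
    boxes = [(0, 0, 10, 10), (10, 0, 10, 10)]
    contours = ['c0', 'c1']
    with Pool(processes=cpu_count()) as pool:
        results = pool.starmap(
            partial(work_one_region, slope_deskew=0.5),  # bind shared args once
            zip(boxes, contours, range(len(contours))))
    # transpose [(contour, box, slope), ...] into per-field tuples
    all_contours, all_boxes, slopes = zip(*results)

Note that the "if not len(contours): return [], [], ..." guards added above are not cosmetic: with an empty input, zip(*results) produces nothing, so the caller's unpacking of the seven expected fields would raise a ValueError.]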
self.logger.debug("enter get_slopes_and_deskew_new_curved") num_cores = cpu_count() - queue_of_all_params = Queue() - - processes = [] - nh = np.linspace(0, len(boxes), num_cores + 1) - indexes_by_text_con = np.array(range(len(contours_par))) - - for i in range(num_cores): - boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] - contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] - contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - - processes.append(Process(target=self.do_work_of_slopes_new_curved, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, indexes_text_con_per_process, slope_deskew))) - - for i in range(num_cores): - processes[i].start() - - slopes = [] - all_found_textline_polygons = [] - all_found_text_regions = [] - all_found_text_regions_par = [] - boxes = [] - all_box_coord = [] - all_index_text_con = [] - - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - polys_for_sub_process = list_all_par[0] - boxes_for_sub_process = list_all_par[1] - contours_for_subprocess = list_all_par[2] - contours_par_for_subprocess = list_all_par[3] - boxes_coord_for_subprocess = list_all_par[4] - indexes_for_subprocess = list_all_par[5] - slopes_for_sub_process = list_all_par[6] - for j in range(len(polys_for_sub_process)): - slopes.append(slopes_for_sub_process[j]) - all_found_textline_polygons.append(polys_for_sub_process[j][::-1]) - boxes.append(boxes_for_sub_process[j]) - all_found_text_regions.append(contours_for_subprocess[j]) - all_found_text_regions_par.append(contours_par_for_subprocess[j]) - all_box_coord.append(boxes_coord_for_subprocess[j]) - all_index_text_con.append(indexes_for_subprocess[j]) - - for i in range(num_cores): - processes[i].join() - # print(slopes,'slopes') - return all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con, slopes - - def do_work_of_slopes_new_curved(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, indexes_r_con_per_pro, slope_deskew): - self.logger.debug("enter do_work_of_slopes_new_curved") - slopes_per_each_subprocess = [] - bounding_box_of_textregion_per_each_subprocess = [] - textlines_rectangles_per_each_subprocess = [] - contours_textregion_per_each_subprocess = [] - contours_textregion_par_per_each_subprocess = [] - all_box_coord_per_process = [] - index_by_text_region_contours = [] - - textline_cnt_separated = np.zeros(textline_mask_tot_ea.shape) - - for mv in range(len(boxes_text)): - - all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] - all_text_region_raw = all_text_region_raw.astype(np.uint8) - img_int_p = all_text_region_raw[:, :] - - # img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2) - # plt.imshow(img_int_p) - # plt.show() - - if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: - slopes_per_each_subprocess.append(0) - slope_for_all = [slope_deskew][0] - else: - try: - textline_con, hierarchy = return_contours_of_image(img_int_p) - textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.0008) - y_diff_mean = 
find_contours_mean_y_diff(textline_con_fil) - if self.isNaN(y_diff_mean): - slope_for_all = MAX_SLOPE - else: - sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) - img_int_p[img_int_p > 0] = 1 - slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=self.plotter) - - if abs(slope_for_all) < 0.5: - slope_for_all = [slope_deskew][0] - - except Exception as why: - self.logger.error(why) - slope_for_all = MAX_SLOPE - - if slope_for_all == MAX_SLOPE: - slope_for_all = [slope_deskew][0] - slopes_per_each_subprocess.append(slope_for_all) - - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - _, crop_coor = crop_image_inside_box(boxes_text[mv], image_page_rotated) - - if abs(slope_for_all) < 45: - # all_box_coord.append(crop_coor) - textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) - cnt_o_t_max = contours_par_per_process[mv] - x, y, w, h = cv2.boundingRect(cnt_o_t_max) - mask_biggest = np.zeros(mask_texts_only.shape) - mask_biggest = cv2.fillPoly(mask_biggest, pts=[cnt_o_t_max], color=(1, 1, 1)) - mask_region_in_patch_region = mask_biggest[y : y + h, x : x + w] - textline_biggest_region = mask_biggest * textline_mask_tot_ea - - # print(slope_for_all,'slope_for_all') - textline_rotated_separated = separate_lines_new2(textline_biggest_region[y : y + h, x : x + w], 0, num_col, slope_for_all, plotter=self.plotter) - - # new line added - ##print(np.shape(textline_rotated_separated),np.shape(mask_biggest)) - textline_rotated_separated[mask_region_in_patch_region[:, :] != 1] = 0 - # till here - - textline_cnt_separated[y : y + h, x : x + w] = textline_rotated_separated - textline_region_in_image[y : y + h, x : x + w] = textline_rotated_separated - - # plt.imshow(textline_region_in_image) - # plt.show() - # plt.imshow(textline_cnt_separated) - # plt.show() - - pixel_img = 1 - cnt_textlines_in_image = return_contours_of_interested_textline(textline_region_in_image, pixel_img) - - textlines_cnt_per_region = [] - for jjjj in range(len(cnt_textlines_in_image)): - mask_biggest2 = np.zeros(mask_texts_only.shape) - mask_biggest2 = cv2.fillPoly(mask_biggest2, pts=[cnt_textlines_in_image[jjjj]], color=(1, 1, 1)) - if num_col + 1 == 1: - mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=5) - else: - mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) - - pixel_img = 1 - mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) - cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) - try: - textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) - except Exception as why: - self.logger.error(why) - else: - add_boxes_coor_into_textlines = True - textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], add_boxes_coor_into_textlines) - add_boxes_coor_into_textlines = False - # print(np.shape(textlines_cnt_per_region),'textlines_cnt_per_region') - - textlines_rectangles_per_each_subprocess.append(textlines_cnt_per_region) - bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) - contours_textregion_per_each_subprocess.append(contours_per_process[mv]) - contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) - all_box_coord_per_process.append(crop_coor) - - queue_of_all_params.put([textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, 
contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours, slopes_per_each_subprocess]) - def do_work_of_slopes_new_light(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): - self.logger.debug('enter do_work_of_slopes_new_light') - slopes_per_each_subprocess = [] - bounding_box_of_textregion_per_each_subprocess = [] - textlines_rectangles_per_each_subprocess = [] - contours_textregion_per_each_subprocess = [] - contours_textregion_par_per_each_subprocess = [] - all_box_coord_per_process = [] - index_by_text_region_contours = [] - for mv in range(len(boxes_text)): - _, crop_coor = crop_image_inside_box(boxes_text[mv],image_page_rotated) - mask_textline = np.zeros((textline_mask_tot_ea.shape)) - mask_textline = cv2.fillPoly(mask_textline,pts=[contours_per_process[mv]],color=(1,1,1)) - all_text_region_raw = (textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ] - all_text_region_raw=all_text_region_raw.astype(np.uint8) - - slopes_per_each_subprocess.append([slope_deskew][0]) - mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) - mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) - - - if self.textline_light: - all_text_region_raw = np.copy(textline_mask_tot_ea) - all_text_region_raw[mask_only_con_region == 0] = 0 - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(all_text_region_raw) - cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - else: - all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) - mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] - all_text_region_raw[mask_only_con_region == 0] = 0 - cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, [slope_deskew][0], contours_par_per_process[mv], boxes_text[mv]) - - textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) - - contours_textregion_per_each_subprocess.append(contours_per_process[mv]) - contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) - all_box_coord_per_process.append(crop_coor) - queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) - - def do_work_of_slopes_new(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): - self.logger.debug('enter do_work_of_slopes_new') - slopes_per_each_subprocess = [] - bounding_box_of_textregion_per_each_subprocess = [] - textlines_rectangles_per_each_subprocess = [] - contours_textregion_per_each_subprocess = [] - contours_textregion_par_per_each_subprocess = [] - all_box_coord_per_process = [] - 
index_by_text_region_contours = [] - for mv in range(len(boxes_text)): - _, crop_coor = crop_image_inside_box(boxes_text[mv],image_page_rotated) - mask_textline = np.zeros((textline_mask_tot_ea.shape)) - mask_textline = cv2.fillPoly(mask_textline,pts=[contours_per_process[mv]],color=(1,1,1)) - all_text_region_raw = (textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ] - all_text_region_raw=all_text_region_raw.astype(np.uint8) - img_int_p=all_text_region_raw[:,:]#self.all_text_region_raw[mv] - img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2) - - if img_int_p.shape[0]/img_int_p.shape[1]<0.1: - slopes_per_each_subprocess.append(0) - slope_for_all = [slope_deskew][0] - all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] - cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], 0) - textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) - else: - try: - textline_con, hierarchy = return_contours_of_image(img_int_p) - textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.00008) - y_diff_mean = find_contours_mean_y_diff(textline_con_fil) - if self.isNaN(y_diff_mean): - slope_for_all = MAX_SLOPE - else: - sigma_des = int(y_diff_mean * (4.0 / 40.0)) - if sigma_des < 1: - sigma_des = 1 - img_int_p[img_int_p > 0] = 1 - slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=self.plotter) - if abs(slope_for_all) <= 0.5: - slope_for_all = [slope_deskew][0] - except Exception as why: - self.logger.error(why) - slope_for_all = MAX_SLOPE - if slope_for_all == MAX_SLOPE: - slope_for_all = [slope_deskew][0] - slopes_per_each_subprocess.append(slope_for_all) - mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) - mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) - - # plt.imshow(mask_only_con_region) - # plt.show() - all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) - mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] - - ##plt.imshow(textline_mask_tot_ea) - ##plt.show() - ##plt.imshow(all_text_region_raw) - ##plt.show() - ##plt.imshow(mask_only_con_region) - ##plt.show() - - all_text_region_raw[mask_only_con_region == 0] = 0 - cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv]) - - textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) - - contours_textregion_per_each_subprocess.append(contours_per_process[mv]) - contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) - all_box_coord_per_process.append(crop_coor) - queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, 
contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) + with Pool(processes=num_cores) as pool: + results = pool.starmap( + partial(do_work_of_slopes_new_curved, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + mask_texts_only=mask_texts_only, + num_col=num_col, + scale_par=scale_par, + slope_deskew=slope_deskew, + logger=self.logger, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + plotter=self.plotter,), + zip(boxes, contours, contours_par, range(len(contours_par)))) + #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + self.logger.debug("exit get_slopes_and_deskew_new_curved") + return tuple(zip(*results)) def textline_contours(self, img, use_patches, scaler_h, scaler_w, num_col_classifier=None): self.logger.debug('enter textline_contours') @@ -1923,6 +1623,7 @@ class Eynollah: prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) + self.logger.debug('exit textline_contours') return ((prediction_textline[:, :, 0]==1)*1).astype('uint8'), ((prediction_textline_longshot_true_size[:, :, 0]==1)*1).astype('uint8') @@ -1959,6 +1660,7 @@ class Eynollah: q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) + self.logger.debug('exit do_work_of_slopes') def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_extract_images_only") @@ -2069,6 +1771,7 @@ class Eynollah: polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) ) + self.logger.debug("exit get_regions_extract_images_only") return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): @@ -2146,6 +1849,7 @@ class Eynollah: #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) + self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), len(np.unique(img_resized))) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) @@ -2269,9 +1973,11 @@ class Eynollah: #plt.imshow(textline_mask_tot_ea) #plt.show() #print("inside 4 ", time.time()-t_in) + self.logger.debug("exit get_regions_light_v") return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) + self.logger.debug("exit get_regions_light_v") return None, erosion_hurts, None, textline_mask_tot_ea, img_bin def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): @@ -2392,6 +2098,7 @@ class Eynollah: text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) + self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml except: @@ -2461,6 +2168,7 @@ class Eynollah: text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) erosion_hurts = True + self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml def 
do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): @@ -2633,6 +2341,7 @@ class Eynollah: for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) + self.logger.debug("exit do_order_of_regions_full_layout") return order_text_new, id_of_texts_tot def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): @@ -2743,6 +2452,7 @@ class Eynollah: for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) + self.logger.debug("exit do_order_of_regions_no_full_layout") return order_text_new, id_of_texts_tot def check_iou_of_bounding_box_and_contour_for_tables(self, layout, table_prediction_early, pixel_tabel, num_col_classifier): layout_org = np.copy(layout) @@ -5051,12 +4761,12 @@ class Eynollah: if not self.curved_line: if self.light_version: if self.textline_light: - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ @@ -5074,17 +4784,17 @@ class Eynollah: else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = 
cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \ + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \ + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: scale_param = 1 diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index c5d56b8..65331c2 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -1,10 +1,11 @@ +from functools import partial +from multiprocessing import cpu_count, Pool import cv2 import numpy as np from shapely import geometry from .rotate import rotate_image, rotation_image_new -from multiprocessing import Process, Queue, cpu_count -from multiprocessing import Pool + def contours_in_same_horizon(cy_main_hor): X1 = np.zeros((len(cy_main_hor), len(cy_main_hor))) X2 = np.zeros((len(cy_main_hor), len(cy_main_hor))) @@ -29,7 +30,6 @@ def find_contours_mean_y_diff(contours_main): def get_text_region_boxes_by_given_contours(contours): - kernel = np.ones((5, 5), np.uint8) boxes = [] contours_new = [] @@ -144,73 +144,11 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): return contours_imgs -def do_work_of_contours_in_image(queue_of_all_params, contours_per_process, indexes_r_con_per_pro, img, slope_first): - cnts_org_per_each_subprocess = [] - index_by_text_region_contours = [] - for mv in range(len(contours_per_process)): - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[contours_per_process[mv]], color=(1, 1, 1)) - - img_copy = rotation_image_new(img_copy, -slope_first) - - img_copy = img_copy.astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) - cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - - - cnts_org_per_each_subprocess.append(cont_int[0]) - - queue_of_all_params.put([ cnts_org_per_each_subprocess, index_by_text_region_contours]) - - -def get_textregion_contours_in_org_image_multi(cnts, img, slope_first): - - num_cores = cpu_count() - queue_of_all_params = Queue() - - processes = [] - nh = np.linspace(0, len(cnts), num_cores + 1) - indexes_by_text_con = np.array(range(len(cnts))) - for i in range(num_cores): - contours_per_process = cnts[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - - processes.append(Process(target=do_work_of_contours_in_image, args=(queue_of_all_params, contours_per_process, indexes_text_con_per_process, img,slope_first ))) - for i in range(num_cores): - processes[i].start() - cnts_org = [] - all_index_text_con = [] - 
for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - contours_for_sub_process = list_all_par[0] - indexes_for_sub_process = list_all_par[1] - for j in range(len(contours_for_sub_process)): - cnts_org.append(contours_for_sub_process[j]) - all_index_text_con.append(indexes_for_sub_process[j]) - for i in range(num_cores): - processes[i].join() - - print(all_index_text_con) - return cnts_org -def loop_contour_image(index_l, cnts,img, slope_first): +def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[cnts[index_l]], color=(1, 1, 1)) - - # plt.imshow(img_copy) - # plt.show() + img_copy = cv2.fillPoly(img_copy, pts=[contour], color=(1, 1, 1)) - # print(img.shape,'img') img_copy = rotation_image_new(img_copy, -slope_first) - ##print(img_copy.shape,'img_copy') - # plt.imshow(img_copy) - # plt.show() img_copy = img_copy.astype(np.uint8) imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) @@ -220,17 +158,22 @@ def loop_contour_image(index_l, cnts,img, slope_first): cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - # print(np.shape(cont_int[0])) - return cont_int[0] -def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first): - cnts_org = [] - # print(cnts,'cnts') - with Pool(cpu_count()) as p: - cnts_org = p.starmap(loop_contour_image, [(index_l,cnts, img,slope_first) for index_l in range(len(cnts))]) - - return cnts_org + return cont_int[0], index_r_con + +def get_textregion_contours_in_org_image_multi(cnts, img, slope_first): + if not len(cnts): + return [], [] + num_cores = cpu_count() + with Pool(processes=num_cores) as pool: + results = pool.starmap( + partial(do_work_of_contours_in_image, + img=img, + slope_first=slope_first, + ), + zip(cnts, range(len(cnts)))) + return tuple(zip(*results)) def get_textregion_contours_in_org_image(cnts, img, slope_first): @@ -292,69 +235,40 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): return cnts_org -def return_list_of_contours_with_desired_order(ls_cons, sorted_indexes): - return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] -def do_back_rotation_and_get_cnt_back(queue_of_all_params, contours_par_per_process,indexes_r_con_per_pro, img, slope_first): - contours_textregion_per_each_subprocess = [] - index_by_text_region_contours = [] - for mv in range(len(contours_par_per_process)): - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) +def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first): + img_copy = np.zeros(img.shape) + img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=(1, 1, 1)) - img_copy = rotation_image_new(img_copy, -slope_first) + img_copy = rotation_image_new(img_copy, -slope_first) - img_copy = img_copy.astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + img_copy = img_copy.astype(np.uint8) + imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) + ret, thresh = cv2.threshold(imgray, 0, 255, 0) - cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) - cont_int[0][:, 0, 1] = 
cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - # print(np.shape(cont_int[0])) - contours_textregion_per_each_subprocess.append(cont_int[0]*6) - index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) - - queue_of_all_params.put([contours_textregion_per_each_subprocess, index_by_text_region_contours]) + cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) + cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) + # print(np.shape(cont_int[0])) + return cont_int[0], index_r_con def get_textregion_contours_in_org_image_light(cnts, img, slope_first): - num_cores = cpu_count() - queue_of_all_params = Queue() - processes = [] - nh = np.linspace(0, len(cnts), num_cores + 1) - indexes_by_text_con = np.array(range(len(cnts))) - - h_o = img.shape[0] - w_o = img.shape[1] - - img = cv2.resize(img, (int(img.shape[1]/6.), int(img.shape[0]/6.)), interpolation=cv2.INTER_NEAREST) + if not len(cnts): + return [] + img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) #cnts = cnts/2 - cnts = [(i/ 6).astype(np.int32) for i in cnts] - - for i in range(num_cores): - contours_par_per_process = cnts[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - processes.append(Process(target=do_back_rotation_and_get_cnt_back, args=(queue_of_all_params, contours_par_per_process, indexes_text_con_per_process, img, slope_first))) - - for i in range(num_cores): - processes[i].start() - - cnts_org = [] - all_index_text_con = [] - for i in range(num_cores): - list_all_par = queue_of_all_params.get(True) - contours_for_subprocess = list_all_par[0] - indexes_for_subprocess = list_all_par[1] - for j in range(len(contours_for_subprocess)): - cnts_org.append(contours_for_subprocess[j]) - all_index_text_con.append(indexes_for_subprocess[j]) - for i in range(num_cores): - processes[i].join() - - cnts_org = return_list_of_contours_with_desired_order(cnts_org, all_index_text_con) - - return cnts_org + cnts = [(i/6).astype(np.int) for i in cnts] + num_cores = cpu_count() + with Pool(processes=num_cores) as pool: + results = pool.starmap( + partial(do_back_rotation_and_get_cnt_back, + img=img, + slope_first=slope_first, + ), + zip(cnts, range(len(cnts)))) + contours, indexes = tuple(zip(*results)) + return [i*6 for i in contours] def return_contours_of_interested_textline(region_pre_p, pixel): diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 36a1b01..922fa14 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1,22 +1,23 @@ +import os from functools import partial +from multiprocessing import Pool, cpu_count import numpy as np import cv2 from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d -import os -from multiprocessing import Process, Queue, cpu_count -from multiprocessing import Pool from .rotate import rotate_image +from .resize import resize_image from .contour import ( return_parent_contours, filter_contours_area_of_image_tables, return_contours_of_image, - filter_contours_area_of_image + filter_contours_area_of_image, + return_contours_of_interested_textline, + find_contours_mean_y_diff, ) -from .is_nan import isNaN from . 
import ( find_num_col_deskew, - isNaN, + crop_image_inside_box, ) def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): @@ -1249,13 +1250,13 @@ def separate_lines_new_inside_tiles(img_path, thetha): forest.append(peaks_neg[i + 1]) if diff_peaks[i] > cut_off: # print(forest[np.argmin(z[forest]) ] ) - if not isNaN(forest[np.argmin(z[forest])]): + if not np.isnan(forest[np.argmin(z[forest])]): peaks_neg_true.append(forest[np.argmin(z[forest])]) forest = [] forest.append(peaks_neg[i + 1]) if i == (len(peaks_neg) - 1): # print(print(forest[np.argmin(z[forest]) ] )) - if not isNaN(forest[np.argmin(z[forest])]): + if not np.isnan(forest[np.argmin(z[forest])]): peaks_neg_true.append(forest[np.argmin(z[forest])]) diff_peaks_pos = np.abs(np.diff(peaks)) @@ -1272,13 +1273,13 @@ def separate_lines_new_inside_tiles(img_path, thetha): forest.append(peaks[i + 1]) if diff_peaks_pos[i] > cut_off: # print(forest[np.argmin(z[forest]) ] ) - if not isNaN(forest[np.argmax(z[forest])]): + if not np.isnan(forest[np.argmax(z[forest])]): peaks_pos_true.append(forest[np.argmax(z[forest])]) forest = [] forest.append(peaks[i + 1]) if i == (len(peaks) - 1): # print(print(forest[np.argmin(z[forest]) ] )) - if not isNaN(forest[np.argmax(z[forest])]): + if not np.isnan(forest[np.argmax(z[forest])]): peaks_pos_true.append(forest[np.argmax(z[forest])]) # print(len(peaks_neg_true) ,len(peaks_pos_true) ,'lensss') @@ -1658,3 +1659,189 @@ def get_smallest_skew(img, sigma_des, angles, num_cores=1, plotter=None): except: angle = 0 return angle + +def do_work_of_slopes_new( + box_text, contour, contour_par, index_r_con, + textline_mask_tot_ea, image_page_rotated, slope_deskew, + logger, MAX_SLOPE=999, KERNEL=None, plotter=None +): + logger.debug('enter do_work_of_slopes_new') + if KERNEL is None: + KERNEL = np.ones((5, 5), np.uint8) + + x, y, w, h = box_text + _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + mask_textline = np.zeros(textline_mask_tot_ea.shape) + mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) + all_text_region_raw = textline_mask_tot_ea * mask_textline + all_text_region_raw = all_text_region_raw[y: y + h, x: x + w].astype(np.uint8) + img_int_p = all_text_region_raw[:,:] + img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2) + + if img_int_p.shape[0] /img_int_p.shape[1] < 0.1: + slope = 0 + slope_for_all = slope_deskew + all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w] + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, 0) + else: + try: + textline_con, hierarchy = return_contours_of_image(img_int_p) + textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.00008) + y_diff_mean = find_contours_mean_y_diff(textline_con_fil) + if np.isnan(y_diff_mean): + slope_for_all = MAX_SLOPE + else: + sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) + img_int_p[img_int_p > 0] = 1 + slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=plotter) + if abs(slope_for_all) <= 0.5: + slope_for_all = slope_deskew + except Exception as why: + logger.error(why) + slope_for_all = MAX_SLOPE + + if slope_for_all == MAX_SLOPE: + slope_for_all = slope_deskew + slope = slope_for_all + + mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) + mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contour_par], color=(1, 1, 1)) + + # plt.imshow(mask_only_con_region) + # plt.show() + all_text_region_raw = textline_mask_tot_ea[y: y 
+ h, x: x + w].copy() + mask_only_con_region = mask_only_con_region[y: y + h, x: x + w] + + ##plt.imshow(textline_mask_tot_ea) + ##plt.show() + ##plt.imshow(all_text_region_raw) + ##plt.show() + ##plt.imshow(mask_only_con_region) + ##plt.show() + + all_text_region_raw[mask_only_con_region == 0] = 0 + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text) + + return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope + + +def do_work_of_slopes_new_curved( + box_text, contour, contour_par, index_r_con, + textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, + logger, MAX_SLOPE=999, KERNEL=None, plotter=None +): + logger.debug("enter do_work_of_slopes_new_curved") + if KERNEL is None: + KERNEL = np.ones((5, 5), np.uint8) + + x, y, w, h = box_text + all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].astype(np.uint8) + img_int_p = all_text_region_raw[:, :] + + # img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2) + # plt.imshow(img_int_p) + # plt.show() + + if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: + slope = 0 + slope_for_all = slope_deskew + else: + try: + textline_con, hierarchy = return_contours_of_image(img_int_p) + textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.0008) + y_diff_mean = find_contours_mean_y_diff(textline_con_fil) + if np.isnan(y_diff_mean): + slope_for_all = MAX_SLOPE + else: + sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) + img_int_p[img_int_p > 0] = 1 + slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=plotter) + if abs(slope_for_all) < 0.5: + slope_for_all = slope_deskew + except Exception as why: + logger.error(why) + slope_for_all = MAX_SLOPE + + if slope_for_all == MAX_SLOPE: + slope_for_all = slope_deskew + slope = slope_for_all + + _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + + if abs(slope_for_all) < 45: + textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) + x, y, w, h = cv2.boundingRect(contour_par) + mask_biggest = np.zeros(mask_texts_only.shape) + mask_biggest = cv2.fillPoly(mask_biggest, pts=[contour_par], color=(1, 1, 1)) + mask_region_in_patch_region = mask_biggest[y : y + h, x : x + w] + textline_biggest_region = mask_biggest * textline_mask_tot_ea + + # print(slope_for_all,'slope_for_all') + textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y+h, x: x+w], 0, num_col, slope_for_all, + plotter=plotter) + + # new line added + ##print(np.shape(textline_rotated_separated),np.shape(mask_biggest)) + textline_rotated_separated[mask_region_in_patch_region[:, :] != 1] = 0 + # till here + + textline_region_in_image[y : y + h, x : x + w] = textline_rotated_separated + + # plt.imshow(textline_region_in_image) + # plt.show() + + pixel_img = 1 + cnt_textlines_in_image = return_contours_of_interested_textline(textline_region_in_image, pixel_img) + + textlines_cnt_per_region = [] + for jjjj in range(len(cnt_textlines_in_image)): + mask_biggest2 = np.zeros(mask_texts_only.shape) + mask_biggest2 = cv2.fillPoly(mask_biggest2, pts=[cnt_textlines_in_image[jjjj]], color=(1, 1, 1)) + if num_col + 1 == 1: + mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=5) + else: + mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) + + pixel_img = 1 + mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) + 
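# --- editorial sketch: do_work_of_slopes_new and do_work_of_slopes_new_curved
# above share a sentinel-and-fallback pattern for per-region skew: measure the
# region's own slope, treat NaN/exception/near-zero results as unusable, and
# fall back to the page-level deskew angle. Distilled with illustrative names:
import numpy as np

MAX_SLOPE = 999  # sentinel meaning "per-region measurement failed"

def slope_with_fallback(measure, region, page_slope):
    try:
        slope = measure(region)
        if np.isnan(slope):
            slope = MAX_SLOPE
        elif abs(slope) <= 0.5:
            # too close to zero to trust over the page-level estimate
            slope = page_slope
    except Exception:
        slope = MAX_SLOPE
    if slope == MAX_SLOPE:
        slope = page_slope
    return slope
# --- end of editorial sketch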
cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) + try: + textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) + except Exception as why: + logger.error(why) + else: + textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True) + # print(np.shape(textlines_cnt_per_region),'textlines_cnt_per_region') + + return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope + +def do_work_of_slopes_new_light( + box_text, contour, contour_par, index_r_con, + textline_mask_tot_ea, image_page_rotated, slope_deskew, + logger +): + logger.debug('enter do_work_of_slopes_new_light') + + x, y, w, h = box_text + _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + mask_textline = np.zeros(textline_mask_tot_ea.shape) + mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) + all_text_region_raw = textline_mask_tot_ea * mask_textline + all_text_region_raw = all_text_region_raw[y: y + h, x: x + w].astype(np.uint8) + + mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) + mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contour_par], color=(1, 1, 1)) + + if self.textline_light: + all_text_region_raw = np.copy(textline_mask_tot_ea) + all_text_region_raw[mask_only_con_region == 0] = 0 + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(all_text_region_raw) + cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, hir_on_cnt_clean_rot, + max_area=1, min_area=0.00001) + else: + all_text_region_raw = np.copy(textline_mask_tot_ea[y: y + h, x: x + w]) + mask_only_con_region = mask_only_con_region[y: y + h, x: x + w] + all_text_region_raw[mask_only_con_region == 0] = 0 + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text) + + return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope From 7e9ee90e6ec5e3e8455e75f24ec4a0c4e4b95d70 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 12:18:29 +0000 Subject: [PATCH 086/107] switch from (ad-hoc) mp.Pool to (attribute) concurrent.futures.ProcessPoolExecutor --- src/eynollah/eynollah.py | 88 ++++++++++++---------------- src/eynollah/utils/contour.py | 31 ++++------ src/eynollah/utils/separate_lines.py | 67 ++++++++++++--------- 3 files changed, 91 insertions(+), 95 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index ae292c6..8c92b92 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -13,7 +13,8 @@ import time import warnings from functools import partial from pathlib import Path -from multiprocessing import Pool, cpu_count +from multiprocessing import cpu_count +from concurrent.futures import ProcessPoolExecutor import gc from ocrd_utils import getLogger import cv2 @@ -251,6 +252,8 @@ class Eynollah: textline_light = self.textline_light, pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') + # for parallelization of CPU-intensive tasks: + self.executor = ProcessPoolExecutor(max_workers=cpu_count()) self.dir_models = dir_models self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425" @@ -1518,21 +1521,15 @@ class Eynollah: if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_light") - if 
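# --- editorial sketch of the pattern PATCH 086 adopts here: one long-lived
# ProcessPoolExecutor, kept as an instance attribute, replaces an ad-hoc
# multiprocessing.Pool per call, so worker processes are spawned once per
# Eynollah instance. Helpers accept a `map` callable defaulting to the
# builtin, so they still run serially when no executor is passed. This toy
# is self-contained; names are illustrative, not eynollah API.
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from multiprocessing import cpu_count

def score(value, offset):
    return value + offset

def score_all(values, offset, map=map):
    # builtin map -> serial; executor.map -> parallel, same call shape
    return list(map(partial(score, offset=offset), values))

class Engine:
    def __init__(self):
        # for parallelization of CPU-intensive tasks, spawned once and reused
        self.executor = ProcessPoolExecutor(max_workers=cpu_count())

    def run(self, values):
        return score_all(values, offset=10, map=self.executor.map)

if __name__ == "__main__":
    print(Engine().run([1, 2, 3]))  # [11, 12, 13]
# --- end of editorial sketch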
len(contours)>15: - num_cores = cpu_count() - else: - num_cores = 1 - with Pool(processes=num_cores) as pool: - results = pool.starmap( - partial(do_work_of_slopes_new_light, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - slope_deskew=slope_deskew, - logger=self.logger, - MAX_SLOPE=MAX_SLOPE, - KERNEL=KERNEL, - plotter=self.plotter,), - zip(boxes, contours, contours_par, range(len(contours_par)))) + results = self.executor.map(partial(do_work_of_slopes_new_light, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + slope_deskew=slope_deskew, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + logger=self.logger, + plotter=self.plotter,), + boxes, contours, contours_par, range(len(contours_par))) #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_light") return tuple(zip(*results)) @@ -1541,18 +1538,15 @@ class Eynollah: if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new") - num_cores = cpu_count() - with Pool(processes=num_cores) as pool: - results = pool.starmap( - partial(do_work_of_slopes_new, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - slope_deskew=slope_deskew, - logger=self.logger, - MAX_SLOPE=MAX_SLOPE, - KERNEL=KERNEL, - plotter=self.plotter,), - zip(boxes, contours, contours_par, range(len(contours_par)))) + results = self.executor.map(partial(do_work_of_slopes_new, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + slope_deskew=slope_deskew, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + logger=self.logger, + plotter=self.plotter,), + boxes, contours, contours_par, range(len(contours_par))) #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new") return tuple(zip(*results)) @@ -1561,21 +1555,18 @@ class Eynollah: if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") - num_cores = cpu_count() - with Pool(processes=num_cores) as pool: - results = pool.starmap( - partial(do_work_of_slopes_new_curved, - textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, - mask_texts_only=mask_texts_only, - num_col=num_col, - scale_par=scale_par, - slope_deskew=slope_deskew, - logger=self.logger, - MAX_SLOPE=MAX_SLOPE, - KERNEL=KERNEL, - plotter=self.plotter,), - zip(boxes, contours, contours_par, range(len(contours_par)))) + results = self.executor.map(partial(do_work_of_slopes_new_curved, + textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, + mask_texts_only=mask_texts_only, + num_col=num_col, + scale_par=scale_par, + slope_deskew=slope_deskew, + MAX_SLOPE=MAX_SLOPE, + KERNEL=KERNEL, + logger=self.logger, + plotter=self.plotter,), + boxes, contours, contours_par, range(len(contours_par))) #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_curved") return tuple(zip(*results)) @@ -1643,7 +1634,8 @@ class Eynollah: y_diff_mean = find_contours_mean_y_diff(textline_con_fil) sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) crop_img[crop_img > 0] = 1 - slope_corresponding_textregion = return_deskew_slop(crop_img, sigma_des, plotter=self.plotter) + slope_corresponding_textregion = 
return_deskew_slop(crop_img, sigma_des, + map=self.executor.map, logger=self.logger, plotter=self.plotter) except Exception as why: self.logger.error(why) slope_corresponding_textregion = MAX_SLOPE @@ -2932,10 +2924,8 @@ class Eynollah: def run_deskew(self, textline_mask_tot_ea): #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') - sigma = 2 - main_page_deskew = True - n_total_angles = 30 - slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, n_total_angles, main_page_deskew, plotter=self.plotter) + slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True, + map=self.executor.map, logger=self.logger, plotter=self.plotter) slope_first = 0 if self.plotter: @@ -4748,7 +4738,7 @@ class Eynollah: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 65331c2..e47c5e7 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -1,5 +1,4 @@ from functools import partial -from multiprocessing import cpu_count, Pool import cv2 import numpy as np from shapely import geometry @@ -162,17 +161,14 @@ def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): return cont_int[0], index_r_con -def get_textregion_contours_in_org_image_multi(cnts, img, slope_first): +def get_textregion_contours_in_org_image_multi(cnts, img, slope_first, map=map): if not len(cnts): return [], [] - num_cores = cpu_count() - with Pool(processes=num_cores) as pool: - results = pool.starmap( - partial(do_work_of_contours_in_image, - img=img, - slope_first=slope_first, - ), - zip(cnts, range(len(cnts)))) + results = map(partial(do_work_of_contours_in_image, + img=img, + slope_first=slope_first, + ), + cnts, range(len(cnts))) return tuple(zip(*results)) def get_textregion_contours_in_org_image(cnts, img, slope_first): @@ -252,21 +248,18 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first # print(np.shape(cont_int[0])) return cont_int[0], index_r_con -def get_textregion_contours_in_org_image_light(cnts, img, slope_first): +def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map): if not len(cnts): return [] img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) #cnts = cnts/2 cnts = [(i/6).astype(np.int) for i in cnts] - num_cores = cpu_count() - with Pool(processes=num_cores) as pool: - results = pool.starmap( - partial(do_back_rotation_and_get_cnt_back, - img=img, - slope_first=slope_first, - ), - zip(cnts, range(len(cnts)))) + results = map(partial(do_back_rotation_and_get_cnt_back, + img=img, + slope_first=slope_first, + ), + cnts, range(len(cnts))) contours, indexes = tuple(zip(*results)) return [i*6 for i in contours] diff --git 
a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 922fa14..48e1c5b 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1,6 +1,6 @@ import os +from logging import getLogger from functools import partial -from multiprocessing import Pool, cpu_count import numpy as np import cv2 from scipy.signal import find_peaks @@ -1464,7 +1464,9 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest return contours_rotated_clean -def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): +def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None): + if logger is None: + logger = getLogger(__package__) if num_col == 1: num_patches = int(img_path.shape[1] / 200.0) @@ -1572,18 +1574,20 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): # plt.show() return img_patch_ineterst_revised -def do_image_rotation(angle, img, sigma_des): - print(f"rotating image by {angle}") +def do_image_rotation(angle, img, sigma_des, logger=None): + if logger is None: + logger = getLogger(__package__) img_rot = rotate_image(img, angle) img_rot[img_rot!=0] = 1 try: var = find_num_col_deskew(img_rot, sigma_des, 20.3) except: + logger.exception("cannot determine variance for angle %.2f°", angle) var = 0 return var -def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): - num_cores = cpu_count() +def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, + main_page=False, logger=None, plotter=None, map=map): if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) @@ -1615,16 +1619,16 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals #plt.imshow(img_resized) #plt.show() angles = np.array([-45, 0, 45, 90,]) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) elif main_page: #plt.imshow(img_resized) #plt.show() angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) early_slope_edge=11 if abs(angle) > early_slope_edge: @@ -1632,11 +1636,11 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals angles = np.linspace(-90, -12, n_tot_angles) else: angles = np.linspace(90, 12, n_tot_angles) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) else: angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) early_slope_edge=22 if abs(angle) > early_slope_edge: @@ -1644,30 +1648,35 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals 
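# --- editorial sketch of the search return_deskew_slop/get_smallest_skew
# perform here: each candidate angle is scored by the variance of the rotated
# image's row-projection profile (well-separated text lines give a spiky
# profile, hence high variance), the argmax wins, and the angle grid is then
# refined around the winner. Toy version: eynollah scores candidates via
# find_num_col_deskew rather than this plain variance, and the map parameter
# below mirrors the patch's map=map convention for optional parallelism.
from functools import partial
import numpy as np
from scipy.ndimage import rotate, gaussian_filter1d

def rotation_score(img, angle, sigma=2.0):
    rot = rotate(img, angle, reshape=False, order=0)
    profile = gaussian_filter1d(rot.sum(axis=1).astype(float), sigma)
    return float(np.var(profile))

def best_angle(img, angles, map=map):
    scores = np.array(list(map(partial(rotation_score, img), angles)))
    if not scores.any():  # binary-empty input: nothing to measure
        return 0.0
    return float(angles[np.argmax(scores)])

def deskew_coarse_to_fine(img, n_angles=30, map=map):
    coarse = best_angle(img, np.array([-45.0, 0.0, 45.0, 90.0]), map=map)
    fine = np.linspace(coarse - 22.5, coarse + 22.5, n_angles)
    return best_angle(img, fine, map=map)
# --- end of editorial sketch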
angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) else: angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) - angle = get_smallest_skew(img_resized, sigma_des, angles, num_cores=num_cores, plotter=plotter) + angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) return angle -def get_smallest_skew(img, sigma_des, angles, num_cores=1, plotter=None): - with Pool(processes=num_cores) as pool: - results = pool.map(partial(do_image_rotation, img=img, sigma_des=sigma_des), angles) +def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map): + if logger is None: + logger = getLogger(__package__) + results = list(map(partial(do_image_rotation, img=img, sigma_des=sigma_des, logger=logger), angles)) if plotter: plotter.save_plot_of_rotation_angle(angles, results) try: var_res = np.array(results) + assert var_res.any() angle = angles[np.argmax(var_res)] except: + logger.exception("cannot determine best angle among %s", str(angles)) angle = 0 return angle def do_work_of_slopes_new( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, slope_deskew, - logger, MAX_SLOPE=999, KERNEL=None, plotter=None + logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): - logger.debug('enter do_work_of_slopes_new') if KERNEL is None: KERNEL = np.ones((5, 5), np.uint8) + if logger is None: + logger = getLogger(__package__) + logger.debug('enter do_work_of_slopes_new') x, y, w, h = box_text _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) @@ -1693,11 +1702,11 @@ def do_work_of_slopes_new( else: sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) img_int_p[img_int_p > 0] = 1 - slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=plotter) + slope_for_all = return_deskew_slop(img_int_p, sigma_des, logger=logger, plotter=plotter) if abs(slope_for_all) <= 0.5: slope_for_all = slope_deskew - except Exception as why: - logger.error(why) + except: + logger.exception("cannot determine angle of contours") slope_for_all = MAX_SLOPE if slope_for_all == MAX_SLOPE: @@ -1728,11 +1737,13 @@ def do_work_of_slopes_new( def do_work_of_slopes_new_curved( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, - logger, MAX_SLOPE=999, KERNEL=None, plotter=None + logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): - logger.debug("enter do_work_of_slopes_new_curved") if KERNEL is None: KERNEL = np.ones((5, 5), np.uint8) + if logger is None: + logger = getLogger(__package__) + logger.debug("enter do_work_of_slopes_new_curved") x, y, w, h = box_text all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].astype(np.uint8) @@ -1755,11 +1766,11 @@ def do_work_of_slopes_new_curved( else: sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) img_int_p[img_int_p > 0] = 1 - slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=plotter) + slope_for_all = return_deskew_slop(img_int_p, sigma_des, logger=logger, plotter=plotter) if abs(slope_for_all) < 0.5: slope_for_all = slope_deskew - except Exception as why: - logger.error(why) + except: + logger.exception("cannot determine angle of contours") slope_for_all = MAX_SLOPE if slope_for_all == MAX_SLOPE: @@ -1778,7 +1789,7 @@ def do_work_of_slopes_new_curved( # print(slope_for_all,'slope_for_all') textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y+h, x: x+w], 0, num_col, slope_for_all, - plotter=plotter) + logger=logger, 
plotter=plotter) # new line added ##print(np.shape(textline_rotated_separated),np.shape(mask_biggest)) @@ -1818,8 +1829,10 @@ def do_work_of_slopes_new_curved( def do_work_of_slopes_new_light( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, slope_deskew, - logger + logger=None ): + if logger is None: + logger = getLogger(__package__) logger.debug('enter do_work_of_slopes_new_light') x, y, w, h = box_text From 3b70b11ea6e18b00e70ec0169ebe4963d91fd364 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 18:36:20 +0000 Subject: [PATCH 087/107] avoid deskewing patches if binary-empty --- src/eynollah/utils/separate_lines.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 48e1c5b..788a510 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1515,9 +1515,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] img_xline = img_patch_ineterst[:, index_x_d:index_x_u] - sigma = 2 try: - slope_xline = return_deskew_slop(img_xline, sigma, plotter=plotter) + assert img_xline.any() + slope_xline = return_deskew_slop(img_xline, 2, logger=logger, plotter=plotter) except: slope_xline = 0 From 9270ea4550a39a07699338693962c6dedf146097 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 18:37:20 +0000 Subject: [PATCH 088/107] annotate region angles in PAGE --- src/eynollah/writer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 496b3db..5f282f2 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -60,6 +60,7 @@ class EynollahXmlWriter(): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) marginal_region.add_TextLine(textline) + marginal_region.set_orientation(slopes_marginals[marginal_idx]) points_co = '' for l in range(len(all_found_textline_polygons_marginals[marginal_idx][j])): if not (self.curved_line or self.textline_light): @@ -102,6 +103,7 @@ class EynollahXmlWriter(): if ocr_all_textlines_textregion: textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) text_region.add_TextLine(textline) + text_region.set_orientation(slopes[region_idx]) region_bboxes = all_box_coord[region_idx] points_co = '' for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[region_idx][j]): From b9ca7a6191f672c4b9f0da0179ff0d49cc3d63be Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 18:44:54 +0000 Subject: [PATCH 089/107] log num_cols-dependent resizing --- src/eynollah/eynollah.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 8c92b92..8b8808c 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1861,6 +1861,8 @@ class Eynollah: if num_col_classifier == 1 or num_col_classifier == 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + self.logger.debug("resized to %dx%d for %d cols", + img_resized.shape[1], img_resized.shape[0], num_col_classifier) prediction_regions_org = self.do_prediction_new_concept( True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_some_classes_in_light_version=True) @@ -1873,6 +1875,8 @@ class Eynollah: else: new_h = (900+ (num_col_classifier-3)*100) img_resized = resize_image(img_bin, 
int(new_h * img_bin.shape[0] /img_bin.shape[1]), new_h) + self.logger.debug("resized to %dx%d (new_h=%d) for %d cols", + img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier) prediction_regions_org = self.do_prediction_new_concept( True, img_resized, self.model_region_1_2, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) From b4b0890294d2dc1fbf6ca84794587d5185a7546f Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 18:45:18 +0000 Subject: [PATCH 090/107] add option to overwrite output xml, but skip by default if file exists --- src/eynollah/cli.py | 9 ++++++++- src/eynollah/eynollah.py | 13 +++++++++++-- src/eynollah/writer.py | 6 +++--- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 5f4b5a4..a9b5765 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -97,6 +97,12 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) type=click.Path(exists=True, file_okay=False), required=True, ) +@click.option( + "--overwrite", + "-O", + help="overwrite (instead of skipping) if output xml exists", + is_flag=True, +) @click.option( "--dir_in", "-di", @@ -253,7 +259,7 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) help="Override log level globally to this", ) -def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): +def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): initLogging() if log_level: getLogger('eynollah').setLevel(getLevelName(log_level)) @@ -273,6 +279,7 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s sys.exit(1) eynollah = Eynollah( image_filename=image, + overwrite=overwrite, dir_out=out, dir_in=dir_in, dir_models=model, diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 8b8808c..8883f19 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -165,6 +165,7 @@ class Eynollah: image_filename=None, image_pil=None, image_filename_stem=None, + overwrite=False, dir_out=None, dir_in=None, dir_of_cropped_images=None, @@ -203,6 +204,7 @@ class Eynollah: if override_dpi: self.dpi = override_dpi self.image_filename = image_filename + self.overwrite = overwrite self.dir_out = dir_out self.dir_in = dir_in self.dir_of_all = dir_of_all @@ -360,6 +362,7 @@ class Eynollah: curved_line=self.curved_line, textline_light = self.textline_light, pcgts=self.pcgts) + def imread(self, grayscale=False, uint8=True): key = 'img' if grayscale: @@ -4460,8 +4463,14 @@ class Eynollah: if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) #print("text region early -11 in %.1fs", time.time() - t0) - - + + if os.path.exists(self.writer.output_filename): + if self.overwrite: + self.logger.warning("will overwrite existing output 
file '%s'", self.writer.output_filename) + else: + self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename) + continue + if self.extract_only_images: img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 5f282f2..dc5a5dc 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -28,6 +28,7 @@ class EynollahXmlWriter(): self.counter = EynollahIdCounter() self.dir_out = dir_out self.image_filename = image_filename + self.output_filename = os.path.join(self.dir_out, self.image_filename_stem) + ".xml" self.curved_line = curved_line self.textline_light = textline_light self.pcgts = pcgts @@ -163,9 +164,8 @@ class EynollahXmlWriter(): coords.set_points(points_co[:-1]) def write_pagexml(self, pcgts): - out_fname = os.path.join(self.dir_out, self.image_filename_stem) + ".xml" - self.logger.info("output filename: '%s'", out_fname) - with open(out_fname, 'w') as f: + self.logger.info("output filename: '%s'", self.output_filename) + with open(self.output_filename, 'w') as f: f.write(to_xml(pcgts)) def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines): From dcaf79628371d03e2eed790c792930ba30079545 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 23:07:56 +0000 Subject: [PATCH 091/107] change polarity of orientation angle (PAGE schema required cw=positive) --- src/eynollah/writer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index dc5a5dc..66747b1 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -61,7 +61,7 @@ class EynollahXmlWriter(): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) marginal_region.add_TextLine(textline) - marginal_region.set_orientation(slopes_marginals[marginal_idx]) + marginal_region.set_orientation(-slopes_marginals[marginal_idx]) points_co = '' for l in range(len(all_found_textline_polygons_marginals[marginal_idx][j])): if not (self.curved_line or self.textline_light): @@ -104,7 +104,7 @@ class EynollahXmlWriter(): if ocr_all_textlines_textregion: textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) text_region.add_TextLine(textline) - text_region.set_orientation(slopes[region_idx]) + text_region.set_orientation(-slopes[region_idx]) region_bboxes = all_box_coord[region_idx] points_co = '' for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[region_idx][j]): From e9c0d716f62659466c66ae9c8b634cd22634e181 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 23:48:56 +0000 Subject: [PATCH 092/107] CI: install optional dependencies, too --- .github/workflows/test-eynollah.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index 8a6941f..479c371 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -36,7 +36,7 @@ jobs: - name: Install dependencies run: | 
python -m pip install --upgrade pip - pip install . + pip install .[OCR,plotting] pip install -r requirements-test.txt - name: Test with pytest run: make test From 0e8c561618ce267259f8e9b354f151e4b2fd040d Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 14 Dec 2024 00:24:29 +0100 Subject: [PATCH 093/107] debugging issues --- src/eynollah/eynollah.py | 831 ++++++++++++++------------- src/eynollah/utils/separate_lines.py | 6 +- 2 files changed, 419 insertions(+), 418 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 8883f19..443b5e9 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -195,6 +195,8 @@ class Eynollah: logger=None, pcgts=None, ): + if skip_layout_and_reading_order: + textline_light = True self.light_version = light_version if not dir_in: if image_pil: @@ -1512,7 +1514,7 @@ class Eynollah: textlines_ins = [polygons_of_textlines[ind] for ind in indexes_in] all_found_textline_polygons.append(textlines_ins) - slopes.append(0) + slopes.append(slope_deskew) _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) @@ -1527,11 +1529,8 @@ class Eynollah: results = self.executor.map(partial(do_work_of_slopes_new_light, textline_mask_tot_ea=textline_mask_tot, image_page_rotated=image_page_rotated, - slope_deskew=slope_deskew, - MAX_SLOPE=MAX_SLOPE, - KERNEL=KERNEL, - logger=self.logger, - plotter=self.plotter,), + slope_deskew=slope_deskew,textline_light=self.textline_light, + logger=self.logger,), boxes, contours, contours_par, range(len(contours_par))) #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_light") @@ -4245,7 +4244,7 @@ class Eynollah: - def filter_contours_without_textline_inside(self,contours,text_con_org, contours_textline): + def filter_contours_without_textline_inside(self,contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered): ###contours_txtline_of_all_textregions = [] @@ -4282,8 +4281,9 @@ class Eynollah: contours.pop(ind_u_a_trs) contours_textline.pop(ind_u_a_trs) text_con_org.pop(ind_u_a_trs) + contours_only_text_parent_d_ordered.pop(ind_u_a_trs) - return contours, text_con_org, contours_textline + return contours, text_con_org, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): @@ -4470,7 +4470,7 @@ class Eynollah: else: self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename) continue - + if self.extract_only_images: img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) @@ -4487,12 +4487,9 @@ class Eynollah: continue else: return pcgts - - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - self.logger.info("Enhancing took %.1fs ", time.time() - t0) - #print("text region early -1 in %.1fs", time.time() - t0) - t1 = time.time() if self.skip_layout_and_reading_order: + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + self.logger.info("Enhancing took %.1fs ", time.time() - t0) _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, 
skip_layout_and_reading_order=self.skip_layout_and_reading_order) @@ -4522,467 +4519,471 @@ class Eynollah: polygons_lines_xml = [] contours_tables = [] ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) if self.dir_in: + self.writer.write_pagexml(pcgts) continue else: return pcgts + if not self.extract_only_images and not self.skip_layout_and_reading_order: + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + self.logger.info("Enhancing took %.1fs ", time.time() - t0) + #print("text region early -1 in %.1fs", time.time() - t0) + t1 = time.time() + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) - if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) + if num_col_classifier == 1 or num_col_classifier ==2: + if num_col_classifier == 1: + img_w_new = 1000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - if num_col_classifier == 1 or num_col_classifier ==2: - if num_col_classifier == 1: - img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + elif num_col_classifier == 2: + img_w_new = 1300 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - elif num_col_classifier == 2: - img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) - #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, 
image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) - textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - - t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - #self.logger.info('cont_page %s', cont_page) - #plt.imshow(table_prediction) - #plt.show() + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + + t1 = time.time() + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) + #plt.imshow(table_prediction) + #plt.show() - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t1) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t1) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts - #print("text region early in %.1fs", time.time() - t0) - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - elif num_col_classifier in (1,2): - org_h_l_m = textline_mask_tot_ea.shape[0] - org_w_l_m = textline_mask_tot_ea.shape[1] - if num_col_classifier == 1: - img_w_new = 2000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: - img_w_new = 2400 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - image_page = 
resize_image(image_page,img_h_new, img_w_new ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - mask_images = resize_image(mask_images,img_h_new, img_w_new ) - mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) - text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) - table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - - if self.light_version and num_col_classifier in (1,2): - image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) - text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) - textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) - text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) - table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) - - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) - ## birdan sora chock chakir - t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ - self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - else: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ - self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.light_version: - drop_label_in_full_layout = 4 - textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 - - - text_only = ((img_revised_tab[:, :] == 1)) * 1 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - - #print("text region early 2 in %.1fs", time.time() - t0) - ###min_con_area = 0.000005 - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + 
self.logger.info("textline detection took %.1fs", time.time() - t1) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) + elif num_col_classifier in (1,2): + org_h_l_m = textline_mask_tot_ea.shape[0] + org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1: + img_w_new = 2000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + elif num_col_classifier == 2: + img_w_new = 2400 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + image_page = resize_image(image_page,img_h_new, img_w_new ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + mask_images = resize_image(mask_images,img_h_new, img_w_new ) + mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) + text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) + table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + if self.light_version and num_col_classifier in (1,2): + image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) + text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) + textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) + text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) + table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) + + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) + ## birdan sora chock chakir + t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ + self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + else: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ + self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + if self.light_version: + 
drop_label_in_full_layout = 4 + textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) - - areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + #print("text region early 2 in %.1fs", time.time() - t0) + ###min_con_area = 0.000005 + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + 
(cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) - - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + (h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + 
p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() + else: + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] - contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] - - if not len(contours_only_text_parent): - # stop early - empty_marginals = [[]] * len(polygons_of_marginals) - if self.full_layout: - pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) - else: - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) - self.logger.info("Job done in %.1fs", time.time() - t0) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts + if not len(contours_only_text_parent): + # stop early + empty_marginals = [[]] * len(polygons_of_marginals) + if self.full_layout: + pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) + else: + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) + self.logger.info("Job done in %.1fs", time.time() - t0) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts - #print("text region early 3 in %.1fs", time.time() - t0) - if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) - #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map) - #txt_con_org = self.dilate_textregions_contours(txt_con_org) - #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - else: - 
txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) - ## birdan sora chock chakir - if not self.curved_line: + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - if self.textline_light: - #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ - # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ - # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) - #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) + contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + #print("text region early 3.5 in %.1fs", time.time() - t0) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map) + #txt_con_org = self.dilate_textregions_contours(txt_con_org) + #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + else: + txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) + boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) + boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) + #print("text 
region early 5 in %.1fs", time.time() - t0) + ## birdan sora chock chakir + if not self.curved_line: + if self.light_version: + if self.textline_light: + #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ + # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ + self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ + self.get_slopes_and_deskew_new_light2(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ + # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) + + #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ + # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) + #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) + + contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, index_by_text_par_con = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered) - contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons) + else: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ + self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ + self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, 
kernel=KERNEL, iterations=1) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - - #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: - textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) + scale_param = 1 + textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - else: - scale_param = 1 - textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - - #print("text region early 6 in %.1fs", time.time() - t0) - if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = 
list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - else: - #takes long timee - contours_only_text_parent_d_ordered = None - if self.light_version: - fun = check_any_text_region_in_model_one_is_main_or_header_light - else: - fun = check_any_text_region_in_model_one_is_main_or_header - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ - all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ - contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \ - fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - if self.plotter: - self.plotter.save_plot_of_layout(text_regions_p, image_page) - self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, - all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, - kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) - pixel_lines = 6 - - if not self.reading_order_machine_based: - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + #print("text region early 6 in %.1fs", time.time() - t0) + if self.full_layout: + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + else: + #takes long timee + 
contours_only_text_parent_d_ordered = None + if self.light_version: + fun = check_any_text_region_in_model_one_is_main_or_header_light + else: + fun = check_any_text_region_in_model_one_is_main_or_header + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ + all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \ + fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + + if self.plotter: + self.plotter.save_plot_of_layout(text_regions_p, image_page) + self.plotter.save_plot_of_layout_all(text_regions_p, image_page) + + pixel_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, + all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, + kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) + pixel_lines = 6 + + if not self.reading_order_machine_based: + if not self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if num_col_classifier >= 3: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + if num_col_classifier >= 3: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + else: + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) - else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, 
regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + else: + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - if self.plotter: - self.plotter.write_images_into_directory(polygons_of_images, image_page) - t_order = time.time() + if self.plotter: + self.plotter.write_images_into_directory(polygons_of_images, image_page) + t_order = time.time() - if self.full_layout: + if self.full_layout: - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - - if self.ocr: - ocr_all_textlines = [] - else: - ocr_all_textlines = None + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, - cont_page, polygons_lines_xml, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts + if self.ocr: + ocr_all_textlines = [] + else: + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, + cont_page, polygons_lines_xml, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + print("Job done in %.1fs", time.time() - t0) + 
if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts - else: - contours_only_text_parent_h = None - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + contours_only_text_parent_h = None + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - - - if self.ocr: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + + + if self.ocr: + + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = 
VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + ocr_textline_in_textregion.append(text_ocr) - ocr_textline_in_textregion.append(text_ocr) + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) - ind_tot = ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) + else: + ocr_all_textlines = None + #print(ocr_all_textlines) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) - else: - ocr_all_textlines = None - #print(ocr_all_textlines) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, - 
all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + if self.dir_in: + self.writer.write_pagexml(pcgts) self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) - - if self.dir_in: - self.writer.write_pagexml(pcgts) - self.logger.info("Job done in %.1fs", time.time() - t0) - #print("Job done in %.1fs" % (time.time() - t0)) + #print("Job done in %.1fs" % (time.time() - t0)) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 788a510..f037a9f 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1828,7 +1828,7 @@ def do_work_of_slopes_new_curved( def do_work_of_slopes_new_light( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, slope_deskew, + textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light, logger=None ): if logger is None: @@ -1845,7 +1845,7 @@ def do_work_of_slopes_new_light( mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contour_par], color=(1, 1, 1)) - if self.textline_light: + if textline_light: all_text_region_raw = np.copy(textline_mask_tot_ea) all_text_region_raw[mask_only_con_region == 0] = 0 cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(all_text_region_raw) @@ -1857,4 +1857,4 @@ def do_work_of_slopes_new_light( all_text_region_raw[mask_only_con_region == 0] = 0 cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text) - return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope + return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope_deskew From f93c6c288d9525202957da5bb000202a657e6df8 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 14 Dec 2024 02:50:17 +0100 Subject: [PATCH 094/107] function of patch-wise inference with scatter_nd is added --- src/eynollah/eynollah.py | 107 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 443b5e9..28cb330 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1047,6 +1047,110 @@ class Eynollah: #label_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = label_res[:,:,:] return img_scaled_padded#, label_scaled_padded + def do_prediction_new_concept_scatter_nd(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): + self.logger.debug("enter do_prediction_new_concept") + + img_height_model = model.layers[-1].output_shape[1] + img_width_model = model.layers[-1].output_shape[2] + + if not patches: + img_h_page = img.shape[0] + img_w_page = img.shape[1] + img = img / 255.0 + img = resize_image(img, img_height_model, img_width_model) + + label_p_pred = model.predict(img[np.newaxis], verbose=0) + seg = np.argmax(label_p_pred, axis=3)[0] + + if thresholding_for_artificial_class_in_light_version: + #seg_text = label_p_pred[0,:,:,1] + #seg_text[seg_text<0.2] =0 + #seg_text[seg_text>0] =1 + #seg[seg_text==1]=1 + + seg_art = label_p_pred[0,:,:,4] + 
seg_art[seg_art<0.2] =0 + seg_art[seg_art>0] =1 + seg[seg_art==1]=4 + + + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + prediction_true = resize_image(seg_color, img_h_page, img_w_page) + prediction_true = prediction_true.astype(np.uint8) + return prediction_true + + if img.shape[0] < img_height_model: + img = resize_image(img, img_height_model, img.shape[1]) + + if img.shape[1] < img_width_model: + img = resize_image(img, img.shape[0], img_width_model) + + self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) + ##margin = int(marginal_of_patch_percent * img_height_model) + #width_mid = img_width_model - 2 * margin + #height_mid = img_height_model - 2 * margin + img = img / float(255.0) + + img = img.astype(np.float16) + img_h = img.shape[0] + img_w = img.shape[1] + + stride_x = img_width_model - 100 + stride_y = img_height_model - 100 + + one_tensor = tf.ones_like(img) + img_patches = tf.image.extract_patches(images=[img,one_tensor], + sizes=[1, img_height_model, img_width_model, 1], + strides=[1, stride_y, stride_x, 1], + rates=[1, 1, 1, 1], + padding='SAME') + + one_patches = img_patches[1] + img_patches = img_patches[0] + img_patches = tf.squeeze(img_patches) + + img_patches_resh = tf.reshape(img_patches, shape = (img_patches.shape[0]*img_patches.shape[1], img_height_model, img_width_model, 3)) + + pred_patches = model.predict(img_patches_resh, batch_size=n_batch_inference) + + one_patches = tf.squeeze(one_patches) + one_patches = tf.reshape(one_patches, [img_patches.shape[0]*img_patches.shape[1],img_height_model,img_width_model,3]) + + x = tf.range(img.shape[1]) + y = tf.range(img.shape[0]) + x, y = tf.meshgrid(x, y) + indices = tf.stack([y, x], axis=-1) + + indices_patches = tf.image.extract_patches(images=tf.expand_dims(indices, axis=0), sizes=[1, img_height_model, img_width_model, 1], strides=[1, stride_y, stride_x, 1], rates=[1, 1, 1, 1], padding='SAME') + indices_patches = tf.squeeze(indices_patches) + indices_patches = tf.reshape(indices_patches, [img_patches.shape[0]*img_patches.shape[1],img_height_model, img_width_model,2]) + + margin_y = int( (img_height_model - stride_y)/2. ) + margin_x = int( (img_width_model - stride_x)/2. 
) + + mask_margin = np.zeros((img_height_model, img_width_model)) + + mask_margin[margin_y:img_height_model-margin_y, margin_x:img_width_model-margin_x] = 1 + + indices_patches_array = indices_patches.numpy() + + for i in range(indices_patches_array.shape[0]): + indices_patches_array[i,:,:,0] = indices_patches_array[i,:,:,0]*mask_margin + indices_patches_array[i,:,:,1] = indices_patches_array[i,:,:,1]*mask_margin + + reconstructed = tf.scatter_nd(indices=indices_patches_array, updates=pred_patches, shape=(img.shape[0],img.shape[1],pred_patches.shape[-1])) + reconstructed_argmax = reconstructed.numpy() + + prediction_true = np.argmax(reconstructed_argmax, axis=2) + prediction_true = prediction_true.astype(np.uint8) + + gc.collect() + return np.repeat(prediction_true[:, :, np.newaxis], 3, axis=2) + + + + + def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction_new_concept") @@ -4891,7 +4995,7 @@ class Eynollah: all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) - print("Job done in %.1fs", time.time() - t0) + #print("Job done in %.1fs", time.time() - t0) if self.dir_in: self.writer.write_pagexml(pcgts) continue @@ -4975,6 +5079,7 @@ class Eynollah: pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + #print("Job done in %.1fs" % (time.time() - t0)) self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts From 0ae28f7d3ef33d6bf3650b99bc7646c3234341d1 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sat, 14 Dec 2024 12:15:56 +0000 Subject: [PATCH 095/107] switch from stdlib to loky.ProcessPoolExecutor, ensure shutdown --- requirements.txt | 1 + src/eynollah/eynollah.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index d72df29..ef3fe31 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ scikit-learn >= 0.23.2 tensorflow < 2.13 imutils >= 0.5.3 numba <= 0.58.1 +loky diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 28cb330..8139b11 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -10,11 +10,12 @@ import math import os import sys import time +import atexit import warnings from functools import partial from pathlib import Path from multiprocessing import cpu_count -from concurrent.futures import ProcessPoolExecutor +from loky import ProcessPoolExecutor import gc from ocrd_utils import getLogger import cv2 @@ -257,7 +258,8 @@ class Eynollah: pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') # for parallelization of CPU-intensive tasks: - self.executor = ProcessPoolExecutor(max_workers=cpu_count()) + self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) + atexit.register(self.executor.shutdown) self.dir_models = dir_models self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" self.model_dir_of_binarization = dir_models + 
"/eynollah-binarization_20210425" From fbeef79d50412ca15d71766af9c109ee6a16aa10 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 16 Dec 2024 01:11:54 +0100 Subject: [PATCH 096/107] adding scatter_nd inference --- src/eynollah/eynollah.py | 109 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index e802e29..006bfea 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1151,6 +1151,107 @@ class Eynollah: #label_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = label_res[:,:,:] return img_scaled_padded#, label_scaled_padded + + def do_prediction_new_concept_scatter_nd(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): + self.logger.debug("enter do_prediction_new_concept") + + img_height_model = model.layers[-1].output_shape[1] + img_width_model = model.layers[-1].output_shape[2] + + if not patches: + img_h_page = img.shape[0] + img_w_page = img.shape[1] + img = img / 255.0 + img = resize_image(img, img_height_model, img_width_model) + + label_p_pred = model.predict(img[np.newaxis], verbose=0) + seg = np.argmax(label_p_pred, axis=3)[0] + + if thresholding_for_artificial_class_in_light_version: + #seg_text = label_p_pred[0,:,:,1] + #seg_text[seg_text<0.2] =0 + #seg_text[seg_text>0] =1 + #seg[seg_text==1]=1 + + seg_art = label_p_pred[0,:,:,4] + seg_art[seg_art<0.2] =0 + seg_art[seg_art>0] =1 + seg[seg_art==1]=4 + + + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + prediction_true = resize_image(seg_color, img_h_page, img_w_page) + prediction_true = prediction_true.astype(np.uint8) + return prediction_true + + if img.shape[0] < img_height_model: + img = resize_image(img, img_height_model, img.shape[1]) + + if img.shape[1] < img_width_model: + img = resize_image(img, img.shape[0], img_width_model) + + self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) + ##margin = int(marginal_of_patch_percent * img_height_model) + #width_mid = img_width_model - 2 * margin + #height_mid = img_height_model - 2 * margin + img = img / float(255.0) + + img = img.astype(np.float16) + img_h = img.shape[0] + img_w = img.shape[1] + + stride_x = img_width_model - 100 + stride_y = img_height_model - 100 + + one_tensor = tf.ones_like(img) + img_patches = tf.image.extract_patches(images=[img,one_tensor], + sizes=[1, img_height_model, img_width_model, 1], + strides=[1, stride_y, stride_x, 1], + rates=[1, 1, 1, 1], + padding='SAME') + + one_patches = img_patches[1] + img_patches = img_patches[0] + img_patches = tf.squeeze(img_patches) + + img_patches_resh = tf.reshape(img_patches, shape = (img_patches.shape[0]*img_patches.shape[1], img_height_model, img_width_model, 3)) + + pred_patches = model.predict(img_patches_resh, batch_size=n_batch_inference) + + one_patches = tf.squeeze(one_patches) + one_patches = tf.reshape(one_patches, [img_patches.shape[0]*img_patches.shape[1],img_height_model,img_width_model,3]) + + x = tf.range(img.shape[1]) + y = tf.range(img.shape[0]) + x, y = tf.meshgrid(x, y) + indices = tf.stack([y, x], axis=-1) + + indices_patches = tf.image.extract_patches(images=tf.expand_dims(indices, axis=0), sizes=[1, img_height_model, img_width_model, 1], strides=[1, stride_y, stride_x, 1], rates=[1, 1, 1, 1], padding='SAME') + indices_patches = tf.squeeze(indices_patches) + indices_patches = tf.reshape(indices_patches, 
[img_patches.shape[0]*img_patches.shape[1],img_height_model, img_width_model,2]) + + margin_y = int( (img_height_model - stride_y)/2. ) + margin_x = int( (img_width_model - stride_x)/2. ) + + mask_margin = np.zeros((img_height_model, img_width_model)) + + mask_margin[margin_y:img_height_model-margin_y, margin_x:img_width_model-margin_x] = 1 + + indices_patches_array = indices_patches.numpy() + + for i in range(indices_patches_array.shape[0]): + indices_patches_array[i,:,:,0] = indices_patches_array[i,:,:,0]*mask_margin + indices_patches_array[i,:,:,1] = indices_patches_array[i,:,:,1]*mask_margin + + reconstructed = tf.scatter_nd(indices=indices_patches_array, updates=pred_patches, shape=(img.shape[0],img.shape[1],pred_patches.shape[-1])) + reconstructed_argmax = reconstructed.numpy() + + prediction_true = np.argmax(reconstructed_argmax, axis=2) + prediction_true = prediction_true.astype(np.uint8) + + gc.collect() + return np.repeat(prediction_true[:, :, np.newaxis], 3, axis=2) + def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction") @@ -2089,12 +2190,16 @@ class Eynollah: if not self.dir_in: prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + ##prediction_textline = self.do_prediction_new_concept_scatter_nd(patches, img, model_textline, n_batch_inference=3) + #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 else: prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + + ###prediction_textline = self.do_prediction_new_concept_scatter_nd(patches, img, self.model_textline, n_batch_inference=3) #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) @@ -2374,14 +2479,17 @@ class Eynollah: if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + ##prediction_regions_org = self.do_prediction_new_concept_scatter_nd(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + ##prediction_regions_page = self.do_prediction_new_concept_scatter_nd(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : 
self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) + ###prediction_regions_org = self.do_prediction_new_concept_scatter_nd(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: @@ -5501,3 +5609,4 @@ class Eynollah: if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) + print("all Job done in %.1fs", time.time() - t0_tot) From 92bfac4b415520f9627ce06721179316998e7ce9 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Dec 2024 15:47:21 +0100 Subject: [PATCH 097/107] Provide OCR as an option to process a directory of XML files, incorporating layout and text line coordinates. --- src/eynollah/cli.py | 56 +++++- src/eynollah/eynollah.py | 414 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 459 insertions(+), 11 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index bed0c03..4bf5257 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -1,7 +1,7 @@ import sys import click from ocrd_utils import initLogging, setOverrideLogLevel -from eynollah.eynollah import Eynollah +from eynollah.eynollah import Eynollah, Eynollah_ocr from eynollah.sbb_binarize import SbbBinarizer @click.group() @@ -305,6 +305,60 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s else: pcgts = eynollah.run() eynollah.writer.write_pagexml(pcgts) + + +@main.command() +@click.option( + "--dir_in", + "-di", + help="directory of images", + type=click.Path(exists=True, file_okay=False), +) +@click.option( + "--out", + "-o", + help="directory to write output xml data", + type=click.Path(exists=True, file_okay=False), + required=True, +) +@click.option( + "--dir_xmls", + "-dx", + help="directory of xmls", + type=click.Path(exists=True, file_okay=False), +) +@click.option( + "--model", + "-m", + help="directory of models", + type=click.Path(exists=True, file_okay=False), + required=True, +) +@click.option( + "--tr_ocr", + "-trocr/-notrocr", + is_flag=True, + help="if this parameter set to true, transformer ocr will be applied, otherwise cnn_rnn model.", +) +@click.option( + "--log_level", + "-l", + type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), + help="Override log level globally to this", +) + +def ocr(dir_in, out, dir_xmls, model, tr_ocr, log_level): + if log_level: + setOverrideLogLevel(log_level) + initLogging() + eynollah_ocr = Eynollah_ocr( + dir_xmls=dir_xmls, + dir_in=dir_in, + dir_out=out, + dir_models=model, + tr_ocr=tr_ocr, + ) + eynollah_ocr.run() if __name__ == "__main__": main() diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 006bfea..95033e9 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -41,6 +41,9 @@ import 
matplotlib.pyplot as plt # use tf1 compatibility for keras backend from tensorflow.compat.v1.keras.backend import set_session from tensorflow.keras import layers +import json +import xml.etree.ElementTree as ET +from tensorflow.keras.layers import StringLookup from .utils.contour import ( filter_contours_area_of_image, @@ -2188,18 +2191,18 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + ###prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) - ##prediction_textline = self.do_prediction_new_concept_scatter_nd(patches, img, model_textline, n_batch_inference=3) + prediction_textline = self.do_prediction_new_concept_scatter_nd(patches, img, model_textline, n_batch_inference=3) #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + ##prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) - ###prediction_textline = self.do_prediction_new_concept_scatter_nd(patches, img, self.model_textline, n_batch_inference=3) + prediction_textline = self.do_prediction_new_concept_scatter_nd(patches, img, self.model_textline, n_batch_inference=3) #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) @@ -2479,17 +2482,17 @@ class Eynollah: if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - ##prediction_regions_org = self.do_prediction_new_concept_scatter_nd(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) + prediction_regions_org = self.do_prediction_new_concept_scatter_nd(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) + ###prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - ##prediction_regions_page = self.do_prediction_new_concept_scatter_nd(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) - 
prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + prediction_regions_page = self.do_prediction_new_concept_scatter_nd(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + ##prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) - ###prediction_regions_org = self.do_prediction_new_concept_scatter_nd(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) + ###prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) + prediction_regions_org = self.do_prediction_new_concept_scatter_nd(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: @@ -5610,3 +5613,394 @@ class Eynollah: if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) print("all Job done in %.1fs", time.time() - t0_tot) + + +class Eynollah_ocr: + def __init__( + self, + dir_models, + dir_xmls=None, + dir_in=None, + dir_out=None, + tr_ocr=False, + logger=None, + ): + self.dir_in = dir_in + self.dir_out = dir_out + self.dir_xmls = dir_xmls + self.dir_models = dir_models + self.tr_ocr = tr_ocr + if tr_ocr: + self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" + self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + self.model_ocr.to(self.device) + + else: + self.model_ocr_dir = dir_models + "/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + model_ocr = load_model(self.model_ocr_dir , compile=False) + + self.prediction_model = tf.keras.models.Model( + model_ocr.get_layer(name = "image").input, + model_ocr.get_layer(name = "dense2").output) + + + with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: + characters = json.load(config_file) + + + AUTOTUNE = tf.data.AUTOTUNE + + # Mapping characters to integers. 
+ char_to_num = StringLookup(vocabulary=list(characters), mask_token=None) + + # Mapping integers back to original characters. + self.num_to_char = StringLookup( + vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True + ) + + def decode_batch_predictions(self, pred, max_len = 128): + # input_len is the product of the batch size and the + # number of time steps. + input_len = np.ones(pred.shape[0]) * pred.shape[1] + + # Decode CTC predictions using greedy search. + # decoded is a tuple with 2 elements. + decoded = tf.keras.backend.ctc_decode(pred, + input_length = input_len, + beam_width = 100) + # The outputs are in the first element of the tuple. + # Additionally, the first element is actually a list, + # therefore we take the first element of that list as well. + #print(decoded,'decoded') + decoded = decoded[0][0][:, :max_len] + + #print(decoded, decoded.shape,'decoded') + + output = [] + for d in decoded: + # Convert the predicted indices to the corresponding chars. + d = tf.strings.reduce_join(self.num_to_char(d)) + d = d.numpy().decode("utf-8") + output.append(d) + return output + + + def distortion_free_resize(self, image, img_size): + w, h = img_size + image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True) + + # Check tha amount of padding needed to be done. + pad_height = h - tf.shape(image)[0] + pad_width = w - tf.shape(image)[1] + + # Only necessary if you want to do same amount of padding on both sides. + if pad_height % 2 != 0: + height = pad_height // 2 + pad_height_top = height + 1 + pad_height_bottom = height + else: + pad_height_top = pad_height_bottom = pad_height // 2 + + if pad_width % 2 != 0: + width = pad_width // 2 + pad_width_left = width + 1 + pad_width_right = width + else: + pad_width_left = pad_width_right = pad_width // 2 + + image = tf.pad( + image, + paddings=[ + [pad_height_top, pad_height_bottom], + [pad_width_left, pad_width_right], + [0, 0], + ], + ) + + image = tf.transpose(image, (1, 0, 2)) + image = tf.image.flip_left_right(image) + return image + + def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image): + width = np.shape(textline_image)[1] + height = np.shape(textline_image)[0] + common_window = int(0.06*width) + + width1 = int ( width/2. - common_window ) + width2 = int ( width/2. 
+ common_window ) + + img_sum = np.sum(textline_image[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + + if len(peaks_real)>70: + + peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)] + + arg_max = np.argmax(sum_smoothed[peaks_real]) + + peaks_final = peaks_real[arg_max] + + return peaks_final + else: + return None + + def return_textlines_split_if_needed(self, textline_image): + + split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image) + if split_point: + image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) + return [image1, image2] + else: + return None + + def run(self): + ls_imgs = os.listdir(self.dir_in) + + if self.tr_ocr: + b_s = 2 + for ind_img in ls_imgs: + t0 = time.time() + file_name = ind_img.split('.')[0] + dir_img = os.path.join(self.dir_in, ind_img) + dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') + out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') + img = cv2.imread(dir_img) + + ##file_name = Path(dir_xmls).stem + tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding = 'iso-8859-5')) + root1=tree1.getroot() + alltags=[elem.tag for elem in root1.iter()] + link=alltags[0].split('}')[0]+'}' + + name_space = alltags[0].split('}')[0] + name_space = name_space.split('{')[1] + + region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')]) + + + + cropped_lines = [] + cropped_lines_region_indexer = [] + cropped_lines_meging_indexing = [] + + indexer_text_region = 0 + for nn in root1.iter(region_tags): + for child_textregion in nn: + if child_textregion.tag.endswith("TextLine"): + + for child_textlines in child_textregion: + if child_textlines.tag.endswith("Coords"): + cropped_lines_region_indexer.append(indexer_text_region) + p_h=child_textlines.attrib['points'].split(' ') + textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) + x,y,w,h = cv2.boundingRect(textline_coords) + + h2w_ratio = h/float(w) + + img_poly_on_img = np.copy(img) + mask_poly = np.zeros(img.shape) + mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1)) + + mask_poly = mask_poly[y:y+h, x:x+w, :] + img_crop = img_poly_on_img[y:y+h, x:x+w, :] + img_crop[mask_poly==0] = 255 + + if h2w_ratio > 0.05: + cropped_lines.append(img_crop) + cropped_lines_meging_indexing.append(0) + else: + splited_images = self.return_textlines_split_if_needed(img_crop) + #print(splited_images) + if splited_images: + cropped_lines.append(splited_images[0]) + cropped_lines_meging_indexing.append(1) + cropped_lines.append(splited_images[1]) + cropped_lines_meging_indexing.append(-1) + else: + cropped_lines.append(img_crop) + cropped_lines_meging_indexing.append(0) + indexer_text_region = indexer_text_region +1 + + + extracted_texts = [] + n_iterations = math.ceil(len(cropped_lines) / b_s) + + for i in range(n_iterations): + if i==(n_iterations-1): + n_start = i*b_s + imgs = cropped_lines[n_start:] + else: + n_start = i*b_s + n_end = (i+1)*b_s + imgs = cropped_lines[n_start:n_end] + pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values + generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + + extracted_texts = extracted_texts + generated_text_merged + +
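# (A minimal standalone sketch of the batched TrOCR inference above, for
# reference. It assumes the public "microsoft/trocr-base-printed"
# checkpoint for both processor and model, whereas the patch loads a local
# fine-tuned checkpoint; `line_crops` is a list of RGB numpy arrays, and
# batch_size=2 mirrors b_s above.)
import math
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

def trocr_recognize(line_crops, batch_size=2):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(device)
    texts = []
    for i in range(math.ceil(len(line_crops) / batch_size)):
        batch = line_crops[i * batch_size:(i + 1) * batch_size]
        # the processor resizes and normalizes the crops into one tensor
        pixel_values = processor(batch, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values.to(device))
        texts += processor.batch_decode(generated_ids, skip_special_tokens=True)
    return texts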
extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + + extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] + #print(extracted_texts_merged, len(extracted_texts_merged)) + + unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) + + #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer') + text_by_textregion = [] + for ind in unique_cropped_lines_region_indexer: + extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] + + text_by_textregion.append(" ".join(extracted_texts_merged_un)) + + #print(len(text_by_textregion) , indexer_text_region, "text_by_textregion") + + + #print(time.time() - t0 ,'elapsed time') + + + indexer = 0 + indexer_textregion = 0 + for nn in root1.iter(region_tags): + text_subelement_textregion = ET.SubElement(nn, 'TextEquiv') + unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode') + + + has_textline = False + for child_textregion in nn: + if child_textregion.tag.endswith("TextLine"): + text_subelement = ET.SubElement(child_textregion, 'TextEquiv') + unicode_textline = ET.SubElement(text_subelement, 'Unicode') + unicode_textline.text = extracted_texts_merged[indexer] + indexer = indexer + 1 + has_textline = True + if has_textline: + unicode_textregion.text = text_by_textregion[indexer_textregion] + indexer_textregion = indexer_textregion + 1 + + + + ET.register_namespace("",name_space) + tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) + #print("Job done in %.1fs", time.time() - t0) + else: + max_len = 512 + padding_token = 299 + image_width = max_len * 4 + image_height = 32 + b_s = 8 + + + img_size=(image_width, image_height) + + for ind_img in ls_imgs: + t0 = time.time() + file_name = ind_img.split('.')[0] + dir_img = os.path.join(self.dir_in, ind_img) + dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') + out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') + img = cv2.imread(dir_img) + + tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding = 'iso-8859-5')) + root1=tree1.getroot() + alltags=[elem.tag for elem in root1.iter()] + link=alltags[0].split('}')[0]+'}' + + name_space = alltags[0].split('}')[0] + name_space = name_space.split('{')[1] + + region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')]) + + cropped_lines = [] + cropped_lines_region_indexer = [] + cropped_lines_meging_indexing = [] + + tinl = time.time() + indexer_text_region = 0 + for nn in root1.iter(region_tags): + for child_textregion in nn: + if child_textregion.tag.endswith("TextLine"): + + for child_textlines in child_textregion: + if child_textlines.tag.endswith("Coords"): + cropped_lines_region_indexer.append(indexer_text_region) + p_h=child_textlines.attrib['points'].split(' ') + textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) + x,y,w,h = cv2.boundingRect(textline_coords) + + h2w_ratio = h/float(w) + + img_poly_on_img = np.copy(img) + mask_poly = np.zeros(img.shape) + mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1)) + + mask_poly = mask_poly[y:y+h, x:x+w, :] + img_crop = img_poly_on_img[y:y+h, x:x+w, :] + img_crop[mask_poly==0] = 255 + img_crop = tf.reverse(img_crop,axis=[-1]) + img_crop = 
self.distortion_free_resize(img_crop, img_size) + img_crop = tf.cast(img_crop, tf.float32) / 255.0 + cropped_lines.append(img_crop) + + indexer_text_region = indexer_text_region +1 + + + extracted_texts = [] + + n_iterations = math.ceil(len(cropped_lines) / b_s) + + for i in range(n_iterations): + if i==(n_iterations-1): + n_start = i*b_s + imgs = cropped_lines[n_start:] + imgs = np.array(imgs) + imgs = imgs.reshape(imgs.shape[0], image_width, image_height, 3) + else: + n_start = i*b_s + n_end = (i+1)*b_s + imgs = cropped_lines[n_start:n_end] + imgs = np.array(imgs).reshape(b_s, image_width, image_height, 3) + + + preds = self.prediction_model.predict(imgs, verbose=0) + pred_texts = self.decode_batch_predictions(preds) + + for ib in range(imgs.shape[0]): + pred_texts_ib = pred_texts[ib].strip("[UNK]") + extracted_texts.append(pred_texts_ib) + + unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) + + text_by_textregion = [] + for ind in unique_cropped_lines_region_indexer: + extracted_texts_merged_un = np.array(extracted_texts)[np.array(cropped_lines_region_indexer)==ind] + + text_by_textregion.append(" ".join(extracted_texts_merged_un)) + + indexer = 0 + indexer_textregion = 0 + for nn in root1.iter(region_tags): + text_subelement_textregion = ET.SubElement(nn, 'TextEquiv') + unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode') + + + has_textline = False + for child_textregion in nn: + if child_textregion.tag.endswith("TextLine"): + text_subelement = ET.SubElement(child_textregion, 'TextEquiv') + unicode_textline = ET.SubElement(text_subelement, 'Unicode') + unicode_textline.text = extracted_texts[indexer] + indexer = indexer + 1 + has_textline = True + if has_textline: + unicode_textregion.text = text_by_textregion[indexer_textregion] + indexer_textregion = indexer_textregion + 1 + + ET.register_namespace("",name_space) + tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) + #print("Job done in %.1fs", time.time() - t0) From 01376af9055440366fb7effece949e903a7de710 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sun, 22 Dec 2024 13:10:05 +0000 Subject: [PATCH 098/107] do_order_of_regions_with_model: simplify --- src/eynollah/eynollah.py | 298 ++++++++------------------------------- 1 file changed, 59 insertions(+), 239 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 8139b11..651bd17 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -2771,6 +2771,7 @@ class Eynollah: for ijv in range(len(y_min_tab_col1)): image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv]),:,:]=pixel_table return image_revised_last + def do_order_of_regions(self, *args, **kwargs): if self.full_layout: return self.do_order_of_regions_full_layout(*args, **kwargs) @@ -3380,171 +3381,35 @@ class Eynollah: model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model + def do_order_of_regions_with_model(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] - - img_poly = np.zeros((y_len,x_len), dtype='uint8') - - unique_pix = np.unique(text_regions_p) - + img_poly = np.zeros((y_len,x_len), dtype='uint8') img_poly[text_regions_p[:,:]==1] = 1 img_poly[text_regions_p[:,:]==2] = 2 img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - if not self.dir_in: - self.model_reading_order, _ = 
self.start_new_session_and_model(self.model_reading_order_dir) - - height1 =672#448 - width1 = 448#224 - - height2 =672#448 - width2= 448#224 - - height3 =672#448 - width3 = 448#224 - img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') - if contours_only_text_parent_h: _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) for j in range(len(cy_main)): img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - + co_text_all = contours_only_text_parent + contours_only_text_parent_h else: co_text_all = contours_only_text_parent + if not len(co_text_all): + return [], [] - labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') + labels_con = np.zeros((y_len, x_len, len(co_text_all)), dtype=bool) for i in range(len(co_text_all)): - img_label = np.zeros((y_len,x_len,3),dtype='uint8') - img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1)) - labels_con[:,:,i] = img_label[:,:,0] - - - img3= np.copy(img_poly) - - labels_con = resize_image(labels_con, height1, width1) - - img_header_and_sep = resize_image(img_header_and_sep, height1, width1) - - img3= resize_image (img3, height3, width3) - - img3 = img3.astype(np.uint16) - - - order_matrix = np.zeros((labels_con.shape[2], labels_con.shape[2]))-1 - inference_bs = 6 - tot_counter = 1 - batch_counter = 0 - i_indexer = [] - j_indexer =[] - - input_1= np.zeros( (inference_bs, height1, width1,3)) - - tot_iteration = int( ( labels_con.shape[2]*(labels_con.shape[2]-1) )/2. ) - full_bs_ite= tot_iteration//inference_bs - last_bs = tot_iteration % inference_bs - - #print(labels_con.shape[2],"number of regions for reading order") - for i in range(labels_con.shape[2]): - for j in range(labels_con.shape[2]): - if j>i: - img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) - img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) - - img2[:,:,0][img3[:,:]==5] = 2 - img2[:,:,0][img_header_and_sep[:,:]==1] = 3 - - img1[:,:,0][img3[:,:]==5] = 2 - img1[:,:,0][img_header_and_sep[:,:]==1] = 3 - - - i_indexer.append(i) - j_indexer.append(j) - - input_1[batch_counter,:,:,0] = img1[:,:,0]/3. - input_1[batch_counter,:,:,2] = img2[:,:,0]/3. - input_1[batch_counter,:,:,1] = img3[:,:]/5. 
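# (For contrast with the simplification in this patch: the removed code
# above scores every ordered pair of regions with the model and then ranks
# regions by their summed pairwise scores. Toy sketch; predict_pair(i, j)
# is an assumed stand-in for the batched model call, returning the
# probability that region j comes after region i.)
import numpy as np

def order_by_pairwise_scores(n, predict_pair):
    order_matrix = np.full((n, n), -1.0)
    for i in range(n):
        for j in range(i + 1, n):
            p = predict_pair(i, j)
            order_matrix[i, j] = p
            order_matrix[j, i] = 1.0 - p
    # regions predicted to precede the most other regions come first
    return list(np.argsort(order_matrix.sum(axis=1))[::-1])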
- - batch_counter = batch_counter+1 - - if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): - y_pr = self.model_reading_order.predict(input_1 , verbose=0) - - if batch_counter==inference_bs: - iteration_batches = inference_bs - else: - iteration_batches = last_bs - for jb in range(iteration_batches): - if y_pr[jb][0]>=0.5: - order_class = 1 - else: - order_class = 0 - - order_matrix[i_indexer[jb],j_indexer[jb]] = y_pr[jb][0]#order_class - order_matrix[j_indexer[jb],i_indexer[jb]] = 1-y_pr[jb][0]#int( 1 - order_class) - - batch_counter = 0 - - i_indexer = [] - j_indexer = [] - tot_counter = tot_counter+1 - - - sum_mat = np.sum(order_matrix, axis=1) - index_sort = np.argsort(sum_mat) - index_sort = index_sort[::-1] - - REGION_ID_TEMPLATE = 'region_%04d' - order_of_texts = [] - id_of_texts = [] - for order, id_text in enumerate(index_sort): - order_of_texts.append(id_text) - id_of_texts.append( REGION_ID_TEMPLATE % order ) - - - return order_of_texts, id_of_texts - - def update_list_and_return_first_with_length_bigger_than_one(self,index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp): - list_inp.pop(index_element_to_be_updated) - if len(pr_list)>0: - list_inp.insert(index_element_to_be_updated, pr_list) - else: - index_element_to_be_updated = index_element_to_be_updated -1 - - list_inp.insert(index_element_to_be_updated+1, [innner_index_pr_pos]) - if len(pos_list)>0: - list_inp.insert(index_element_to_be_updated+2, pos_list) - - len_all_elements = [len(i) for i in list_inp] - list_len_bigger_1 = np.where(np.array(len_all_elements)>1) - list_len_bigger_1 = list_len_bigger_1[0] - - if len(list_len_bigger_1)>0: - early_list_bigger_than_one = list_len_bigger_1[0] - else: - early_list_bigger_than_one = -20 - return list_inp, early_list_bigger_than_one - def do_order_of_regions_with_model_optimized_algorithm(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): - y_len = text_regions_p.shape[0] - x_len = text_regions_p.shape[1] - - img_poly = np.zeros((y_len,x_len), dtype='uint8') - - unique_pix = np.unique(text_regions_p) - - - img_poly[text_regions_p[:,:]==1] = 1 - img_poly[text_regions_p[:,:]==2] = 2 - img_poly[text_regions_p[:,:]==3] = 4 - img_poly[text_regions_p[:,:]==6] = 5 - - if not self.dir_in: - self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir) + img = labels_con[:,:,i].astype(np.uint8) + cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,)) + labels_con[:,:,i] = img height1 =672#448 width1 = 448#224 @@ -3554,112 +3419,67 @@ class Eynollah: height3 =672#448 width3 = 448#224 - - img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') - - if contours_only_text_parent_h: - _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) - - for j in range(len(cy_main)): - img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - - co_text_all = contours_only_text_parent + contours_only_text_parent_h - else: - co_text_all = contours_only_text_parent - - - labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') - for i in range(len(co_text_all)): - img_label = np.zeros((y_len,x_len,3),dtype='uint8') - img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1)) - labels_con[:,:,i] = img_label[:,:,0] - - - img3= np.copy(img_poly) - - labels_con = resize_image(labels_con, height1, width1) + labels_con = 
resize_image(labels_con.astype(np.uint8), height1, width1).astype(bool) img_header_and_sep = resize_image(img_header_and_sep, height1, width1) + img_poly = resize_image(img_poly, height3, width3) - img3= resize_image (img3, height3, width3) + if not self.dir_in: + self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir) - img3 = img3.astype(np.uint16) - inference_bs = 3 - input_1= np.zeros( (inference_bs, height1, width1,3)) - starting_list_of_regions = [] - if len(co_text_all)<=1: - starting_list_of_regions.append( list(range(1)) ) - else: - starting_list_of_regions.append( list(range(labels_con.shape[2])) ) + input_1 = np.zeros((inference_bs, height1, width1, 3)) + ordered = [list(range(len(co_text_all)))] index_update = 0 - index_selected = starting_list_of_regions[0] #print(labels_con.shape[2],"number of regions for reading order") while index_update>=0: - ij_list = starting_list_of_regions[index_update] - i = ij_list[0] - ij_list.pop(0) - - pr_list = [] + ij_list = ordered.pop(index_update) + i = ij_list.pop(0) + + ante_list = [] post_list = [] - - batch_counter = 0 - tot_counter = 1 - - tot_iteration = len(ij_list) - full_bs_ite= tot_iteration//inference_bs - last_bs = tot_iteration % inference_bs - - jbatch_indexer =[] + tot_counter = 0 + batch = [] for j in ij_list: - img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) - img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) - - img2[:,:,0][img3[:,:]==5] = 2 - img2[:,:,0][img_header_and_sep[:,:]==1] = 3 - - img1[:,:,0][img3[:,:]==5] = 2 - img1[:,:,0][img_header_and_sep[:,:]==1] = 3 - - jbatch_indexer.append(j) - - input_1[batch_counter,:,:,0] = img1[:,:,0]/3. - input_1[batch_counter,:,:,2] = img2[:,:,0]/3. - input_1[batch_counter,:,:,1] = img3[:,:]/5. - - batch_counter = batch_counter+1 - - if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): + img1 = labels_con[:,:,i].astype(float) + img2 = labels_con[:,:,j].astype(float) + img1[img_poly==5] = 2 + img2[img_poly==5] = 2 + img1[img_header_and_sep==1] = 3 + img2[img_header_and_sep==1] = 3 + + input_1[len(batch), :, :, 0] = img1 / 3. + input_1[len(batch), :, :, 2] = img2 / 3. + input_1[len(batch), :, :, 1] = img_poly / 5. 
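# (The loop above is, in effect, quicksort with the reading-order model as
# comparator: take a pivot region, split the remaining regions into those
# predicted to come before and after it, and keep splitting any sublist
# longer than one. Recursive sketch; comes_after(i, j) is an assumed
# stand-in for the batched model call.)
def order_regions(indices, comes_after):
    if len(indices) <= 1:
        return list(indices)
    pivot, rest = indices[0], list(indices[1:])
    ante = [j for j in rest if not comes_after(pivot, j)]
    post = [j for j in rest if comes_after(pivot, j)]
    return order_regions(ante, comes_after) + [pivot] + order_regions(post, comes_after)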
+ + tot_counter += 1 + batch.append(j) + if tot_counter % inference_bs == 0 or tot_counter == len(ij_list): y_pr = self.model_reading_order.predict(input_1 , verbose=0) - - if batch_counter==inference_bs: - iteration_batches = inference_bs - else: - iteration_batches = last_bs - for jb in range(iteration_batches): + for jb, j in enumerate(batch): if y_pr[jb][0]>=0.5: - post_list.append(jbatch_indexer[jb]) + post_list.append(j) else: - pr_list.append(jbatch_indexer[jb]) - - batch_counter = 0 - jbatch_indexer = [] - - tot_counter = tot_counter+1 - - starting_list_of_regions, index_update = self.update_list_and_return_first_with_length_bigger_than_one(index_update, i, pr_list, post_list,starting_list_of_regions) + ante_list.append(j) + batch = [] + + if len(ante_list): + ordered.insert(index_update, ante_list) + index_update += 1 + ordered.insert(index_update, [i]) + if len(post_list): + ordered.insert(index_update + 1, post_list) + + index_update = -1 + for index_next, ij_list in enumerate(ordered): + if len(ij_list) > 1: + index_update = index_next + break + + ordered = [i[0] for i in ordered] + region_ids = ['region_%04d' % i for i in range(len(co_text_all))] + return ordered, region_ids - index_sort = [i[0] for i in starting_list_of_regions ] - - REGION_ID_TEMPLATE = 'region_%04d' - order_of_texts = [] - id_of_texts = [] - for order, id_text in enumerate(index_sort): - order_of_texts.append(id_text) - id_of_texts.append( REGION_ID_TEMPLATE % order ) - - - return order_of_texts, id_of_texts def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] @@ -4980,7 +4800,7 @@ class Eynollah: if self.full_layout: if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) @@ -5007,7 +4827,7 @@ class Eynollah: else: contours_only_text_parent_h = None if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) From cfc65128b1d0bbf8cc4b0e66c2ba17b4b0729f90 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sun, 22 Dec 2024 14:56:32 +0000 Subject: [PATCH 099/107] reduce redundancy/indentation --- src/eynollah/eynollah.py | 816 +++++++++++++++++++-------------------- 1 file changed, 406 insertions(+), 410 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 651bd17..c0603fc 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4397,10 +4397,9 @@ class Eynollah: self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename) continue + img_res, is_image_enhanced, num_col_classifier, 
num_column_is_classified = self.run_enhancement(self.light_version) + self.logger.info("Enhancing took %.1fs ", time.time() - t0) if self.extract_only_images: - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - self.logger.info("Enhancing took %.1fs ", time.time() - t0) - text_regions_p_1 ,erosion_hurts, polygons_lines_xml,polygons_of_images,image_page, page_coord, cont_page = self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], cont_page, [], [], ocr_all_textlines) @@ -4413,9 +4412,8 @@ class Eynollah: continue else: return pcgts + if self.skip_layout_and_reading_order: - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - self.logger.info("Enhancing took %.1fs ", time.time() - t0) _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) @@ -4454,463 +4452,461 @@ class Eynollah: continue else: return pcgts - if not self.extract_only_images and not self.skip_layout_and_reading_order: - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - self.logger.info("Enhancing took %.1fs ", time.time() - t0) - #print("text region early -1 in %.1fs", time.time() - t0) - t1 = time.time() - if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) - - if num_col_classifier == 1 or num_col_classifier ==2: - if num_col_classifier == 1: - img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: - img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) - else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) - #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) - textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - - t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, 
table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - #self.logger.info('cont_page %s', cont_page) - #plt.imshow(table_prediction) - #plt.show() - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t1) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts + #print("text region early -1 in %.1fs", time.time() - t0) + t1 = time.time() + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) - #print("text region early in %.1fs", time.time() - t0) - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) - t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - elif num_col_classifier in (1,2): - org_h_l_m = textline_mask_tot_ea.shape[0] - org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: - img_w_new = 2000 + img_w_new = 1000 img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) elif num_col_classifier == 2: - img_w_new = 2400 + img_w_new = 1300 img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - image_page = resize_image(image_page,img_h_new, img_w_new ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - mask_images = resize_image(mask_images,img_h_new, img_w_new ) - mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) - text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) - table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - - if self.light_version and num_col_classifier in (1,2): - image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) - text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) - textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) - text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) - table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) - - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) - ## birdan sora chock chakir + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text 
region early -2,5 in %.1fs", time.time() - t0) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) + else: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ - self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) + #plt.imshow(table_prediction) + #plt.show() + + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t1) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue else: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ - self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) - if self.light_version: - drop_label_in_full_layout = 4 - textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 + return pcgts + + #print("text region early in %.1fs", time.time() - t0) + t1 = time.time() + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) + elif num_col_classifier in (1,2): + org_h_l_m = textline_mask_tot_ea.shape[0] + org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1: + img_w_new = 2000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 2400 + img_h_new = 
int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + image_page = resize_image(image_page,img_h_new, img_w_new ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + mask_images = resize_image(mask_images,img_h_new, img_w_new ) + mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) + text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) + table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + if self.light_version and num_col_classifier in (1,2): + image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) + text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) + textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) + text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) + table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) + + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) + ## birdan sora chock chakir + t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ + self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + else: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ + self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + if self.light_version: + drop_label_in_full_layout = 4 + textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 + + + text_only = ((img_revised_tab[:, :] == 1)) * 1 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + #print("text region early 2 in %.1fs", time.time() - t0) + ###min_con_area = 0.000005 + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + 
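# (Condensed sketch of the filtering and ordering steps above: drop text
# regions whose area relative to the page is at most min_area, i.e. the
# module constant MIN_AREA_REGION in the patch, and sort the survivors by
# area. Assumes OpenCV contours.)
import numpy as np
import cv2

def filter_and_sort_by_area(contours, page_shape, min_area):
    areas = np.array([cv2.contourArea(c) for c in contours])
    areas = areas / float(page_shape[0] * page_shape[1])
    kept = [c for c, a in zip(contours, areas) if a > min_area]
    kept_areas = [a for a in areas if a > min_area]
    order = np.argsort(kept_areas)
    return [kept[i] for i in order], [kept_areas[i] for i in order]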
contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - - #print("text region early 2 in %.1fs", time.time() - t0) - ###min_con_area = 0.000005 - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) - - areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = 
list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) - - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big - - contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + 
dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + (h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] - if not len(contours_only_text_parent): - # stop early - empty_marginals = [[]] * len(polygons_of_marginals) - if self.full_layout: - pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) - else: - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) - self.logger.info("Job done in %.1fs", time.time() - t0) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts + else: + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] - #print("text region early 3 in %.1fs", time.time() - t0) - if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) - #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map) - #txt_con_org = self.dilate_textregions_contours(txt_con_org) - #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + if not len(contours_only_text_parent): + # stop early + empty_marginals = [[]] * len(polygons_of_marginals) + if self.full_layout: + pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, 
                                                                  empty_marginals, [], [], [], cont_page, polygons_lines_xml, [])
             else:
-                txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first)
-            #print("text region early 4 in %.1fs", time.time() - t0)
-            boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
-            boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)
-            #print("text region early 5 in %.1fs", time.time() - t0)
-            ## birdan sora chock chakir
-            if not self.curved_line:
-                if self.light_version:
-                    if self.textline_light:
-                        #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
-                        #    self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
-
-                        all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
-                            self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
-                        all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \
-                            self.get_slopes_and_deskew_new_light2(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew)
-
-                        #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \
-                        #    self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con)
-
-                        #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \
-                        #    self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
-                        #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
-                        #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
-                        all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons)
-                        all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
-                        all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals)
-
-                        contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, index_by_text_par_con = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered)
+                pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, [])
+            self.logger.info("Job done in %.1fs", time.time() - t0)
+            if self.dir_in:
+                self.writer.write_pagexml(pcgts)
+                continue
+            else:
+                return pcgts
-                    else:
-                        textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
-                        all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
-                            self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
-                        all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \
-                            self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
+            #print("text region early 3 in %.1fs", time.time() - t0)
+            if self.light_version:
+                contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent)
+                contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals)
+                #print("text region early 3.5 in %.1fs", time.time() - t0)
+                txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map)
+                #txt_con_org = self.dilate_textregions_contours(txt_con_org)
+                #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent)
+            else:
+                txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first)
+            #print("text region early 4 in %.1fs", time.time() - t0)
+            boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
+            boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)
+            #print("text region early 5 in %.1fs", time.time() - t0)
+            ## birdan sora chock chakir
+            if not self.curved_line:
+                if self.light_version:
+                    if self.textline_light:
+                        #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
+                        #    self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
+
+                        all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
+                            self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
+                        all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \
+                            self.get_slopes_and_deskew_new_light2(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew)
+
+                        #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \
+                        #    self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con)
+
+                        #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \
+                        #    self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
+                        #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
+                        #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
+                        all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons)
+                        all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
+                        all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals)
+
+                        contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, index_by_text_par_con = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered)
-                        #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
                     else:
                         textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
                         all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
-                            self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
+                            self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
                         all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \
-                            self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
+                            self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
+
+                        #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
                 else:
-                    scale_param = 1
-                    textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2)
+                    textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
                     all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
-                        self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew)
-                    all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier)
+                        self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
                     all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \
-                        self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew)
-                    all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
-
-            #print("text region early 6 in %.1fs", time.time() - t0)
-            if self.full_layout:
-                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                    contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con)
-                    #try:
-                        #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con])
-                    #except:
-                        #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
-                else:
-                    #takes long timee
-                    contours_only_text_parent_d_ordered = None
-                if self.light_version:
-                    fun = check_any_text_region_in_model_one_is_main_or_header_light
-                else:
-                    fun = check_any_text_region_in_model_one_is_main_or_header
-                text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \
-                    all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \
-                    contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \
-                    fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
-
-                if self.plotter:
-                    self.plotter.save_plot_of_layout(text_regions_p, image_page)
-                    self.plotter.save_plot_of_layout_all(text_regions_p, image_page)
-
-                pixel_img = 4
-                polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img)
-                all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
-                                                                                                     all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
-                                                                                                     kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light)
-                pixel_lines = 6
-
-                if not self.reading_order_machine_based:
-                    if not self.headers_off:
-                        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                            num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h)
-                        else:
-                            _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered)
-                    elif self.headers_off:
-                        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                            num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
-                        else:
-                            _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
-
-                    if num_col_classifier >= 3:
-                        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                            regions_without_separators = regions_without_separators.astype(np.uint8)
-                            regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6)
-
-                        else:
-                            regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
-                            regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
+                        self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
+            else:
+                scale_param = 1
+                textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2)
+                all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
+                    self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew)
+                all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier)
+                all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \
+                    self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew)
+                all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
+
+            #print("text region early 6 in %.1fs", time.time() - t0)
+            if self.full_layout:
+                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
+                    contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con)
+                    #try:
+                        #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con])
+                    #except:
+                        #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
+                else:
+                    #takes long timee
+                    contours_only_text_parent_d_ordered = None
+                if self.light_version:
+                    fun = check_any_text_region_in_model_one_is_main_or_header_light
+                else:
+                    fun = check_any_text_region_in_model_one_is_main_or_header
+                text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \
+                    all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \
+                    contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \
+                    fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
+                if self.plotter:
+                    self.plotter.save_plot_of_layout(text_regions_p, image_page)
+                    self.plotter.save_plot_of_layout_all(text_regions_p, image_page)
+
+                pixel_img = 4
+                polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img)
+                all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
+                                                                                                     all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
+                                                                                                     kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light)
+                pixel_lines = 6
+
+                if not self.reading_order_machine_based:
+                    if not self.headers_off:
                        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                    boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left)
+                            num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h)
                        else:
-                    boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left)
-
-            if self.plotter:
-                self.plotter.write_images_into_directory(polygons_of_images, image_page)
-            t_order = time.time()
-
-            if self.full_layout:
+                            _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered)
+                    elif self.headers_off:
+                        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
+                            num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
+                        else:
+                            _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
-                if self.reading_order_machine_based:
-                    order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
-                else:
+                    if num_col_classifier >= 3:
                        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                        order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
+                            regions_without_separators = regions_without_separators.astype(np.uint8)
+                            regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6)
+
                        else:
-                        order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d)
-                self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
+                            regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
+                            regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
-                if self.ocr:
-                    ocr_all_textlines = []
+                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
+                        boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left)
                    else:
-                    ocr_all_textlines = None
-                pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
-                                                              all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals,
-                                                              all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals,
-                                                              cont_page, polygons_lines_xml, ocr_all_textlines)
-                self.logger.info("Job done in %.1fs", time.time() - t0)
-                #print("Job done in %.1fs", time.time() - t0)
-                if self.dir_in:
-                    self.writer.write_pagexml(pcgts)
-                    continue
+                        boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left)
+
+            if self.plotter:
+                self.plotter.write_images_into_directory(polygons_of_images, image_page)
+            t_order = time.time()
+
+            if self.full_layout:
+
+                if self.reading_order_machine_based:
+                    order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
+                else:
+                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
+                        order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
                    else:
-                    return pcgts
+                        order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d)
+                self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
+
+                if self.ocr:
+                    ocr_all_textlines = []
                else:
-                contours_only_text_parent_h = None
-                if self.reading_order_machine_based:
-                    order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
+                    ocr_all_textlines = None
+                pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
+                                                              all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals,
+                                                              all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals,
+                                                              cont_page, polygons_lines_xml, ocr_all_textlines)
+                self.logger.info("Job done in %.1fs", time.time() - t0)
+                #print("Job done in %.1fs", time.time() - t0)
+                if self.dir_in:
+                    self.writer.write_pagexml(pcgts)
+                    continue
+                else:
+                    return pcgts
+
+            else:
+                contours_only_text_parent_h = None
+                if self.reading_order_machine_based:
+                    order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
+                else:
+                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
+                        order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
                    else:
-                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                        order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
-                    else:
-                        contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con)
-                        #try:
-                            #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
-                        #except:
-                            #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con])
-                        order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)
-
-
-            if self.ocr:
-
-                device = cuda.get_current_device()
-                device.reset()
-                gc.collect()
-                model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
-                device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-                processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
-                torch.cuda.empty_cache()
-                model_ocr.to(device)
-
-                ind_tot = 0
-                #cv2.imwrite('./img_out.png', image_page)
-
-                ocr_all_textlines = []
-                for indexing, ind_poly_first in enumerate(all_found_textline_polygons):
-                    ocr_textline_in_textregion = []
-                    for indexing2, ind_poly in enumerate(ind_poly_first):
-                        if not (self.textline_light or self.curved_line):
-                            ind_poly = copy.deepcopy(ind_poly)
-                            box_ind = all_box_coord[indexing]
-                            #print(ind_poly,np.shape(ind_poly), 'ind_poly')
-                            #print(box_ind)
-                            ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind)
-                            #print(ind_poly_copy)
-                            ind_poly[ind_poly<0] = 0
-                        x, y, w, h = cv2.boundingRect(ind_poly)
-                        #print(ind_poly_copy, np.shape(ind_poly_copy))
-                        #print(x, y, w, h, h/float(w),'ratio')
-                        h2w_ratio = h/float(w)
-                        mask_poly = np.zeros(image_page.shape)
-                        if not self.light_version:
-                            img_poly_on_img = np.copy(image_page)
-                        else:
-                            img_poly_on_img = np.copy(img_bin_light)
+                        contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con)
+                        #try:
+                            #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
+                        #except:
+                            #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con])
+                        order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)
-                        mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1))
-                        if self.textline_light:
-                            mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1)
-                        img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255
-                        img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255
-                        img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255
+            if self.ocr:
-                        img_croped = img_poly_on_img[y:y+h, x:x+w, :]
-                        #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped)
-                        text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot)
+                device = cuda.get_current_device()
+                device.reset()
+                gc.collect()
+                model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
+                device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+                processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
+                torch.cuda.empty_cache()
+                model_ocr.to(device)
+
+                ind_tot = 0
+                #cv2.imwrite('./img_out.png', image_page)
+
+                ocr_all_textlines = []
+                for indexing, ind_poly_first in enumerate(all_found_textline_polygons):
+                    ocr_textline_in_textregion = []
+                    for indexing2, ind_poly in enumerate(ind_poly_first):
+                        if not (self.textline_light or self.curved_line):
+                            ind_poly = copy.deepcopy(ind_poly)
+                            box_ind = all_box_coord[indexing]
+                            #print(ind_poly,np.shape(ind_poly), 'ind_poly')
+                            #print(box_ind)
+                            ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind)
+                            #print(ind_poly_copy)
+                            ind_poly[ind_poly<0] = 0
+                        x, y, w, h = cv2.boundingRect(ind_poly)
+                        #print(ind_poly_copy, np.shape(ind_poly_copy))
+                        #print(x, y, w, h, h/float(w),'ratio')
+                        h2w_ratio = h/float(w)
+                        mask_poly = np.zeros(image_page.shape)
+                        if not self.light_version:
+                            img_poly_on_img = np.copy(image_page)
+                        else:
+                            img_poly_on_img = np.copy(img_bin_light)
-                        ocr_textline_in_textregion.append(text_ocr)
+                        mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1))
+                        if self.textline_light:
+                            mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1)
+                        img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255
+                        img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255
+                        img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255
-                        ind_tot = ind_tot +1
-                    ocr_all_textlines.append(ocr_textline_in_textregion)
+                        img_croped = img_poly_on_img[y:y+h, x:x+w, :]
+                        #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped)
+                        text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot)
#print("text region early 7 in %.1fs", time.time() - t0) + ocr_textline_in_textregion.append(text_ocr) - if self.dir_in: - self.writer.write_pagexml(pcgts) - self.logger.info("Job done in %.1fs", time.time() - t0) + + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) + + else: + ocr_all_textlines = None + #print(ocr_all_textlines) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) #print("Job done in %.1fs" % (time.time() - t0)) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) + + if self.dir_in: + self.writer.write_pagexml(pcgts) + self.logger.info("Job done in %.1fs", time.time() - t0) + #print("Job done in %.1fs" % (time.time() - t0)) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) From 335aa273a1c71587c42a55858730cebb5b82c55e Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 23 Dec 2024 03:13:21 +0000 Subject: [PATCH 100/107] simplify, wrap extremely long lines --- src/eynollah/eynollah.py | 1971 +++++++++++++------------- src/eynollah/utils/__init__.py | 1357 ++++++++---------- src/eynollah/utils/contour.py | 156 +- src/eynollah/utils/separate_lines.py | 306 ++-- 4 files changed, 1799 insertions(+), 1991 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index c0603fc..25d5ec4 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -6,6 +6,7 @@ document layout analysis (segmentation) with output in PAGE-XML """ +import tracemalloc import math import os import sys @@ -266,20 +267,45 @@ class Eynollah: self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" - self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1__4_3_091124"#"/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" + #"/modelens_full_lay_1_3_031124" + #"/modelens_full_lay_13__3_19_241024" + #"/model_full_lay_13_241024" + #"/modelens_full_lay_13_17_231024" + #"/modelens_full_lay_1_2_221024" + #"/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1__4_3_091124" #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" self.model_reading_order_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + 
"/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + #"/modelens_12sp_elay_0_3_4__3_6_n" + #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" + #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" + #"/modelens_1_2_4_5_early_lay_1_2_spaltige" + #"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/modelens_full_lay_1__4_3_091124"#"/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + #"/modelens_full_lay_1_3_031124" + #"/modelens_full_lay_13__3_19_241024" + #"/model_full_lay_13_241024" + #"/modelens_full_lay_13_17_231024" + #"/modelens_full_lay_1_2_221024" + #"/modelens_full_layout_24_till_28" + #"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_lay_1__4_3_091124" if self.textline_light: - self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# + #"/modelens_textline_1_4_16092024" + #"/model_textline_ens_3_4_5_6_artificial" + #"/modelens_textline_1_3_4_20240915" + #"/model_textline_ens_3_4_5_6_artificial" + #"/modelens_textline_9_12_13_14_15" + #"/eynollah-textline_light_20210425" + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024" else: - self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" + #"/eynollah-textline_20210425" + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024" if self.ocr: self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" @@ -320,13 +346,13 @@ class Eynollah: if self.ocr: self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")#("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") + #("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") + self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten") if self.tables: self.model_table = self.our_load_model(self.model_table_dir) self.ls_imgs = os.listdir(self.dir_in) - - + def _cache_images(self, image_filename=None, image_pil=None): ret = {} t_c0 = time.time() @@ -346,6 +372,7 @@ class Eynollah: for prefix in ('', '_grayscale'): ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) return ret + def reset_file_name_dir(self, image_filename): t_c = time.time() self._imgs = self._cache_images(image_filename=image_filename) @@ -378,31 +405,27 @@ class Eynollah: def isNaN(self, num): return num != num - def predict_enhancement(self, img): self.logger.debug("enter predict_enhancement") if not self.dir_in: self.model_enhancement, _ = 
-        img_height_model = self.model_enhancement.layers[len(self.model_enhancement.layers) - 1].output_shape[1]
-        img_width_model = self.model_enhancement.layers[len(self.model_enhancement.layers) - 1].output_shape[2]
+        img_height_model = self.model_enhancement.layers[-1].output_shape[1]
+        img_width_model = self.model_enhancement.layers[-1].output_shape[2]
         if img.shape[0] < img_height_model:
             img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST)
-
         if img.shape[1] < img_width_model:
             img = cv2.resize(img, (img_height_model, img.shape[0]), interpolation=cv2.INTER_NEAREST)
         margin = int(0 * img_width_model)
         width_mid = img_width_model - 2 * margin
         height_mid = img_height_model - 2 * margin
-        img = img / float(255.0)
-
+        img = img / 255.
         img_h = img.shape[0]
         img_w = img.shape[1]
 
         prediction_true = np.zeros((img_h, img_w, 3))
         nxf = img_w / float(width_mid)
         nyf = img_h / float(height_mid)
-
         nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
         nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)
@@ -430,37 +453,53 @@ class Eynollah:
                 img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :]
                 label_p_pred = self.model_enhancement.predict(img_patch, verbose=0)
-
-                seg = label_p_pred[0, :, :, :]
-                seg = seg * 255
+                seg = label_p_pred[0, :, :, :] * 255
 
                 if i == 0 and j == 0:
-                    seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin]
-                    prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg
+                    prediction_true[index_y_d + 0:index_y_u - margin,
+                                    index_x_d + 0:index_x_u - margin] = \
+                                        seg[0:-margin or None,
+                                            0:-margin or None]
                 elif i == nxf - 1 and j == nyf - 1:
-                    seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0]
-                    prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg
+                    prediction_true[index_y_d + margin:index_y_u - 0,
+                                    index_x_d + margin:index_x_u - 0] = \
+                                        seg[margin:,
+                                            margin:]
                 elif i == 0 and j == nyf - 1:
-                    seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin]
-                    prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg
+                    prediction_true[index_y_d + margin:index_y_u - 0,
+                                    index_x_d + 0:index_x_u - margin] = \
+                                        seg[margin:,
+                                            0:-margin or None]
                 elif i == nxf - 1 and j == 0:
-                    seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0]
-                    prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg
+                    prediction_true[index_y_d + 0:index_y_u - margin,
+                                    index_x_d + margin:index_x_u - 0] = \
+                                        seg[0:-margin or None,
+                                            margin:]
                 elif i == 0 and j != 0 and j != nyf - 1:
-                    seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin]
-                    prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg
+                    prediction_true[index_y_d + margin:index_y_u - margin,
+                                    index_x_d + 0:index_x_u - margin] = \
+                                        seg[margin:-margin or None,
+                                            0:-margin or None]
                 elif i == nxf - 1 and j != 0 and j != nyf - 1:
-                    seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0]
-                    prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg
+                    prediction_true[index_y_d + margin:index_y_u - margin,
+                                    index_x_d + margin:index_x_u - 0] = \
+                                        seg[margin:-margin or None,
+                                            margin:]
                 elif i != 0 and i != nxf - 1 and j == 0:
-                    seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin]
-                    prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg
+                    prediction_true[index_y_d + 0:index_y_u - margin,
+                                    index_x_d + margin:index_x_u - margin] = \
+                                        seg[0:-margin or None,
+                                            margin:-margin or None]
                 elif i != 0 and i != nxf - 1 and j == nyf - 1:
-                    seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin]
-                    prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg
+                    prediction_true[index_y_d + margin:index_y_u - 0,
+                                    index_x_d + margin:index_x_u - margin] = \
+                                        seg[margin:,
+                                            margin:-margin or None]
                 else:
-                    seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin]
-                    prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg
+                    prediction_true[index_y_d + margin:index_y_u - margin,
+                                    index_x_d + margin:index_x_u - margin] = \
+                                        seg[margin:-margin or None,
+                                            margin:-margin or None]
 
         prediction_true = prediction_true.astype(int)
         return prediction_true
@@ -469,55 +508,39 @@ class Eynollah:
         self.logger.debug("enter calculate_width_height_by_columns")
         if num_col == 1 and width_early < 1100:
             img_w_new = 2000
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 2000)
         elif num_col == 1 and width_early >= 2500:
             img_w_new = 2000
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 2000)
         elif num_col == 1 and width_early >= 1100 and width_early < 2500:
             img_w_new = width_early
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early)
         elif num_col == 2 and width_early < 2000:
             img_w_new = 2400
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 2400)
        elif num_col == 2 and width_early >= 3500:
             img_w_new = 2400
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 2400)
         elif num_col == 2 and width_early >= 2000 and width_early < 3500:
             img_w_new = width_early
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early)
         elif num_col == 3 and width_early < 2000:
             img_w_new = 3000
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 3000)
         elif num_col == 3 and width_early >= 4000:
             img_w_new = 3000
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 3000)
         elif num_col == 3 and width_early >= 2000 and width_early < 4000:
             img_w_new = width_early
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early)
         elif num_col == 4 and width_early < 2500:
             img_w_new = 4000
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 4000)
         elif num_col == 4 and width_early >= 5000:
             img_w_new = 4000
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 4000)
         elif num_col == 4 and width_early >= 2500 and width_early < 5000:
             img_w_new = width_early
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early)
         elif num_col == 5 and width_early < 3700:
             img_w_new = 5000
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 5000)
         elif num_col == 5 and width_early >= 7000:
             img_w_new = 5000
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 5000)
         elif num_col == 5 and width_early >= 3700 and width_early < 7000:
             img_w_new = width_early
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early)
         elif num_col == 6 and width_early < 4500:
             img_w_new = 6500  # 5400
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 6500)
         else:
             img_w_new = width_early
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early)
+        img_h_new = img_w_new * img.shape[0] // img.shape[1]
 
         if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early:
             img_new = np.copy(img)
@@ -536,10 +559,9 @@ class Eynollah:
         self.logger.debug("enter calculate_width_height_by_columns")
         if num_col == 1:
             img_w_new = 1000
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 1000)
         else:
             img_w_new = 1300
-            img_h_new = int(img.shape[0] / float(img.shape[1]) * 1300)
+        img_h_new = img_w_new * img.shape[0] // img.shape[1]
 
         if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early:
             img_new = np.copy(img)
@@ -568,7 +590,7 @@ class Eynollah:
             img_w_new = 2200
         elif num_col == 6:
             img_w_new = 2500
-        img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
+        img_h_new = img_w_new * img.shape[0] // img.shape[1]
 
         img_new = resize_image(img, img_h_new, img_w_new)
         num_column_is_classified = True
@@ -601,7 +623,6 @@ class Eynollah:
         # plt.imshow(img_1ch)
         # plt.show()
         img_1ch = img_1ch / 255.0
-
         img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
 
         img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
@@ -610,11 +631,9 @@ class Eynollah:
         img_in[0, :, :, 2] = img_1ch[:, :]
 
         label_p_pred = self.model_classifier.predict(img_in, verbose=0)
-
         num_col = np.argmax(label_p_pred[0]) + 1
 
         self.logger.info("Found %s columns (%s)", num_col, label_p_pred)
-
         img_new, _ = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred)
 
         if img_new.shape[1] > img.shape[1]:
@@ -623,7 +642,7 @@ class Eynollah:
 
         return img, img_new, is_image_enhanced
 
-    def resize_and_enhance_image_with_column_classifier(self,light_version):
+    def resize_and_enhance_image_with_column_classifier(self, light_version):
         self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
         dpi = self.dpi
         self.logger.info("Detected %s DPI", dpi)
@@ -633,16 +652,10 @@ class Eynollah:
                 self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization)
             prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5)
-
-            prediction_bin=prediction_bin[:,:,0]
-            prediction_bin = (prediction_bin[:,:]==0)*1
-            prediction_bin = prediction_bin*255
-
-            prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
-
-            prediction_bin = prediction_bin.astype(np.uint8)
+            prediction_bin = 255 * (prediction_bin[:,:,0]==0)
+            prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
             img= np.copy(prediction_bin)
-            img_bin = np.copy(prediction_bin)
+            img_bin = prediction_bin
         else:
             img = self.imread()
             img_bin = None
@@ -663,8 +676,7 @@ class Eynollah:
         elif self.num_col_lower and not self.num_col_upper:
             num_col = self.num_col_lower
             label_p_pred = [np.ones(6)]
-
-        elif (not self.num_col_upper and not self.num_col_lower):
+        elif not self.num_col_upper and not self.num_col_lower:
             if self.input_binary:
                 img_in = np.copy(img)
                 img_in = img_in / 255.0
@@ -682,7 +694,6 @@ class Eynollah:
                 img_in[0, :, :, 1] = img_1ch[:, :]
                 img_in[0, :, :, 2] = img_1ch[:, :]
 
-
             label_p_pred = self.model_classifier.predict(img_in, verbose=0)
             num_col = np.argmax(label_p_pred[0]) + 1
         elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower):
@@ -703,7 +714,6 @@ class Eynollah:
                 img_in[0, :, :, 1] = img_1ch[:, :]
                 img_in[0, :, :, 2] = img_1ch[:, :]
 
-
             label_p_pred = self.model_classifier.predict(img_in, verbose=0)
             num_col = np.argmax(label_p_pred[0]) + 1
 
@@ -713,20 +723,19 @@ class Eynollah:
                 if num_col < self.num_col_lower:
                     num_col = self.num_col_lower
                     label_p_pred = [np.ones(6)]
-
             else:
                 num_col = self.num_col_upper
                 label_p_pred = [np.ones(6)]
-
         self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5))
-
         if not self.extract_only_images:
             if dpi < DPI_THRESHOLD:
                 if light_version and num_col in (1,2):
-                    img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred)
+                    img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
+                        img, num_col, width_early, label_p_pred)
                 else:
-                    img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred)
+                    img_new, num_column_is_classified = self.calculate_width_height_by_columns(
+                        img, num_col, width_early, label_p_pred)
                 if light_version:
                     image_res = np.copy(img_new)
                 else:
@@ -734,7 +743,8 @@ class Eynollah:
                 is_image_enhanced = True
             else:
                 if light_version and num_col in (1,2):
-                    img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred)
+                    img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
+                        img, num_col, width_early, label_p_pred)
                     image_res = np.copy(img_new)
                     is_image_enhanced = True
                 else:
@@ -809,7 +819,6 @@ class Eynollah:
 
         return model, session
 
-
     def start_new_session_and_model(self, model_dir):
         self.logger.debug("enter start_new_session_and_model (model_dir=%s)", model_dir)
         #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
@@ -830,17 +839,20 @@ class Eynollah:
         else:
             try:
                 model = load_model(model_dir, compile=False)
-                self.models[model_dir] = model
             except:
-                model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
-                self.models[model_dir] = model
-
+                model = load_model(model_dir , compile=False, custom_objects={
+                    "PatchEncoder": PatchEncoder, "Patches": Patches})
+            self.models[model_dir] = model
         return model, None
 
-    def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False):
-        self.logger.debug("enter do_prediction")
+    def do_prediction(
+            self, patches, img, model,
+            n_batch_inference=1, marginal_of_patch_percent=0.1,
+            thresholding_for_some_classes_in_light_version=False,
+            thresholding_for_artificial_class_in_light_version=False):
 
+        self.logger.debug("enter do_prediction")
         img_height_model = model.layers[-1].output_shape[1]
         img_width_model = model.layers[-1].output_shape[2]
@@ -851,7 +863,6 @@ class Eynollah:
             img = resize_image(img, img_height_model, img_width_model)
 
             label_p_pred = model.predict(img[np.newaxis], verbose=0)
-
             seg = np.argmax(label_p_pred, axis=3)[0]
 
             if thresholding_for_artificial_class_in_light_version:
@@ -862,13 +873,11 @@ class Eynollah:
                 seg[seg_art==1]=2
 
             seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
-            prediction_true = resize_image(seg_color, img_h_page, img_w_page)
-            prediction_true = prediction_true.astype(np.uint8)
+            prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
             return prediction_true
 
         if img.shape[0] < img_height_model:
             img = resize_image(img, img_height_model, img.shape[1])
-
         if img.shape[1] < img_width_model:
             img = resize_image(img, img.shape[0], img_width_model)
 
@@ -876,7 +885,7 @@ class Eynollah:
         margin = int(marginal_of_patch_percent * img_height_model)
         width_mid = img_width_model - 2 * margin
         height_mid = img_height_model - 2 * margin
-        img = img / float(255.0)
+        img = img / 255.
         #img = img.astype(np.float16)
         img_h = img.shape[0]
         img_w = img.shape[1]
@@ -895,7 +904,6 @@ class Eynollah:
         list_y_d = []
 
         batch_indexer = 0
-
         img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
         for i in range(nxf):
             for j in range(nyf):
@@ -925,17 +933,14 @@ class Eynollah:
                 list_y_d.append(index_y_d)
                 list_y_u.append(index_y_u)
 
-
                 img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
-
-                batch_indexer = batch_indexer + 1
+                batch_indexer += 1
 
                 if (batch_indexer == n_batch_inference or
                     # last batch
                     i == nxf - 1 and j == nyf - 1):
                     self.logger.debug("predicting patches on %s", str(img_patch.shape))
-                    label_p_pred = model.predict(img_patch,verbose=0)
-
+                    label_p_pred = model.predict(img_patch, verbose=0)
                     seg = np.argmax(label_p_pred, axis=3)
 
                     if thresholding_for_some_classes_in_light_version:
@@ -964,8 +969,7 @@ class Eynollah:
 
                     indexer_inside_batch = 0
                    for i_batch, j_batch in zip(list_i_s, list_j_s):
-                        seg_in = seg[indexer_inside_batch,:,:]
-                        seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2)
+                        seg_in = seg[indexer_inside_batch]
 
                         index_y_u_in = list_y_u[indexer_inside_batch]
                         index_y_d_in = list_y_d[indexer_inside_batch]
@@ -974,34 +978,60 @@ class Eynollah:
                         index_x_d_in = list_x_d[indexer_inside_batch]
 
                         if i_batch == 0 and j_batch == 0:
-                            seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color
+                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
+                                            index_x_d_in + 0:index_x_u_in - margin] = \
+                                                seg_in[0:-margin or None,
+                                                       0:-margin or None,
+                                                       np.newaxis]
                         elif i_batch == nxf - 1 and j_batch == nyf - 1:
-                            seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color
+                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
+                                            index_x_d_in + margin:index_x_u_in - 0] = \
+                                                seg_in[margin:,
+                                                       margin:,
+                                                       np.newaxis]
                        elif i_batch == 0 and j_batch == nyf - 1:
-                            seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color
+                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
+                                            index_x_d_in + 0:index_x_u_in - margin] = \
+                                                seg_in[margin:,
+                                                       0:-margin or None,
+                                                       np.newaxis]
                        elif i_batch == nxf - 1 and j_batch == 0:
-                            seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :]
-                            prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color
+                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
+                                            index_x_d_in + margin:index_x_u_in - 0] = \
+                                                seg_in[0:-margin or None,
+                                                       margin:,
+                                                       np.newaxis]
                        elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1:
-                            seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color
+                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
+                                            index_x_d_in + 0:index_x_u_in - margin] = \
+                                                seg_in[margin:-margin or None,
+                                                       0:-margin or None,
+                                                       np.newaxis]
                        elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1:
-                            seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color
+                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
+                                            index_x_d_in + margin:index_x_u_in - 0] = \
+                                                seg_in[margin:-margin or None,
+                                                       margin:,
+                                                       np.newaxis]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0:
-                            seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color
+                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
+                                            index_x_d_in + margin:index_x_u_in - margin] = \
+                                                seg_in[0:-margin or None,
+                                                       margin:-margin or None,
+                                                       np.newaxis]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1:
-                            seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color
+                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
+                                            index_x_d_in + margin:index_x_u_in - margin] = \
+                                                seg_in[margin:,
+                                                       margin:-margin or None,
+                                                       np.newaxis]
                        else:
-                            seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color
-
-                        indexer_inside_batch = indexer_inside_batch +1
+                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
+                                            index_x_d_in + margin:index_x_u_in - margin] = \
+                                                seg_in[margin:-margin or None,
+                                                       margin:-margin or None,
+                                                       np.newaxis]
+                        indexer_inside_batch += 1
 
 
                     list_i_s = []
@@ -1012,15 +1042,14 @@ class Eynollah:
                     list_y_d = []
 
                     batch_indexer = 0
-
-                    img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
+                    img_patch[:] = 0
 
         prediction_true = prediction_true.astype(np.uint8)
         #del model
         gc.collect()
         return prediction_true
 
-    def do_padding_with_scale(self,img, scale):
+    def do_padding_with_scale(self, img, scale):
         h_n = int(img.shape[0]*scale)
         w_n = int(img.shape[1]*scale)
 
@@ -1031,8 +1060,8 @@ class Eynollah:
         h_diff = img.shape[0] - h_n
         w_diff = img.shape[1] - w_n
 
-        h_start = int(h_diff / 2.)
-        w_start = int(w_diff / 2.)
+        h_start = int(0.5 * h_diff)
+        w_start = int(0.5 * w_diff)
 
         img_res = resize_image(img, h_n, w_n)
         #label_res = resize_image(label, h_n, w_n)
@@ -1049,9 +1078,14 @@ class Eynollah:
         #label_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = label_res[:,:,:]
 
         return img_scaled_padded#, label_scaled_padded
 
-    def do_prediction_new_concept_scatter_nd(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False):
-        self.logger.debug("enter do_prediction_new_concept")
+    def do_prediction_new_concept_scatter_nd(
+            self, patches, img, model,
+            n_batch_inference=1, marginal_of_patch_percent=0.1,
+            thresholding_for_some_classes_in_light_version=False,
+            thresholding_for_artificial_class_in_light_version=False):
 
+        self.logger.debug("enter do_prediction_new_concept")
         img_height_model = model.layers[-1].output_shape[1]
         img_width_model = model.layers[-1].output_shape[2]
@@ -1074,16 +1108,13 @@ class Eynollah:
                 seg_art[seg_art<0.2] =0
                 seg_art[seg_art>0] =1
                 seg[seg_art==1]=4
-
             seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
-            prediction_true = resize_image(seg_color, img_h_page, img_w_page)
-            prediction_true = prediction_true.astype(np.uint8)
+            prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
             return prediction_true
 
         if img.shape[0] < img_height_model:
             img = resize_image(img, img_height_model, img.shape[1])
-
         if img.shape[1] < img_width_model:
             img = resize_image(img, img.shape[0], img_width_model)
 
@@ -1091,8 +1122,7 @@ class Eynollah:
         ##margin = int(marginal_of_patch_percent * img_height_model)
         #width_mid = img_width_model - 2 * margin
         #height_mid = img_height_model - 2 * margin
-        img = img / float(255.0)
-
+        img = img / 255.0
         img = img.astype(np.float16)
         img_h = img.shape[0]
         img_w = img.shape[1]
@@ -1101,61 +1131,61 @@ class Eynollah:
         stride_y = img_height_model - 100
 
         one_tensor = tf.ones_like(img)
-        img_patches = tf.image.extract_patches(images=[img,one_tensor],
-                                               sizes=[1, img_height_model, img_width_model, 1],
-                                               strides=[1, stride_y, stride_x, 1],
-                                               rates=[1, 1, 1, 1],
-                                               padding='SAME')
-
-        one_patches = img_patches[1]
-        img_patches = img_patches[0]
+        img_patches, one_patches = tf.image.extract_patches(
+            images=[img, one_tensor],
+            sizes=[1, img_height_model, img_width_model, 1],
+            strides=[1, stride_y, stride_x, 1],
+            rates=[1, 1, 1, 1],
+            padding='SAME')
         img_patches = tf.squeeze(img_patches)
-
-        img_patches_resh = tf.reshape(img_patches, shape = (img_patches.shape[0]*img_patches.shape[1], img_height_model, img_width_model, 3))
-
-        pred_patches = model.predict(img_patches_resh, batch_size=n_batch_inference)
-
         one_patches = tf.squeeze(one_patches)
-        one_patches = tf.reshape(one_patches, [img_patches.shape[0]*img_patches.shape[1],img_height_model,img_width_model,3])
-
+        img_patches_resh = tf.reshape(img_patches, shape=(img_patches.shape[0] * img_patches.shape[1],
+                                                          img_height_model, img_width_model, 3))
+        pred_patches = model.predict(img_patches_resh, batch_size=n_batch_inference)
+        one_patches = tf.reshape(one_patches, shape=(img_patches.shape[0] * img_patches.shape[1],
+                                                     img_height_model, img_width_model, 3))
         x = tf.range(img.shape[1])
         y = tf.range(img.shape[0])
         x, y = tf.meshgrid(x, y)
         indices = tf.stack([y, x], axis=-1)
 
-        indices_patches = tf.image.extract_patches(images=tf.expand_dims(indices, axis=0), sizes=[1, img_height_model, img_width_model, 1], strides=[1, stride_y, stride_x, 1], rates=[1, 1, 1, 1], padding='SAME')
+        indices_patches = tf.image.extract_patches(
+            images=tf.expand_dims(indices, axis=0),
+            sizes=[1, img_height_model, img_width_model, 1],
+            strides=[1, stride_y, stride_x, 1],
+            rates=[1, 1, 1, 1],
+            padding='SAME')
         indices_patches = tf.squeeze(indices_patches)
-        indices_patches = tf.reshape(indices_patches, [img_patches.shape[0]*img_patches.shape[1],img_height_model, img_width_model,2])
-
-        margin_y = int( (img_height_model - stride_y)/2. )
-        margin_x = int( (img_width_model - stride_x)/2. )
+        indices_patches = tf.reshape(indices_patches, shape=(img_patches.shape[0] * img_patches.shape[1],
+                                                             img_height_model, img_width_model, 2))
+        margin_y = int( 0.5 * (img_height_model - stride_y) )
+        margin_x = int( 0.5 * (img_width_model - stride_x) )
 
         mask_margin = np.zeros((img_height_model, img_width_model))
-
-        mask_margin[margin_y:img_height_model-margin_y, margin_x:img_width_model-margin_x] = 1
+        mask_margin[margin_y:img_height_model - margin_y,
+                    margin_x:img_width_model - margin_x] = 1
 
         indices_patches_array = indices_patches.numpy()
-
         for i in range(indices_patches_array.shape[0]):
             indices_patches_array[i,:,:,0] = indices_patches_array[i,:,:,0]*mask_margin
             indices_patches_array[i,:,:,1] = indices_patches_array[i,:,:,1]*mask_margin
 
-        reconstructed = tf.scatter_nd(indices=indices_patches_array, updates=pred_patches, shape=(img.shape[0],img.shape[1],pred_patches.shape[-1]))
-        reconstructed_argmax = reconstructed.numpy()
-
-        prediction_true = np.argmax(reconstructed_argmax, axis=2)
-        prediction_true = prediction_true.astype(np.uint8)
+        reconstructed = tf.scatter_nd(
+            indices=indices_patches_array,
+            updates=pred_patches,
+            shape=(img.shape[0], img.shape[1], pred_patches.shape[-1])).numpy()
+        prediction_true = np.argmax(reconstructed, axis=2).astype(np.uint8)
         gc.collect()
         return np.repeat(prediction_true[:, :, np.newaxis], 3, axis=2)
 
-
-
-
-
-
-    def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False):
-        self.logger.debug("enter do_prediction_new_concept")
+    def do_prediction_new_concept(
+            self, patches, img, model,
+            n_batch_inference=1, marginal_of_patch_percent=0.1,
+            thresholding_for_some_classes_in_light_version=False,
+            thresholding_for_artificial_class_in_light_version=False):
 
+        self.logger.debug("enter do_prediction_new_concept")
         img_height_model = model.layers[-1].output_shape[1]
         img_width_model = model.layers[-1].output_shape[2]
@@ -1178,16 +1208,13 @@ class Eynollah:
                 seg_art[seg_art<0.2] =0
                 seg_art[seg_art>0] =1
                 seg[seg_art==1]=4
-
             seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
-            prediction_true = resize_image(seg_color, img_h_page, img_w_page)
-            prediction_true = prediction_true.astype(np.uint8)
+            prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
            return prediction_true
 
         if img.shape[0] < img_height_model:
             img = resize_image(img, img_height_model, img.shape[1])
-
         if img.shape[1] < img_width_model:
             img = resize_image(img, img.shape[0], img_width_model)
 
@@ -1195,7 +1222,7 @@ class Eynollah:
         margin = int(marginal_of_patch_percent * img_height_model)
         width_mid = img_width_model - 2 * margin
         height_mid = img_height_model - 2 * margin
-        img = img / float(255.0)
+        img = img / 255.0
         img = img.astype(np.float16)
         img_h = img.shape[0]
         img_w = img.shape[1]
@@ -1215,7 +1242,6 @@ class Eynollah:
 
         batch_indexer = 0
         img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
-
         for i in range(nxf):
             for j in range(nyf):
                 if i == 0:
@@ -1237,7 +1263,6 @@ class Eynollah:
                     index_y_u = img_h
                     index_y_d = img_h - img_height_model
 
-
                 list_i_s.append(i)
                 list_j_s.append(j)
                 list_x_u.append(index_x_u)
@@ -1245,17 +1270,14 @@ class Eynollah:
                 list_y_d.append(index_y_d)
                 list_y_u.append(index_y_u)
 
-
-                img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
-
-                batch_indexer = batch_indexer + 1
+                img_patch[batch_indexer] = img[index_y_d:index_y_u, index_x_d:index_x_u]
+                batch_indexer += 1
 
                 if (batch_indexer == n_batch_inference or
                     # last batch
                     i == nxf - 1 and j == nyf - 1):
                     self.logger.debug("predicting patches on %s", str(img_patch.shape))
                     label_p_pred = model.predict(img_patch,verbose=0)
-
                     seg = np.argmax(label_p_pred, axis=3)
 
                     if thresholding_for_some_classes_in_light_version:
@@ -1279,8 +1301,7 @@ class Eynollah:
 
                     indexer_inside_batch = 0
                    for i_batch, j_batch in zip(list_i_s, list_j_s):
-                        seg_in = seg[indexer_inside_batch,:,:]
-                        seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2)
+                        seg_in = seg[indexer_inside_batch]
 
                         index_y_u_in = list_y_u[indexer_inside_batch]
                         index_y_d_in = list_y_d[indexer_inside_batch]
@@ -1289,35 +1310,60 @@ class Eynollah:
                         index_x_d_in = list_x_d[indexer_inside_batch]
 
                         if i_batch == 0 and j_batch == 0:
-                            seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color
+                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
+                                            index_x_d_in + 0:index_x_u_in - margin] = \
+                                                seg_in[0:-margin or None,
+                                                       0:-margin or None,
+                                                       np.newaxis]
                        elif i_batch == nxf - 1 and j_batch == nyf - 1:
-                            seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color
+                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
+                                            index_x_d_in + margin:index_x_u_in - 0] = \
+                                                seg_in[margin:,
+                                                       margin:,
+                                                       np.newaxis]
                        elif i_batch == 0 and j_batch == nyf - 1:
-                            seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color
+                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
+                                            index_x_d_in + 0:index_x_u_in - margin] = \
+                                                seg_in[margin:,
+                                                       0:-margin or None,
+                                                       np.newaxis]
                        elif i_batch == nxf - 1 and j_batch == 0:
-                            seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :]
-                            prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color
+                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
+                                            index_x_d_in + margin:index_x_u_in - 0] = \
+                                                seg_in[0:-margin or None,
+                                                       margin:,
+                                                       np.newaxis]
                        elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1:
-                            seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color
+                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
+                                            index_x_d_in + 0:index_x_u_in - margin] = \
+                                                seg_in[margin:-margin or None,
+                                                       0:-margin or None,
+                                                       np.newaxis]
                        elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1:
-                            seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color
+                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
+                                            index_x_d_in + margin:index_x_u_in - 0] = \
+                                                seg_in[margin:-margin or None,
+                                                       margin:,
+                                                       np.newaxis]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0:
-                            seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color
+                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
+                                            index_x_d_in + margin:index_x_u_in - margin] = \
+                                                seg_in[0:-margin or None,
+                                                       margin:-margin or None,
+                                                       np.newaxis]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1:
-                            seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color
+                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
+                                            index_x_d_in + margin:index_x_u_in - margin] = \
+                                                seg_in[margin:,
+                                                       margin:-margin or None,
+                                                       np.newaxis]
                        else:
-                            seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :]
-                            prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color
-
-                        indexer_inside_batch = indexer_inside_batch +1
-
+                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
+                                            index_x_d_in + margin:index_x_u_in - margin] = \
+                                                seg_in[margin:-margin or None,
+                                                       margin:-margin or None,
+                                                       np.newaxis]
+                        indexer_inside_batch += 1
 
                     list_i_s = []
                     list_j_s = []
@@ -1327,8 +1373,7 @@ class Eynollah:
                     list_y_d = []
 
                     batch_indexer = 0
-
-                    img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
+                    img_patch[:] = 0
 
         prediction_true = prediction_true.astype(np.uint8)
         gc.collect()
@@ -1338,11 +1383,10 @@ class Eynollah:
         self.logger.debug("enter extract_page")
         cont_page = []
         if not self.ignore_page_extraction:
-            img = cv2.GaussianBlur(self.image, (5, 5), 0)
-
             if not self.dir_in:
                 self.model_page, _ = self.start_new_session_and_model(self.model_page_dir)
-
+
+            img = cv2.GaussianBlur(self.image, (5, 5), 0)
             img_page_prediction = self.do_prediction(False, img, self.model_page)
             imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
             _, thresh = cv2.threshold(imgray, 0, 255, 0)
@@ -1350,7 +1394,8 @@ class Eynollah:
             contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
             if len(contours)>0:
-                cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])
+                cnt_size = np.array([cv2.contourArea(contours[j])
+                                     for j in range(len(contours))])
                 cnt = contours[np.argmax(cnt_size)]
                 x, y, w, h = cv2.boundingRect(cnt)
                 if x <= 30:
@@ -1363,32 +1408,34 @@ class Eynollah:
                     y = 0
                 if (self.image.shape[0] - (y + h)) <= 30:
                     h = h + (self.image.shape[0] - (y + h))
-
                 box = [x, y, w, h]
             else:
                 box = [0, 0, img.shape[1], img.shape[0]]
-            croped_page, page_coord = crop_image_inside_box(box, self.image)
-            cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]]))
-
+            cropped_page, page_coord = crop_image_inside_box(box, self.image)
+            cont_page.append(np.array([[page_coord[2], page_coord[0]],
+                                       [page_coord[3], page_coord[0]],
+                                       [page_coord[3], page_coord[1]],
+                                       [page_coord[2], page_coord[1]]]))
             self.logger.debug("exit extract_page")
         else:
            box = [0, 0, self.image.shape[1], self.image.shape[0]]
-            croped_page, page_coord = crop_image_inside_box(box, self.image)
-            cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]]))
-        return croped_page, page_coord, cont_page
+            cropped_page, page_coord = crop_image_inside_box(box, self.image)
+            cont_page.append(np.array([[page_coord[2], page_coord[0]],
+                                       [page_coord[3], page_coord[0]],
+                                       [page_coord[3], page_coord[1]],
+                                       [page_coord[2], page_coord[1]]]))
+        return cropped_page, page_coord, cont_page
 
     def early_page_for_num_of_column_classification(self,img_bin):
         if not self.ignore_page_extraction:
             self.logger.debug("enter early_page_for_num_of_column_classification")
             if self.input_binary:
-                img =np.copy(img_bin)
-                img = img.astype(np.uint8)
+                img = np.copy(img_bin).astype(np.uint8)
            else:
                 img = self.imread()
             if not self.dir_in:
                 self.model_page, _ = self.start_new_session_and_model(self.model_page_dir)
             img = cv2.GaussianBlur(img, (5, 5), 0)
-
             img_page_prediction = self.do_prediction(False, img, self.model_page)
 
             imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
@@ -1396,20 +1443,20 @@ class Eynollah:
             thresh = cv2.dilate(thresh, KERNEL, iterations=3)
             contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
             if len(contours)>0:
-                cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])
+                cnt_size = np.array([cv2.contourArea(contours[j])
+                                     for j in range(len(contours))])
                 cnt = contours[np.argmax(cnt_size)]
-                x, y, w, h = cv2.boundingRect(cnt)
-                box = [x, y, w, h]
+                box = cv2.boundingRect(cnt)
            else:
                 box = [0, 0, img.shape[1], img.shape[0]]
-            croped_page, page_coord = crop_image_inside_box(box, img)
+            cropped_page, page_coord = crop_image_inside_box(box, img)
 
             self.logger.debug("exit early_page_for_num_of_column_classification")
         else:
             img = self.imread()
             box = [0, 0, img.shape[1], img.shape[0]]
-            croped_page, page_coord = crop_image_inside_box(box, img)
-        return croped_page, page_coord
+            cropped_page, page_coord = crop_image_inside_box(box, img)
+        return cropped_page, page_coord
 
     def extract_text_regions_new(self, img, patches, cols):
         self.logger.debug("enter extract_text_regions")
@@ -1420,84 +1467,33 @@ class Eynollah:
                 self.model_region_fl, _ = self.start_new_session_and_model(self.model_region_dir_fully)
             else:
                 self.model_region_fl_np, _ = self.start_new_session_and_model(self.model_region_dir_fully_np)
-
         model_region = self.model_region_fl if patches else self.model_region_fl_np
 
-        if not patches:
-            if self.light_version:
-                pass
-            else:
-                img = otsu_copy_binary(img)
-                #img = img.astype(np.uint8)
-            prediction_regions2 = None
-        else:
+        if self.light_version:
+            pass
+        elif not patches:
+            img = otsu_copy_binary(img).astype(np.uint8)
+            prediction_regions = None
+        elif cols:
+            img = otsu_copy_binary(img).astype(np.uint8)
            if cols == 1:
-                if self.light_version:
-                    pass
-                else:
-                    img = otsu_copy_binary(img)
-                    img = img.astype(np.uint8)
-
-                img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000)
-                img = img.astype(np.uint8)
-
-            if cols == 2:
-                if self.light_version:
-                    pass
-                else:
-                    img = otsu_copy_binary(img)
-                    img = img.astype(np.uint8)
-                img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300)
-                img = img.astype(np.uint8)
-
-            if cols == 3:
-                if self.light_version:
-                    pass
-                else:
-                    img = otsu_copy_binary(img)
-                    img = img.astype(np.uint8)
-                img = resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600)
-                img =
img.astype(np.uint8) - - if cols == 4: - if self.light_version: - pass - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900) - img = img.astype(np.uint8) - - if cols == 5: - if self.light_version: - pass - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200) - img = img.astype(np.uint8) - - if cols >= 6: - if self.light_version: - pass - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500) - img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000).astype(np.uint8) + elif cols == 2: + img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300).astype(np.uint8) + elif cols == 3: + img = resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600).astype(np.uint8) + elif cols == 4: + img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900).astype(np.uint8) + elif cols == 5: + img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200).astype(np.uint8) + else: + img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500).astype(np.uint8) - marginal_of_patch_percent = 0.1 - - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) - - - ##prediction_regions = self.do_prediction(False, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) - + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1, n_batch_inference=3) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions - - + def extract_text_regions(self, img, patches, cols): self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] @@ -1507,92 +1503,51 @@ class Eynollah: self.model_region_fl, _ = self.start_new_session_and_model(self.model_region_dir_fully) else: self.model_region_fl_np, _ = self.start_new_session_and_model(self.model_region_dir_fully_np) - model_region = self.model_region_fl if patches else self.model_region_fl_np if not patches: img = otsu_copy_binary(img) img = img.astype(np.uint8) prediction_regions2 = None - else: + elif cols: if cols == 1: - img2 = otsu_copy_binary(img) - img2 = img2.astype(np.uint8) - img2 = resize_image(img2, int(img_height_h * 0.7), int(img_width_h * 0.7)) - marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) - prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) - - if cols == 2: - img2 = otsu_copy_binary(img) - img2 = img2.astype(np.uint8) - img2 = resize_image(img2, int(img_height_h * 0.4), int(img_width_h * 0.4)) - marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) - prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) - - elif cols > 2: - img2 = otsu_copy_binary(img) - img2 = img2.astype(np.uint8) - img2 = resize_image(img2, int(img_height_h * 0.3), int(img_width_h * 0.3)) - marginal_of_patch_percent = 0.1 - prediction_regions2 = 
self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) - prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) - - if cols == 2: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - if img_width_h >= 2000: - img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) - img = img.astype(np.uint8) - + img_height_new = int(img_height_h * 0.7) + img_width_new = int(img_width_h * 0.7) + elif cols == 2: + img_height_new = int(img_height_h * 0.4) + img_width_new = int(img_width_h * 0.4) + else: + img_height_new = int(img_height_h * 0.3) + img_width_new = int(img_width_h * 0.3) + img2 = otsu_copy_binary(img) + img2 = img2.astype(np.uint8) + img2 = resize_image(img2, img_height_new, img_width_new) + prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=0.1) + prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) + + img = otsu_copy_binary(img).astype(np.uint8) if cols == 1: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 0.5), int(img_width_h * 0.5)) - img = img.astype(np.uint8) - - if cols == 3: - if (self.scale_x == 1 and img_width_h > 3000) or (self.scale_x != 1 and img_width_h > 2800): - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img = resize_image(img, int(img_height_h * 2800 / float(img_width_h)), 2800) - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - - if cols == 4: - if (self.scale_x == 1 and img_width_h > 4000) or (self.scale_x != 1 and img_width_h > 3700): - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 3700 / float(img_width_h)), 3700) - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) - - if cols == 5: - if self.scale_x == 1 and img_width_h > 5000: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.7), int(img_width_h * 0.7)) - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9) ) - - if cols >= 6: - if img_width_h > 5600: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 5600 / float(img_width_h)), 5600) - else: - img = otsu_copy_binary(img) - img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) + img = resize_image(img, int(img_height_h * 0.5), int(img_width_h * 0.5)).astype(np.uint8) + elif cols == 2 and img_width_h >= 2000: + img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8) + elif cols == 3 and ((self.scale_x == 1 and img_width_h > 3000) or + (self.scale_x != 1 and img_width_h > 2800)): + img = resize_image(img, 2800 * img_height_h // img_width_h, 2800).astype(np.uint8) + elif cols == 4 and ((self.scale_x == 1 and img_width_h > 4000) or + (self.scale_x != 1 and img_width_h > 3700)): + img = resize_image(img, 3700 * img_height_h // img_width_h, 3700).astype(np.uint8) + elif cols == 4: + img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8) + elif cols == 5 and self.scale_x == 1 and img_width_h > 5000: + img = resize_image(img, int(img_height_h * 0.7), int(img_width_h * 0.7)).astype(np.uint8) + elif cols == 5: + img = resize_image(img, int(img_height_h * 0.9), int(img_width_h 
* 0.9)).astype(np.uint8) + elif img_width_h > 5600: + img = resize_image(img, 5600 * img_height_h // img_width_h, 5600).astype(np.uint8) + else: + img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8) - marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 @@ -1600,9 +1555,8 @@ class Eynollah: def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) - - - M_main_tot = [cv2.moments(polygons_of_textlines[j]) for j in range(len(polygons_of_textlines))] + M_main_tot = [cv2.moments(polygons_of_textlines[j]) + for j in range(len(polygons_of_textlines))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] @@ -1612,18 +1566,16 @@ class Eynollah: all_box_coord =[] for index, con_region_ind in enumerate(contours_par): - results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False) for ind in args_textlines ] + results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False) + for ind in args_textlines ] results = np.array(results) - indexes_in = args_textlines[results==1] - textlines_ins = [polygons_of_textlines[ind] for ind in indexes_in] all_found_textline_polygons.append(textlines_ins) slopes.append(slope_deskew) _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) - all_box_coord.append(crop_coor) return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes @@ -1690,32 +1642,29 @@ class Eynollah: img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - prediction_textline = self.do_prediction(use_patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, - thresholding_for_artificial_class_in_light_version=self.textline_light) + prediction_textline = self.do_prediction( + use_patches, img, self.model_textline, + marginal_of_patch_percent=0.15, n_batch_inference=3, + thresholding_for_artificial_class_in_light_version=self.textline_light) #if not self.textline_light: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline) #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 prediction_textline = resize_image(prediction_textline, img_h, img_w) - textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1 old_art = np.copy(textline_mask_tot_ea_art) - if not self.textline_light: textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') #textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) - prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1 textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8') - if not self.textline_light: textline_mask_tot_ea_lines = 
cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1 - if not self.textline_light: prediction_textline[:,:][old_art[:,:]==1]=2 @@ -1723,7 +1672,8 @@ class Eynollah: prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) self.logger.debug('exit textline_contours') - return ((prediction_textline[:, :, 0]==1)*1).astype('uint8'), ((prediction_textline_longshot_true_size[:, :, 0]==1)*1).astype('uint8') + return ((prediction_textline[:, :, 0]==1).astype(np.uint8), + (prediction_textline_longshot_true_size[:, :, 0]==1).astype(np.uint8)) def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): @@ -1752,7 +1702,8 @@ class Eynollah: slope_corresponding_textregion = slope_biggest slopes_sub.append(slope_corresponding_textregion) - cnt_clean_rot = textline_contours_postprocessing(crop_img, slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv]) + cnt_clean_rot = textline_contours_postprocessing( + crop_img, slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv]) poly_sub.append(cnt_clean_rot) boxes_sub_new.append(boxes_per_process[mv]) @@ -1782,55 +1733,41 @@ class Eynollah: elif num_col_classifier == 6: img_w_new = 2500 img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) - img_resized = resize_image(img,img_h_new, img_w_new ) - - if not self.dir_in: self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light_only_images_extraction) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region) prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - image_page, page_coord, cont_page = self.extract_page() - prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - - prediction_regions_org=prediction_regions_org[:,:,0] mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - + polygons_lines_xml = textline_con_fil = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - - + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1)) text_regions_p_true[text_regions_p_true.shape[0]-15:text_regions_p_true.shape[0], :] = 0 text_regions_p_true[:, text_regions_p_true.shape[1]-15:text_regions_p_true.shape[1]] = 0 ##polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.0001) polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.001) - 
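[Annotation] The hunks above keep funneling region masks through the same idiom: binarize one label of the segmentation map, vectorize it into contours, and keep only contours whose area relative to the whole page lies between min_area and max_area (compare MIN_AREA_REGION introduced at the top of this series, and calls like return_contours_of_interested_region(text_regions_p_true, 2, 0.001)). A minimal self-contained sketch of that pattern using only OpenCV and NumPy; the helper name and thresholds below are illustrative, not the project's:

    import cv2
    import numpy as np

    def contours_of_label_filtered(label_map, label, min_area=0.00001, max_area=1.0):
        # binarize one label of the segmentation map
        mask = (label_map == label).astype(np.uint8) * 255
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # keep contours whose area is a plausible fraction of the page area
        page_area = mask.shape[0] * mask.shape[1]
        return [cnt for cnt in contours
                if min_area * page_area <= cv2.contourArea(cnt) <= max_area * page_area]

Under these assumptions, contours_of_label_filtered(text_regions_p_true, 2, 0.001) would mimic the image-region extraction above: tiny speckles fall below the relative-area floor and are dropped before any polygon is written out.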
image_boundary_of_doc = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1])) ###image_boundary_of_doc[:6, :] = 1 @@ -1839,14 +1776,13 @@ class Eynollah: ###image_boundary_of_doc[:, :6] = 1 ###image_boundary_of_doc[:, text_regions_p_true.shape[1]-6:text_regions_p_true.shape[1]] = 1 - polygons_of_images_fin = [] for ploy_img_ind in polygons_of_images: """ test_poly_image = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1])) - test_poly_image = cv2.fillPoly(test_poly_image, pts = [ploy_img_ind], color=(1,1,1)) + test_poly_image = cv2.fillPoly(test_poly_image, pts=[ploy_img_ind], color=(1,1,1)) - test_poly_image = test_poly_image[:,:] + image_boundary_of_doc[:,:] + test_poly_image = test_poly_image + image_boundary_of_doc test_poly_image_intersected_area = ( test_poly_image[:,:]==2 )*1 test_poly_image_intersected_area = test_poly_image_intersected_area.sum() @@ -1854,22 +1790,30 @@ class Eynollah: if test_poly_image_intersected_area==0: ##polygons_of_images_fin.append(ploy_img_ind) - x, y, w, h = cv2.boundingRect(ploy_img_ind) - box = [x, y, w, h] + box = cv2.boundingRect(ploy_img_ind) _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) - #cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - - polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) ) + # cont_page.append(np.array([[page_coord[2], page_coord[0]], + # [page_coord[3], page_coord[0]], + # [page_coord[3], page_coord[1]], + # [page_coord[2], page_coord[1]]])) + polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], + [page_coord_img[3], page_coord_img[0]], + [page_coord_img[3], page_coord_img[1]], + [page_coord_img[2], page_coord_img[1]]]) ) """ - x, y, w, h = cv2.boundingRect(ploy_img_ind) + box = x, y, w, h = cv2.boundingRect(ploy_img_ind) if h < 150 or w < 150: pass else: - box = [x, y, w, h] _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) - #cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - - polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) ) + # cont_page.append(np.array([[page_coord[2], page_coord[0]], + # [page_coord[3], page_coord[0]], + # [page_coord[3], page_coord[1]], + # [page_coord[2], page_coord[1]]])) + polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], + [page_coord_img[3], page_coord_img[0]], + [page_coord_img[3], page_coord_img[1]], + [page_coord_img[2], page_coord_img[1]]])) self.logger.debug("exit get_regions_extract_images_only") return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page @@ -1883,34 +1827,24 @@ class Eynollah: img_width_h = img_org.shape[1] #model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) - #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: img_w_new = 1000 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) - elif num_col_classifier == 2: img_w_new = 1500#1500 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * 
img_w_new) - elif num_col_classifier == 3: img_w_new = 2000 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) - elif num_col_classifier == 4: img_w_new = 2500 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 5: img_w_new = 3000 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) else: img_w_new = 4000 - img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) + img_h_new = img_w_new * img_org.shape[0] // img_org.shape[1] img_resized = resize_image(img,img_h_new, img_w_new ) t_bin = time.time() - #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) @@ -1935,12 +1869,8 @@ class Eynollah: if not self.dir_in: self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - + prediction_bin = 255 * (prediction_bin[:,:,0] == 0) + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) prediction_bin = prediction_bin.astype(np.uint16) #img= np.copy(prediction_bin) img_bin = np.copy(prediction_bin) @@ -1951,11 +1881,8 @@ class Eynollah: ###textline_mask_tot_ea = self.run_textline(img_bin) self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), len(np.unique(img_resized))) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) - - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - #print(self.image_org.shape) #cv2.imwrite('out_13.png', self.image_page_org_size) @@ -1979,7 +1906,9 @@ class Eynollah: prediction_regions_page = self.do_prediction_new_concept( False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page + ys = slice(*self.page_coord[0:2]) + xs = slice(*self.page_coord[2:4]) + prediction_regions_org[ys, xs] = prediction_regions_page else: new_h = (900+ (num_col_classifier-3)*100) img_resized = resize_image(img_bin, int(new_h * img_bin.shape[0] /img_bin.shape[1]), new_h) @@ -1989,26 +1918,16 @@ class Eynollah: True, img_resized, self.model_region_1_2, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - #print("inside 3 ", time.time()-t_in) - #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() - prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - img_bin = resize_image(img_bin,img_height_h, img_width_h ) - prediction_regions_org=prediction_regions_org[:,:,0] - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - - - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - mask_texts_only = mask_texts_only.astype('uint8') ##if num_col_classifier == 1 or num_col_classifier == 2: @@ -2016,57 +1935,39 @@ class Eynollah: ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) - - 
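[Annotation] A recurring cleanup in this patch: index windows that used to be spelled out as int(coord[0]):int(coord[1]) pairs at every use site are now built once as slice objects (ys, xs) and reused for both reads and writes. A tiny sketch of the idiom, with made-up coordinates:

    import numpy as np

    page = np.zeros((100, 80, 3), dtype=np.uint8)
    page_coord = (10, 60, 5, 70)      # (y0, y1, x0, x1), illustrative values only
    ys = slice(*page_coord[0:2])      # slice(10, 60)
    xs = slice(*page_coord[2:4])      # slice(5, 70)
    region = np.full((50, 65, 3), 255, dtype=np.uint8)
    page[ys, xs] = region             # identical to page[10:60, 5:70] = region

Besides being shorter, building the slice pair once keeps the y/x windows consistent across the read and write sites that previously repeated the arithmetic.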
mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - - test_khat = np.zeros(prediction_regions_org.shape) - - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - - + test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1)) + #plt.imshow(test_khat[:,:]) #plt.show() - #for jv in range(1): #print(jv, hir_lines_xml[0][232][3]) #test_khat = np.zeros(prediction_regions_org.shape) - #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) - - #plt.imshow(test_khat[:,:]) #plt.show() - - polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - + polygons_lines_xml = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - #plt.imshow(test_khat[:,:]) #plt.show() #sys.exit() polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts) - - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3,3,3)) text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) #plt.imshow(textline_mask_tot_ea) @@ -2107,14 +2008,10 @@ class Eynollah: mask_zeros_y = (prediction_regions_org_y[:,:]==0)*1 ##img_only_regions_with_sep = ( (prediction_regions_org_y[:,:] != 3) & (prediction_regions_org_y[:,:] != 0) )*1 - img_only_regions_with_sep = ( prediction_regions_org_y[:,:] == 1 )*1 - img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - + img_only_regions_with_sep = (prediction_regions_org_y == 1).astype(np.uint8) try: img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=20) - _, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) - img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1))) prediction_regions_org = self.do_prediction(True, img, self.model_region) @@ -2122,8 +2019,7 @@ class Eynollah: prediction_regions_org=prediction_regions_org[:,:,0] prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros_y[:,:]==1)]=0 - - + if not self.dir_in: self.model_region_p2, _ = self.start_new_session_and_model(self.model_region_dir_p2) @@ -2132,30 +2028,23 @@ class Eynollah: prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) - mask_zeros2 = (prediction_regions_org2[:,:,0] == 0) mask_lines2 = (prediction_regions_org2[:,:,0] == 3) text_sume_early = (prediction_regions_org[:,:] == 1).sum() prediction_regions_org_copy = np.copy(prediction_regions_org) prediction_regions_org_copy[(prediction_regions_org_copy[:,:]==1) & (mask_zeros2[:,:]==1)] = 0 text_sume_second = ((prediction_regions_org_copy[:,:]==1)*1).sum() - - rate_two_models = 
text_sume_second / float(text_sume_early) * 100 + rate_two_models = 100. * text_sume_second / text_sume_early self.logger.info("ratio_of_two_models: %s", rate_two_models) if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD): prediction_regions_org = np.copy(prediction_regions_org_copy) - - prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3 mask_lines_only=(prediction_regions_org[:,:]==3)*1 prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2) - - prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2) - - + if rate_two_models<=40: if self.input_binary: prediction_bin = np.copy(img_org) @@ -2164,19 +2053,14 @@ class Eynollah: self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) - - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + prediction_bin = 255 * (prediction_bin[:,:,0]==0) + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) if not self.dir_in: self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) ratio_y=1 ratio_x=1 - img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) prediction_regions_org = self.do_prediction(True, img, self.model_region) @@ -2188,10 +2072,9 @@ class Eynollah: mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1 - - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_lines_xml = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) @@ -2205,7 +2088,6 @@ class Eynollah: self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml except: - if self.input_binary: prediction_bin = np.copy(img_org) @@ -2213,14 +2095,8 @@ class Eynollah: self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) - prediction_bin=prediction_bin[:,:,0] - - prediction_bin = (prediction_bin[:,:]==0)*1 - - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - + prediction_bin = 255 * (prediction_bin[:,:,0]==0) + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) if not self.dir_in: self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) @@ -2248,49 +2124,53 @@ class Eynollah: #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - - mask_images_only=(prediction_regions_org[:,:] ==2)*1 + 
mask_lines_only = (prediction_regions_org == 3)*1 + mask_texts_only = (prediction_regions_org == 1)*1 + mask_images_only= (prediction_regions_org == 2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - + polygons_lines_xml = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) - text_regions_p_true = np.zeros(prediction_regions_org.shape) - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) erosion_hurts = True self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml - def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): + def do_order_of_regions_full_layout( + self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): + self.logger.debug("enter do_order_of_regions_full_layout") - cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours(contours_only_text_parent) - cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contours(contours_only_text_parent_h) + boxes = np.array(boxes, dtype=int) # to be on the safe side + cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( + contours_only_text_parent) + cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contours( + contours_only_text_parent_h) try: arg_text_con = [] for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: + if (x_min_text_only[ii] + 80 >= boxes[jj][0] and + x_min_text_only[ii] + 80 < boxes[jj][1] and + y_cor_x_min_main[ii] >= boxes[jj][2] and + y_cor_x_min_main[ii] < boxes[jj][3]): arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + + (cy_text_only[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) @@ -2298,12 +2178,17 @@ class Eynollah: for ii in range(len(cx_text_only_h)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (x_min_text_only_h[ii] + 80) >= boxes[jj][0] and (x_min_text_only_h[ii] + 80) < boxes[jj][1] and y_cor_x_min_main_h[ii] >= boxes[jj][2] and y_cor_x_min_main_h[ii] < boxes[jj][3]: + if (x_min_text_only_h[ii] + 80 >= boxes[jj][0] and + x_min_text_only_h[ii] + 80 < boxes[jj][1] and + 
y_cor_x_min_main_h[ii] >= boxes[jj][2] and + y_cor_x_min_main_h[ii] < boxes[jj][3]): arg_text_con_h.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con_h.append(ind_min) args_contours_h = np.array(range(len(arg_text_con_h))) @@ -2315,7 +2200,8 @@ class Eynollah: order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): - + ys = slice(*boxes[iij][2:4]) + xs = slice(*boxes[iij][0:2]) args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] con_inter_box = [] @@ -2327,9 +2213,12 @@ class Eynollah: for box in args_contours_box_h: con_inter_box_h.append(contours_only_text_parent_h[box]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts( + con_inter_box, con_inter_box_h, + matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] @@ -2338,11 +2227,13 @@ class Eynollah: for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for zahler, _ in enumerate(args_contours_box_h): arg_order_v = indexes_sorted_head[zahler] - order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji in range(len(id_of_texts)): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -2366,17 +2257,22 @@ class Eynollah: for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located + if (cx_text_only[ii] >= boxes[jj][0] and + cx_text_only[ii] < boxes[jj][1] and + cy_text_only[ii] >= boxes[jj][2] and + cy_text_only[ii] < boxes[jj][3]): + # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) check_if_textregion_located_in_a_box = 
True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + + (cy_text_only[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) - order_by_con_main = np.zeros(len(arg_text_con)) ############################# head @@ -2385,22 +2281,29 @@ class Eynollah: for ii in range(len(cx_text_only_h)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if cx_text_only_h[ii] >= boxes[jj][0] and cx_text_only_h[ii] < boxes[jj][1] and cy_text_only_h[ii] >= boxes[jj][2] and cy_text_only_h[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located + if (cx_text_only_h[ii] >= boxes[jj][0] and + cx_text_only_h[ii] < boxes[jj][1] and + cy_text_only_h[ii] >= boxes[jj][2] and + cy_text_only_h[ii] < boxes[jj][3]): + # this is valid if the center of region identify in which box it is located arg_text_con_h.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con_h.append(ind_min) args_contours_h = np.array(range(len(arg_text_con_h))) - order_by_con_head = np.zeros(len(arg_text_con_h)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij, _ in enumerate(boxes): + ys = slice(*boxes[iij][2:4]) + xs = slice(*boxes[iij][0:2]) args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] con_inter_box = [] @@ -2412,9 +2315,12 @@ class Eynollah: for box in args_contours_box_h: con_inter_box_h.append(contours_only_text_parent_h[box]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts( + con_inter_box, con_inter_box_h, + matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] @@ -2423,11 +2329,13 @@ class Eynollah: for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for 
zahler, _ in enumerate(args_contours_box_h): arg_order_v = indexes_sorted_head[zahler] - order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -2448,21 +2356,30 @@ class Eynollah: self.logger.debug("exit do_order_of_regions_full_layout") return order_text_new, id_of_texts_tot - def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): + def do_order_of_regions_no_full_layout( + self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): + self.logger.debug("enter do_order_of_regions_no_full_layout") - cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours(contours_only_text_parent) + boxes = np.array(boxes, dtype=int) # to be on the safe side + cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( + contours_only_text_parent) try: arg_text_con = [] for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: + if (x_min_text_only[ii] + 80 >= boxes[jj][0] and + x_min_text_only[ii] + 80 < boxes[jj][1] and + y_cor_x_min_main[ii] >= boxes[jj][2] and + y_cor_x_min_main[ii] < boxes[jj][3]): arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + + (cy_text_only[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) @@ -2472,22 +2389,28 @@ class Eynollah: order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): + ys = slice(*boxes[iij][2:4]) + xs = slice(*boxes[iij][0:2]) args_contours_box = args_contours[np.array(arg_text_con) == iij] con_inter_box = [] con_inter_box_h = [] for i in range(len(args_contours_box)): con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts( + con_inter_box, con_inter_box_h, + matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = 
np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -2508,39 +2431,49 @@ class Eynollah: for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located + if (cx_text_only[ii] >= boxes[jj][0] and + cx_text_only[ii] < boxes[jj][1] and + cy_text_only[ii] >= boxes[jj][2] and + cy_text_only[ii] < boxes[jj][3]): + # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] + dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + + (cy_text_only[ii] - boxes[jj][2]) ** 2) + for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) - order_by_con_main = np.zeros(len(arg_text_con)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): + ys = slice(*boxes[iij][2:4]) + xs = slice(*boxes[iij][0:2]) args_contours_box = args_contours[np.array(arg_text_con) == iij] con_inter_box = [] con_inter_box_h = [] - for i in range(len(args_contours_box)): con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts( + con_inter_box, con_inter_box_h, + matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -2558,10 +2491,13 @@ class Eynollah: self.logger.debug("exit 
do_order_of_regions_no_full_layout") return order_text_new, id_of_texts_tot - def check_iou_of_bounding_box_and_contour_for_tables(self, layout, table_prediction_early, pixel_tabel, num_col_classifier): + + def check_iou_of_bounding_box_and_contour_for_tables( + self, layout, table_prediction_early, pixel_table, num_col_classifier): + layout_org = np.copy(layout) - layout_org[:,:,0][layout_org[:,:,0]==pixel_tabel] = 0 - layout = (layout[:,:,0]==pixel_tabel)*1 + layout_org[:,:,0][layout_org[:,:,0]==pixel_table] = 0 + layout = (layout[:,:,0]==pixel_table)*1 layout =np.repeat(layout[:, :, np.newaxis], 3, axis=2) layout = layout.astype(np.uint8) @@ -2569,18 +2505,17 @@ class Eynollah: _, thresh = cv2.threshold(imgray, 0, 255, 0) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) + cnt_size = np.array([cv2.contourArea(contours[j]) + for j in range(len(contours))]) contours_new = [] for i in range(len(contours)): x, y, w, h = cv2.boundingRect(contours[i]) iou = cnt_size[i] /float(w*h) *100 - if iou<80: layout_contour = np.zeros((layout_org.shape[0], layout_org.shape[1])) layout_contour= cv2.fillPoly(layout_contour,pts=[contours[i]] ,color=(1,1,1)) - - + layout_contour_sum = layout_contour.sum(axis=0) layout_contour_sum_diff = np.diff(layout_contour_sum) layout_contour_sum_diff= np.abs(layout_contour_sum_diff) @@ -2607,65 +2542,77 @@ class Eynollah: contours_new.append(contours_sep[ji]) if num_col_classifier>=2: only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) - only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,pts=[contours_sep[ji]] ,color=(1,1,1)) - table_pixels_masked_from_early_pre = only_recent_contour_image[:,:]*table_prediction_early[:,:] - iou_in = table_pixels_masked_from_early_pre.sum() /float(only_recent_contour_image.sum()) *100 + only_recent_contour_image= cv2.fillPoly(only_recent_contour_image, pts=[contours_sep[ji]], color=(1,1,1)) + table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early + iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in_in1') if iou_in>30: - layout_org= cv2.fillPoly(layout_org,pts=[contours_sep[ji]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) + layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) else: pass else: - - layout_org= cv2.fillPoly(layout_org,pts=[contours_sep[ji]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) - + layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) else: contours_new.append(contours[i]) if num_col_classifier>=2: only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,pts=[contours[i]] ,color=(1,1,1)) - table_pixels_masked_from_early_pre = only_recent_contour_image[:,:]*table_prediction_early[:,:] - iou_in = table_pixels_masked_from_early_pre.sum() /float(only_recent_contour_image.sum()) *100 + table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early + iou_in = 100. 
* table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in') if iou_in>30: - layout_org= cv2.fillPoly(layout_org,pts=[contours[i]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) + layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) else: pass else: - layout_org= cv2.fillPoly(layout_org,pts=[contours[i]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) + layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) return layout_org, contours_new - def delete_separator_around(self,spliter_y,peaks_neg,image_by_region, pixel_line, pixel_table): + + def delete_separator_around(self, spliter_y,peaks_neg,image_by_region, pixel_line, pixel_table): # format of subboxes: box=[x1, x2 , y1, y2] pix_del = 100 if len(image_by_region.shape)==3: for i in range(len(spliter_y)-1): for j in range(1,len(peaks_neg[i])-1): - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0]==pixel_line ]=0 - image_by_region[spliter_y[i]:spliter_y[i+1],peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,1]==pixel_line ]=0 - image_by_region[spliter_y[i]:spliter_y[i+1],peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,2]==pixel_line ]=0 + ys = slice(int(spliter_y[i]), + int(spliter_y[i+1])) + xs = slice(peaks_neg[i][j] - pix_del, + peaks_neg[i][j] + pix_del) + image_by_region[ys,xs,0][image_by_region[ys,xs,0]==pixel_line] = 0 + image_by_region[ys,xs,0][image_by_region[ys,xs,1]==pixel_line] = 0 + image_by_region[ys,xs,0][image_by_region[ys,xs,2]==pixel_line] = 0 - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0]==pixel_table ]=0 - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,1]==pixel_table ]=0 - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,2]==pixel_table ]=0 + image_by_region[ys,xs,0][image_by_region[ys,xs,0]==pixel_table] = 0 + image_by_region[ys,xs,0][image_by_region[ys,xs,1]==pixel_table] = 0 + image_by_region[ys,xs,0][image_by_region[ys,xs,2]==pixel_table] = 0 else: for i in range(len(spliter_y)-1): for j in range(1,len(peaks_neg[i])-1): - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del]==pixel_line ]=0 - - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del]==pixel_table ]=0 + ys = slice(int(spliter_y[i]), + int(spliter_y[i+1])) + xs = slice(peaks_neg[i][j] - pix_del, + peaks_neg[i][j] + pix_del) + image_by_region[ys,xs][image_by_region[ys,xs]==pixel_line] = 0 + 
image_by_region[ys,xs][image_by_region[ys,xs]==pixel_table] = 0 return image_by_region - def add_tables_heuristic_to_layout(self, image_regions_eraly_p,boxes, slope_mean_hor, spliter_y,peaks_neg_tot, image_revised, num_col_classifier, min_area, pixel_line): + + def add_tables_heuristic_to_layout( + self, image_regions_eraly_p, boxes, + slope_mean_hor, spliter_y, peaks_neg_tot, image_revised, + num_col_classifier, min_area, pixel_line): + pixel_table =10 image_revised_1 = self.delete_separator_around(spliter_y, peaks_neg_tot, image_revised, pixel_line, pixel_table) try: image_revised_1[:,:30][image_revised_1[:,:30]==pixel_line] = 0 - image_revised_1[:,image_revised_1.shape[1]-30:][image_revised_1[:,image_revised_1.shape[1]-30:]==pixel_line] = 0 + image_revised_1[:,-30:][image_revised_1[:,-30:]==pixel_line] = 0 except: pass + boxes = np.array(boxes, dtype=int) # to be on the safe side img_comm_e = np.zeros(image_revised_1.shape) img_comm = np.repeat(img_comm_e[:, :, np.newaxis], 3, axis=2) @@ -2690,7 +2637,9 @@ class Eynollah: if not self.isNaN(slope_mean_hor): image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1],3)) for i in range(len(boxes)): - image_box=img_comm[int(boxes[i][2]):int(boxes[i][3]),int(boxes[i][0]):int(boxes[i][1]),:] + box_ys = slice(*boxes[i][2:4]) + box_xs = slice(*boxes[i][0:2]) + image_box = img_comm[box_ys, box_xs] try: image_box_tabels_1=(image_box[:,:,0]==pixel_table)*1 contours_tab,_=return_contours_of_image(image_box_tabels_1) @@ -2753,17 +2702,17 @@ class Eynollah: for ii in range(len(y_up_tabs)): image_box[y_up_tabs[ii]:y_down_tabs[ii],:,0]=pixel_table - image_revised_last[int(boxes[i][2]):int(boxes[i][3]),int(boxes[i][0]):int(boxes[i][1]),:]=image_box[:,:,:] + image_revised_last[box_ys, box_xs] = image_box else: for i in range(len(boxes)): - - image_box=img_comm[int(boxes[i][2]):int(boxes[i][3]),int(boxes[i][0]):int(boxes[i][1]),:] - image_revised_last[int(boxes[i][2]):int(boxes[i][3]),int(boxes[i][0]):int(boxes[i][1]),:]=image_box[:,:,:] + box_ys = slice(*boxes[i][2:4]) + box_xs = slice(*boxes[i][0:2]) + image_box = img_comm[box_ys, box_xs] + image_revised_last[box_ys, box_xs] = image_box if num_col_classifier==1: - img_tables_col_1=( image_revised_last[:,:,0]==pixel_table )*1 - img_tables_col_1=img_tables_col_1.astype(np.uint8) - contours_table_col1,_=return_contours_of_image(img_tables_col_1) + img_tables_col_1 = (image_revised_last[:,:,0] == pixel_table).astype(np.uint8) + contours_table_col1, _ = return_contours_of_image(img_tables_col_1) _,_ ,_ , _, y_min_tab_col1 ,y_max_tab_col1, _= find_new_features_of_contours(contours_table_col1) @@ -2779,17 +2728,13 @@ class Eynollah: def get_tables_from_model(self, img, num_col_classifier): img_org = np.copy(img) - img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - - if not self.dir_in: self.model_table, _ = self.start_new_session_and_model(self.model_table_dir) patches = False - if self.light_version: prediction_table = self.do_prediction_new_concept(patches, img, self.model_table) prediction_table = prediction_table.astype(np.int16) @@ -2804,52 +2749,52 @@ class Eynollah: prediction_table = prediction_table.astype(np.int16) elif num_col_classifier ==2: - height_ext = 0#int( img.shape[0]/4. ) - h_start = int(height_ext/2.) - width_ext = int( img.shape[1]/8. ) - w_start = int(width_ext/2.) 
- - height_new = img.shape[0]+height_ext - width_new = img.shape[1]+width_ext - - img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 - img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] + height_ext = 0 # img.shape[0] // 4 + h_start = height_ext // 2 + width_ext = img.shape[1] // 8 + w_start = width_ext // 2 + + img_new = np.zeros((img.shape[0] + height_ext, + img.shape[1] + width_ext, + img.shape[2])).astype(float) + ys = slice(h_start, h_start + img.shape[0]) + xs = slice(w_start, w_start + img.shape[1]) + img_new[ys, xs] = img prediction_ext = self.do_prediction(patches, img_new, self.model_table) pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) pre_updown = cv2.flip(pre_updown, -1) - prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + prediction_table = prediction_ext[ys, xs] + prediction_table_updown = pre_updown[ys, xs] prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 prediction_table = prediction_table.astype(np.int16) - elif num_col_classifier ==1: - height_ext = 0# int( img.shape[0]/4. ) - h_start = int(height_ext/2.) - width_ext = int( img.shape[1]/4. ) - w_start = int(width_ext/2.) - - height_new = img.shape[0]+height_ext - width_new = img.shape[1]+width_ext - - img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 - img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] + height_ext = 0 # img.shape[0] // 4 + h_start = height_ext // 2 + width_ext = img.shape[1] // 4 + w_start = width_ext // 2 + + img_new =np.zeros((img.shape[0] + height_ext, + img.shape[1] + width_ext, + img.shape[2])).astype(float) + ys = slice(h_start, h_start + img.shape[0]) + xs = slice(w_start, w_start + img.shape[1]) + img_new[ys, xs] = img prediction_ext = self.do_prediction(patches, img_new, self.model_table) pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) pre_updown = cv2.flip(pre_updown, -1) - prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + prediction_table = prediction_ext[ys, xs] + prediction_table_updown = pre_updown[ys, xs] prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 prediction_table = prediction_table.astype(np.int16) - else: prediction_table = np.zeros(img.shape) - img_w_half = int(img.shape[1]/2.) 
+ img_w_half = img.shape[1] // 2 pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.model_table) pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.model_table) @@ -2877,7 +2822,10 @@ class Eynollah: prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) return prediction_table_erode.astype(np.int16) - def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light): + def run_graphics_and_columns_light( + self, text_regions_p_1, textline_mask_tot_ea, + num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light): + #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') #print(erosion_hurts, 'erosion_hurts') t_in_gr = time.time() @@ -2894,14 +2842,13 @@ class Eynollah: if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: - table_prediction = (np.zeros((image_page.shape[0], image_page.shape[1]))).astype(np.int16) + table_prediction = np.zeros((image_page.shape[0], image_page.shape[1])).astype(np.int16) if self.plotter: self.plotter.save_page_image(image_page) text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] mask_images = (text_regions_p_1[:, :] == 2) * 1 @@ -2931,10 +2878,10 @@ class Eynollah: self.logger.error(why) num_col = None #print("inside graphics 3 ", time.time() - t_in_gr) - return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light + return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, + text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light) def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light): - #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') #print(erosion_hurts, 'erosion_hurts') t_in_gr = time.time() @@ -2950,11 +2897,14 @@ class Eynollah: #print("inside graphics 1 ", time.time() - t_in_gr) textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] return page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page - def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts): + + def run_graphics_and_columns( + self, text_regions_p_1, + num_col_classifier, num_column_is_classified, erosion_hurts): + t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) @@ -2969,7 +2919,7 @@ class Eynollah: if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: - table_prediction = (np.zeros((image_page.shape[0], image_page.shape[1]))).astype(np.int16) + table_prediction = np.zeros((image_page.shape[0], image_page.shape[1])).astype(np.int16) if self.plotter: self.plotter.save_page_image(image_page) @@ -2987,7 +2937,6 @@ class Eynollah: img_only_regions = np.copy(img_only_regions_with_sep[:,:]) else: img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) - try: num_col, _ = 
find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col = num_col + 1 @@ -2996,12 +2945,14 @@ class Eynollah: except Exception as why: self.logger.error(why) num_col = None - return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction + return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, + text_regions_p_1, cont_page, table_prediction) - def run_enhancement(self,light_version): + def run_enhancement(self, light_version): t_in = time.time() self.logger.info("Resizing and enhancing image...") - is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier(light_version) + is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = \ + self.resize_and_enhance_image_with_column_classifier(light_version) self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') scale = 1 if is_image_enhanced: @@ -3046,7 +2997,10 @@ class Eynollah: self.logger.info("slope_deskew: %.2f°", slope_deskew) return slope_deskew, slope_first - def run_marginals(self, image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): + def run_marginals( + self, image_page, textline_mask_tot_ea, mask_images, mask_lines, + num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): + image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] textline_mask_tot[mask_images[:, :] == 1] = 0 @@ -3060,7 +3014,9 @@ class Eynollah: if self.tables: regions_without_separators[table_prediction==1] = 1 regions_without_separators = regions_without_separators.astype(np.uint8) - text_regions_p = get_marginals(rotate_image(regions_without_separators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, light_version=self.light_version, kernel=KERNEL) + text_regions_p = get_marginals( + rotate_image(regions_without_separators, slope_deskew), text_regions_p, + num_col_classifier, slope_deskew, light_version=self.light_version, kernel=KERNEL) except Exception as e: self.logger.error("exception %s", e) @@ -3069,11 +3025,15 @@ class Eynollah: self.plotter.save_plot_of_layout_main(text_regions_p, image_page) return textline_mask_tot, text_regions_p, image_page_rotated - def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts): + def run_boxes_no_full_layout( + self, image_page, textline_mask_tot, text_regions_p, + slope_deskew, num_col_classifier, table_prediction, erosion_hurts): + self.logger.debug('enter run_boxes_no_full_layout') t_0_box = time.time() if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func( + image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1]) table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1]) 
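
As a reading aid for the delete_separator_around and get_tables_from_model hunks above, here is a minimal standalone sketch of the refactored idiom they share: build ys/xs slice objects once, pad the page horizontally into a zero canvas, predict on the padded image and on its 180°-flipped copy, un-flip, crop back with the same slices, and take the union of the two table masks. This is a hypothetical reconstruction for review purposes only; the predict stub below merely stands in for self.do_prediction(patches, img, self.model_table) and is not part of the patch.

import numpy as np

def predict(img):
    # Stub standing in for self.do_prediction(patches, img, self.model_table):
    # returns a fake (H, W, 1) int16 "table" mask of matching size.
    return (img.sum(axis=2, keepdims=True) > 0).astype(np.int16)

def table_mask_with_flip_check(img):
    # Horizontal-only padding, as in the num_col_classifier <= 2 branches
    # (height_ext is 0 there); width grows by an eighth of the page width.
    width_ext = img.shape[1] // 8
    w_start = width_ext // 2
    img_new = np.zeros((img.shape[0], img.shape[1] + width_ext, img.shape[2]), dtype=float)
    ys = slice(0, img.shape[0])
    xs = slice(w_start, w_start + img.shape[1])
    img_new[ys, xs] = img

    pre = predict(img_new)
    # img[::-1, ::-1] flips both axes, equivalent to cv2.flip(img, -1);
    # predict on the flipped copy, then flip the result back.
    pre_updown = predict(img_new[::-1, ::-1])[::-1, ::-1]

    # Crop both predictions back to the original page with the same slices.
    prediction = pre[ys, xs]
    prediction_updown = pre_updown[ys, xs]
    # Union: keep a pixel as "table" if either orientation predicted it.
    prediction[prediction_updown == 1] = 1
    return prediction.astype(np.int16)

page = np.random.rand(64, 96, 3)
assert table_mask_with_flip_check(page).shape == (64, 96, 1)

The flip-and-merge pass trades one extra inference for robustness: a table the model only detects in one orientation still ends up in the final mask.
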
@@ -3090,10 +3050,14 @@ class Eynollah: regions_without_separators_d = None pixel_lines = 3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: - _, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + _, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_lines) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_lines) #print(time.time()-t_0_box,'time box in 2') self.logger.info("num_col_classifier: %s", num_col_classifier) @@ -3107,7 +3071,9 @@ class Eynollah: #print(time.time()-t_0_box,'time box in 3') t1 = time.time() if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( + splitter_y_new, regions_without_separators, matrix_of_lines_ch, + num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes_d = None self.logger.debug("len(boxes): %s", len(boxes)) #print(time.time()-t_0_box,'time box in 3.1') @@ -3119,12 +3085,17 @@ class Eynollah: text_regions_p_tables = np.copy(text_regions_p) text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) + img_revised_tab2 = self.add_tables_heuristic_to_layout( + text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, + num_col_classifier , 0.000005, pixel_line) #print(time.time()-t_0_box,'time box in 3.2') - img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) + img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables( + img_revised_tab2, table_prediction, 10, num_col_classifier) #print(time.time()-t_0_box,'time box in 3.3') else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( + splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, + num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes = None self.logger.debug("len(boxes): %s", len(boxes_d)) @@ -3137,8 +3108,11 @@ class Eynollah: text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - 
img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) + img_revised_tab2 = self.add_tables_heuristic_to_layout( + text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, + num_col_classifier, 0.000005, pixel_line) + img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( + img_revised_tab2, table_prediction_n, 10, num_col_classifier) img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) @@ -3185,55 +3159,71 @@ class Eynollah: contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) #print(time.time()-t_0_box,'time box in 5') self.logger.debug('exit run_boxes_no_full_layout') - return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables + return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, + regions_without_separators_d, boxes, boxes_d, + polygons_of_marginals, contours_tables) + + def run_boxes_full_layout( + self, image_page, textline_mask_tot, text_regions_p, + slope_deskew, num_col_classifier, img_only_regions, + table_prediction, erosion_hurts, img_bin_light): - def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light): self.logger.debug('enter run_boxes_full_layout') t_full0 = time.time() if self.tables: if self.light_version: text_regions_p[:,:][table_prediction[:,:]==1] = 10 - img_revised_tab=text_regions_p[:,:] + img_revised_tab = text_regions_p[:,:] if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) - regions_without_separators_d=(text_regions_p_1_n[:,:] == 1)*1 + regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 else: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_separators_d = None - regions_without_separators = (text_regions_p[:,:] == 1)*1#( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions) + # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1 + #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) + regions_without_separators = (text_regions_p[:,:] == 1)*1 regions_without_separators[table_prediction == 1] = 1 else: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, 
slope_deskew) + image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) - regions_without_separators_d=(text_regions_p_1_n[:,:] == 1)*1 + regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 else: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_separators_d = None - - regions_without_separators = (text_regions_p[:,:] == 1)*1#( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions) + + # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1 + #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) + regions_without_separators = (text_regions_p[:,:] == 1)*1 regions_without_separators[table_prediction == 1] = 1 pixel_lines=3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, peaks_neg_fin, matrix_of_lines_ch, splitter_y_new, seperators_closeup_n = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_lines) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),num_col_classifier, self.tables, pixel_lines) + num_col_d, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_lines) if num_col_classifier>=3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3247,33 +3237,40 @@ class Eynollah: pass if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( + splitter_y_new, regions_without_separators, matrix_of_lines_ch, + num_col_classifier, erosion_hurts, self.tables, self.right2left) text_regions_p_tables = np.copy(text_regions_p) text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10 pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) - - img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier) + img_revised_tab2 = self.add_tables_heuristic_to_layout( + text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, + num_col_classifier , 0.000005, pixel_line) + img_revised_tab2,contoures_tables = 
self.check_iou_of_bounding_box_and_contour_for_tables( + img_revised_tab2, table_prediction, 10, num_col_classifier) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( + splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, + num_col_classifier, erosion_hurts, self.tables, self.right2left) text_regions_p_tables = np.copy(text_regions_p_1_n) text_regions_p_tables = np.round(text_regions_p_tables) text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10 pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) + img_revised_tab2 = self.add_tables_heuristic_to_layout( + text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, + num_col_classifier, 0.000005, pixel_line) - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction_n, 10, num_col_classifier) + img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( + img_revised_tab2, table_prediction_n, 10, num_col_classifier) img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) - img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) - if np.abs(slope_deskew) < 0.13: img_revised_tab = np.copy(img_revised_tab2[:,:,0]) else: @@ -3281,7 +3278,6 @@ class Eynollah: img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 - ##img_revised_tab=img_revised_tab2[:,:,0] #img_revised_tab=text_regions_p[:,:] text_regions_p[:,:][text_regions_p[:,:]==10] = 0 @@ -3310,10 +3306,9 @@ class Eynollah: image_page = image_page.astype(np.uint8) #print("full inside 1", time.time()- t_full0) - if self.light_version: - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, False, cols=num_col_classifier) - else: - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, False, cols=num_col_classifier) + regions_fully, regions_fully_only_drop = self.extract_text_regions_new( + img_bin_light if self.light_version else image_page, + False, cols=num_col_classifier) #print("full inside 2", time.time()- t_full0) # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model @@ -3328,7 +3323,6 @@ class Eynollah: drop_capital_label_in_full_layout_model = 3 drops = (regions_fully[:,:,0]==drop_capital_label_in_full_layout_model)*1 - drops= drops.astype(np.uint8) regions_fully[:,:,0][regions_fully[:,:,0]==drop_capital_label_in_full_layout_model] = 1 @@ -3336,8 +3330,8 @@ class Eynollah: drops = cv2.erode(drops[:,:], KERNEL, iterations=1) regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model - - regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model, text_regions_p) + regions_fully = 
putt_bb_of_drop_capitals_of_model_in_patches_in_layout( + regions_fully, drop_capital_label_in_full_layout_model, text_regions_p) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 @@ -3353,7 +3347,8 @@ class Eynollah: #plt.show() ####if not self.tables: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - _, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew) + _, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout( + image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1]) @@ -3371,18 +3366,19 @@ class Eynollah: self.logger.debug('exit run_boxes_full_layout') #print("full inside 3", time.time()- t_full0) - return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables + return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, + regions_without_separators_d, regions_fully, regions_without_separators, + polygons_of_marginals, contours_tables) def our_load_model(self, model_file): - try: model = load_model(model_file, compile=False) except: - model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) - + model = load_model(model_file, compile=False, custom_objects={ + "PatchEncoder": PatchEncoder, "Patches": Patches}) return model - def do_order_of_regions_with_model(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] @@ -3394,10 +3390,11 @@ class Eynollah: img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') if contours_only_text_parent_h: - _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours( + contours_only_text_parent_h) for j in range(len(cy_main)): - img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12, + int(x_min_main[j]):int(x_max_main[j])] = 1 co_text_all = contours_only_text_parent + contours_only_text_parent_h else: co_text_all = contours_only_text_parent @@ -3480,7 +3477,7 @@ class Eynollah: region_ids = ['region_%04d' % i for i in range(len(co_text_all))] return ordered, region_ids - def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot): + def return_start_and_end_of_common_text_of_textline_ocr(self, textline_image, ind_tot): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] common_window = int(0.2*width) @@ -3492,18 +3489,14 @@ class Eynollah: sum_smoothed = gaussian_filter1d(img_sum, 3) peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: print(len(peaks_real), 'len(peaks_real)') peaks_real = 
peaks_real[(peaks_real<width2) & (peaks_real>width1)] arg_sort = np.argsort(sum_smoothed[peaks_real]) - arg_sort4 =arg_sort[::-1][:4] - peaks_sort_4 = peaks_real[arg_sort][::-1][:4] - argsort_sorted = np.argsort(peaks_sort_4) first_4_sorted = peaks_sort_4[argsort_sorted] @@ -3522,9 +3515,8 @@ class Eynollah: return peaks_final[0], peaks_final[1] else: pass - - - def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self,textline_image, ind_tot): + + def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image, ind_tot): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] common_window = int(0.06*width) @@ -3536,14 +3528,12 @@ class Eynollah: sum_smoothed = gaussian_filter1d(img_sum, 3) peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: #print(len(peaks_real), 'len(peaks_real)') peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)] arg_max = np.argmax(sum_smoothed[peaks_real]) - peaks_final = peaks_real[arg_max] #plt.figure(ind_tot) @@ -3555,15 +3545,15 @@ class Eynollah: return peaks_final else: return None - def return_start_and_end_of_common_text_of_textline_ocr_new_splitted(self,peaks_real, sum_smoothed, start_split, end_split): + + def return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + self, peaks_real, sum_smoothed, start_split, end_split): + peaks_real = peaks_real[(peaks_real<end_split) & (peaks_real>start_split)] arg_sort = np.argsort(sum_smoothed[peaks_real]) - arg_sort4 =arg_sort[::-1][:4] - peaks_sort_4 = peaks_real[arg_sort][::-1][:4] - argsort_sorted = np.argsort(peaks_sort_4) first_4_sorted = peaks_sort_4[argsort_sorted] @@ -3573,8 +3563,8 @@ class Eynollah: arg_sortnew = np.argsort(y_4_sorted) peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] ) return peaks_final[0] - - def return_start_and_end_of_common_text_of_textline_ocr_new(self,textline_image, ind_tot): + + def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] common_window = int(0.15*width) @@ -3587,11 +3577,11 @@ class Eynollah: sum_smoothed = gaussian_filter1d(img_sum, 3) peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: - peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(peaks_real, sum_smoothed, width1, mid+2) - - peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(peaks_real, sum_smoothed, mid-2, width2) + peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + peaks_real, sum_smoothed, width1, mid+2) + peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + peaks_real, sum_smoothed, mid-2, width2) #plt.figure(ind_tot) #plt.imshow(textline_image) @@ -3602,23 +3592,23 @@ class Eynollah: return peak_start, peak_end else: pass - - def return_ocr_of_textline_without_common_section(self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + + def return_ocr_of_textline_without_common_section( + self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + if h2w_ratio > 0.05: pixel_values = processor(textline_image, return_tensors="pt").pixel_values generated_ids = model_ocr.generate(pixel_values.to(device)) generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] else: - #width = np.shape(textline_image)[1] #height = np.shape(textline_image)[0] #common_window = int(0.3*width) - #width1 = int ( width/2.
- common_window ) #width2 = int ( width/2. + common_window ) - - split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image, ind_tot) + split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section( + textline_image, ind_tot) if split_point: image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) @@ -3652,7 +3642,10 @@ class Eynollah: #print(generated_text,'generated_text') #print('########################################') return generated_text - def return_ocr_of_textline(self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + + def return_ocr_of_textline( + self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + if h2w_ratio > 0.05: pixel_values = processor(textline_image, return_tensors="pt").pixel_values generated_ids = model_ocr.generate(pixel_values.to(device)) @@ -3661,7 +3654,6 @@ class Eynollah: #width = np.shape(textline_image)[1] #height = np.shape(textline_image)[0] #common_window = int(0.3*width) - #width1 = int ( width/2. - common_window ) #width2 = int ( width/2. + common_window ) @@ -3683,8 +3675,8 @@ class Eynollah: #print(generated_text2, 'generated_text2') #print('########################################') - match = sq(None, generated_text1, generated_text2).find_longest_match(0, len(generated_text1), 0, len(generated_text2)) - + match = sq(None, generated_text1, generated_text2).find_longest_match( + 0, len(generated_text1), 0, len(generated_text2)) generated_text = generated_text1 + generated_text2[match.b+match.size:] except: pixel_values = processor(textline_image, return_tensors="pt").pixel_values @@ -3692,43 +3684,44 @@ class Eynollah: generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] return generated_text - + def return_textline_contour_with_added_box_coordinate(self, textline_contour, box_ind): textline_contour[:,0] = textline_contour[:,0] + box_ind[2] textline_contour[:,1] = textline_contour[:,1] + box_ind[0] return textline_contour + def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] - - def return_it_in_two_groups(self,x_differential): - split = [ind if x_differential[ind]!=x_differential[ind+1] else -1 for ind in range(len(x_differential)-1)] + def return_it_in_two_groups(self, x_differential): + split = [ind if x_differential[ind]!=x_differential[ind+1] else -1 + for ind in range(len(x_differential)-1)] split_masked = list( np.array(split[:])[np.array(split[:])!=-1] ) - if 0 not in split_masked: split_masked.insert(0, -1) - split_masked.append(len(x_differential)-1) split_masked = np.array(split_masked) +1 - sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind+1]]) for ind in range(len(split_masked)-1)] - - indexes_to_bec_changed = [ind if ( np.abs(sums[ind-1]) > np.abs(sums[ind]) and np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1 for ind in range(1,len(sums)-1) ] + sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind+1]]) + for ind in range(len(split_masked)-1)] + indexes_to_bec_changed = [ind if (np.abs(sums[ind-1]) > np.abs(sums[ind]) and + np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1 + for ind in range(1,len(sums)-1)] indexes_to_bec_changed_filtered = np.array(indexes_to_bec_changed)[np.array(indexes_to_bec_changed)!=-1] 
x_differential_new = np.copy(x_differential) for i in indexes_to_bec_changed_filtered: - x_differential_new[split_masked[i]:split_masked[i+1]] = -1*np.array(x_differential)[split_masked[i]:split_masked[i+1]] + i_slice = slice(split_masked[i], split_masked[i+1]) + x_differential_new[i_slice] = -1 * np.array(x_differential)[i_slice] return x_differential_new - def dilate_textregions_contours_textline_version(self,all_found_textline_polygons): + + def dilate_textregions_contours_textline_version(self, all_found_textline_polygons): #print(all_found_textline_polygons) - for j in range(len(all_found_textline_polygons)): for ij in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][ij] area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) @@ -3736,7 +3729,6 @@ class Eynollah: x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - x_differential = gaussian_filter1d(x_differential, 0.1) y_differential = gaussian_filter1d(y_differential, 0.1) @@ -3754,7 +3746,6 @@ class Eynollah: inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) - if (y_max-y_min) <= (x_max-x_min): dilation_m1 = round(area / (x_max-x_min) * 0.12) else: @@ -3786,7 +3777,6 @@ class Eynollah: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -3802,20 +3792,16 @@ class Eynollah: con_ind = con_ind.astype(np.int32) - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] - + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) + for ind in range(len(con_scaled[:,0, 1])) ] results = np.array(results) - #print(results,'results') - results[results==0] = 1 - diff_result = np.diff(results) indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - if results[0]==1: con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] @@ -3823,27 +3809,22 @@ class Eynollah: #indices_2 = indices_2[1:] indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - for ii in range(len(indices_2)): con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons - def dilate_textregions_contours(self,all_found_textline_polygons): + + def dilate_textregions_contours(self, all_found_textline_polygons): #print(all_found_textline_polygons) for j in range(len(all_found_textline_polygons)): - con_ind = all_found_textline_polygons[j] #print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) @@ -3852,7 +3833,6 @@ class Eynollah: x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - x_differential = gaussian_filter1d(x_differential, 0.1) y_differential = gaussian_filter1d(y_differential, 0.1) @@ -3870,7 +3850,6 @@ class Eynollah: inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) - if (y_max-y_min) <= (x_max-x_min): 
dilation_m1 = round(area / (x_max-x_min) * 0.12) else: @@ -3902,7 +3881,6 @@ class Eynollah: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -3918,50 +3896,38 @@ class Eynollah: con_ind = con_ind.astype(np.int32) - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] - + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) + for ind in range(len(con_scaled[:,0, 1])) ] results = np.array(results) - #print(results,'results') - results[results==0] = 1 - diff_result = np.diff(results) - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - if results[0]==1: con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] #indices_2 = indices_2[1:] indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - for ii in range(len(indices_2)): con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons - - def dilate_textline_contours(self,all_found_textline_polygons): + def dilate_textline_contours(self, all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for ij in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][ij] area = cv2.contourArea(con_ind) @@ -3991,7 +3957,6 @@ class Eynollah: dilation_m1 = round(area / (x_max-x_min) * 0.35) else: dilation_m1 = round(area / (y_max-y_min) * 0.35) - if dilation_m1>12: dilation_m1 = 12 @@ -4017,7 +3982,6 @@ class Eynollah: else: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -4030,16 +3994,13 @@ class Eynollah: con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - con_ind = con_ind.astype(np.int32) - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] - + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) + for ind in range(len(con_scaled[:,0, 1])) ] results = np.array(results) - results[results==0] = 1 - diff_result = np.diff(results) indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] @@ -4050,13 +4011,10 @@ class Eynollah: con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] indices_2 = indices_2[:-1] - for ii in range(len(indices_2)): con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] @@ -4071,12 +4029,11 @@ class Eynollah: areas = [cv2.contourArea(contours[j]) for j in range(len(contours))] area_tot = 
image.shape[0]*image.shape[1] - M_main = [cv2.moments(contours[j]) for j in range(len(contours))] + M_main = [cv2.moments(contours[j]) + for j in range(len(contours))] cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - - areas_ratio = np.array(areas)/ area_tot contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] @@ -4084,9 +4041,11 @@ class Eynollah: #contours_> = [contours[ind] for ind in contours_index_big] indexes_to_be_removed = [] for ind_small in contours_index_small: - results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big ] + results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) + for ind in contours_index_big] if marginal_cnts: - results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in range(len(marginal_cnts)) ] + results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) + for ind in range(len(marginal_cnts))] results_marginal = np.array(results_marginal) if np.any(results_marginal==1): @@ -4096,7 +4055,6 @@ class Eynollah: if np.any(results==1): indexes_to_be_removed.append(ind_small) - if len(indexes_to_be_removed)>0: indexes_to_be_removed = np.unique(indexes_to_be_removed) @@ -4105,8 +4063,7 @@ class Eynollah: contours.pop(ind) return contours - - + else: contours_txtline_of_all_textregions = [] indexes_of_textline_tot = [] @@ -4115,32 +4072,23 @@ class Eynollah: for jj in range(len(contours)): contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] - ind_ins = np.zeros( len(contours[jj]) ) + jj - list_ind_ins = list(ind_ins) - - ind_textline_inside_tr = np.array (range(len(contours[jj])) ) + ind_textline_inside_tr = list(range(len(contours[jj]))) + index_textline_inside_textregion = index_textline_inside_textregion + ind_textline_inside_tr + ind_ins = [0] * len(contours[jj]) + jj + indexes_of_textline_tot = indexes_of_textline_tot + ind_ins - list_ind_textline_inside_tr = list(ind_textline_inside_tr) - - index_textline_inside_textregion = index_textline_inside_textregion + list_ind_textline_inside_tr - - indexes_of_textline_tot = indexes_of_textline_tot + list_ind_ins - - - M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] + M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) + for j in range(len(contours_txtline_of_all_textregions))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] area_tot_tot = image.shape[0]*image.shape[1] textregion_index_to_del = [] textline_in_textregion_index_to_del = [] for ij in range(len(contours_txtline_of_all_textregions)): - args_all = list(np.array(range(len(contours_txtline_of_all_textregions)))) - args_all.pop(ij) areas_without = np.array(areas_tot)[args_all] @@ -4149,38 +4097,38 @@ class Eynollah: args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest] if 
len(args_with_bigger_area)>0: - results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) for ind in args_with_bigger_area ] + results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) + for ind in args_with_bigger_area ] results = np.array(results) if np.any(results==1): #print(indexes_of_textline_tot[ij], index_textline_inside_textregion[ij]) textregion_index_to_del.append(int(indexes_of_textline_tot[ij])) textline_in_textregion_index_to_del.append(int(index_textline_inside_textregion[ij])) #contours[int(indexes_of_textline_tot[ij])].pop(int(index_textline_inside_textregion[ij])) - - uniqe_args_trs = np.unique(textregion_index_to_del) - - for ind_u_a_trs in uniqe_args_trs: - textline_in_textregion_index_to_del_ind = np.array(textline_in_textregion_index_to_del)[np.array(textregion_index_to_del)==ind_u_a_trs] + + textregion_index_to_del = np.array(textregion_index_to_del) + textline_in_textregion_index_to_del = np.array(textline_in_textregion_index_to_del) + for ind_u_a_trs in np.unique(textregion_index_to_del): + textline_in_textregion_index_to_del_ind = textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs] textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1] - for ittrd in textline_in_textregion_index_to_del_ind: contours[ind_u_a_trs].pop(ittrd) return contours - - - def filter_contours_without_textline_inside(self,contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered): - + def filter_contours_without_textline_inside( + self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered): + ###contours_txtline_of_all_textregions = [] - ###for jj in range(len(contours_textline)): ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] - ###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] - ###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32)) for j in range(len(M_main_textline))] - ###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32)) for j in range(len(M_main_textline))] - + ###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j]) + ### for j in range(len(contours_txtline_of_all_textregions))] + ###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32)) + ### for j in range(len(M_main_textline))] + ###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32)) + ### for j in range(len(M_main_textline))] ###M_main = [cv2.moments(contours[j]) for j in range(len(contours))] ###cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] @@ -4188,8 +4136,8 @@ class Eynollah: ###contours_with_textline = [] ###for ind_tr, con_tr in enumerate(contours): - ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False) for index_textline_con in range(len(contours_txtline_of_all_textregions)) ] - + ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False) + ### for index_textline_con in range(len(contours_txtline_of_all_textregions)) ] ###results = np.array(results) ###if np.any(results==1): ###contours_with_textline.append(con_tr) @@ -4202,7 +4150,6 @@ 
class Eynollah: uniqe_args_trs = np.unique(textregion_index_to_del) uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1] - for ind_u_a_trs in uniqe_args_trs_sorted: contours.pop(ind_u_a_trs) contours_textline.pop(ind_u_a_trs) @@ -4211,11 +4158,10 @@ class Eynollah: return contours, text_con_org, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) - def dilate_textlines(self,all_found_textline_polygons): + def dilate_textlines(self, all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for i in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][i] - con_ind = con_ind.astype(np.float) x_differential = np.diff( con_ind[:,0,0]) @@ -4227,11 +4173,8 @@ class Eynollah: x_max = float(np.max( con_ind[:,0,0] )) y_max = float(np.max( con_ind[:,0,1] )) - if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: - x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) - mult = x_biger_than_x*x_differential arg_min_mult = np.argmin(mult) @@ -4239,33 +4182,25 @@ class Eynollah: if y_differential[0]==0: y_differential[0] = 0.1 - if y_differential[-1]==0: y_differential[-1]= 0.1 - - - - y_differential = [y_differential[ind] if y_differential[ind]!=0 else (y_differential[ind-1] + y_differential[ind+1])/2. for ind in range(len(y_differential)) ] - + y_differential = [y_differential[ind] if y_differential[ind] != 0 + else 0.5 * (y_differential[ind-1] + y_differential[ind+1]) + for ind in range(len(y_differential))] if y_differential[0]==0.1: y_differential[0] = y_differential[1] if y_differential[-1]==0.1: y_differential[-1] = y_differential[-2] - y_differential.append(y_differential[0]) - y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] - + y_differential = [-1 if y_differential[ind] < 0 else 1 + for ind in range(len(y_differential))] y_differential = self.return_it_in_two_groups(y_differential) - y_differential = np.array(y_differential) - con_scaled = con_ind*1 - con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential - con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 @@ -4284,10 +4219,8 @@ class Eynollah: except: pass - else: y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) - mult = y_biger_than_x*y_differential arg_min_mult = np.argmin(mult) @@ -4295,32 +4228,25 @@ class Eynollah: if x_differential[0]==0: x_differential[0] = 0.1 - if x_differential[-1]==0: x_differential[-1]= 0.1 - - - - x_differential = [x_differential[ind] if x_differential[ind]!=0 else (x_differential[ind-1] + x_differential[ind+1])/2. 
for ind in range(len(x_differential)) ] - + x_differential = [x_differential[ind] if x_differential[ind] != 0 + else 0.5 * (x_differential[ind-1] + x_differential[ind+1]) + for ind in range(len(x_differential))] if x_differential[0]==0.1: x_differential[0] = x_differential[1] if x_differential[-1]==0.1: x_differential[-1] = x_differential[-2] - x_differential.append(x_differential[0]) - x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] - + x_differential = [-1 if x_differential[ind] < 0 else 1 + for ind in range(len(x_differential))] x_differential = self.return_it_in_two_groups(x_differential) x_differential = np.array(x_differential) - con_scaled = con_ind*1 - con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential - con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 @@ -4338,17 +4264,19 @@ class Eynollah: con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 except: pass - - + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] - + return all_found_textline_polygons - def delete_regions_without_textlines(self,slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con): + def delete_regions_without_textlines( + self, slopes, all_found_textline_polygons, boxes_text, txt_con_org, + contours_only_text_parent, index_by_text_par_con): + slopes_rem = [] all_found_textline_polygons_rem = [] boxes_text_rem = [] @@ -4368,9 +4296,11 @@ class Eynollah: index_sort = np.argsort(index_by_text_par_con_rem) indexes_new = np.array(range(len(index_by_text_par_con_rem))) - index_by_text_par_con_rem_sort = [indexes_new[index_sort==j][0] for j in range(len(index_by_text_par_con_rem))] + index_by_text_par_con_rem_sort = [indexes_new[index_sort==j][0] + for j in range(len(index_by_text_par_con_rem))] - return slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, contours_only_text_parent_rem, index_by_text_par_con_rem_sort + return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, + contours_only_text_parent_rem, index_by_text_par_con_rem_sort) def run(self): """ @@ -4400,10 +4330,13 @@ class Eynollah: img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) if self.extract_only_images: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml,polygons_of_images,image_page, page_coord, cont_page = self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) + text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ + self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - + pcgts = self.writer.build_pagexml_no_full_layout( + [], page_coord, [], [], [], [], + polygons_of_images, [], [], [], [], [], + cont_page, [], [], ocr_all_textlines) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) @@ -4414,21 +4347,26 @@ class Eynollah: return pcgts if 
self.skip_layout_and_reading_order: - _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, - skip_layout_and_reading_order=self.skip_layout_and_reading_order) + _ ,_, _, textline_mask_tot_ea, img_bin_light = \ + self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, + skip_layout_and_reading_order=self.skip_layout_and_reading_order) - page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = \ + self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) - all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + all_found_textline_polygons = filter_contours_area_of_image( + textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") + all_found_textline_polygons = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( + all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") order_text_new = [0] @@ -4443,10 +4381,11 @@ class Eynollah: polygons_lines_xml = [] contours_tables = [] ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + pcgts = self.writer.build_pagexml_no_full_layout( + cont_page, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) if self.dir_in: self.writer.write_pagexml(pcgts) continue @@ -4456,17 +4395,16 @@ class Eynollah: #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = \ + self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: + else: img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / 
float(textline_mask_tot_ea.shape[1]) * img_w_new) + img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1] textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) @@ -4475,18 +4413,23 @@ class Eynollah: slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ + text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, + num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) #print("text region early -4 in %.1fs", time.time() - t0) else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \ + self.get_regions_from_xy_2models(img_res, is_image_enhanced, + num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ + text_regions_p_1, cont_page, table_prediction = \ self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) self.logger.info("Graphics detection took %.1fs ", time.time() - t1) #self.logger.info('cont_page %s', cont_page) @@ -4496,7 +4439,9 @@ class Eynollah: if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + pcgts = self.writer.build_pagexml_no_full_layout( + [], page_coord, [], [], [], [], [], [], [], [], [], [], + cont_page, [], [], ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t1) if self.dir_in: self.writer.write_pagexml(pcgts) @@ -4517,11 +4462,9 @@ class Eynollah: org_w_l_m = textline_mask_tot_ea.shape[1] if num_col_classifier == 1: img_w_new = 2000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: + else: img_w_new = 2400 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1] image_page = resize_image(image_page,img_h_new, img_w_new ) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) @@ -4530,7 +4473,9 @@ class Eynollah: text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) 
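# A minimal sketch (hypothetical helper, not from the eynollah codebase) of the
# height arithmetic the hunks above converge on: both column branches now share
# one integer formula instead of the per-branch int(h / float(w) * W) round-trip.
# eynollah itself calls resize_image(img, h, w); `resize_width_to` is illustrative.
import cv2
import numpy as np

def resize_width_to(img: np.ndarray, img_w_new: int) -> np.ndarray:
    h, w = img.shape[:2]
    # floor division keeps the aspect ratio in pure integer math
    img_h_new = img_w_new * h // w
    return cv2.resize(img, (img_w_new, img_h_new), interpolation=cv2.INTER_NEAREST)

# e.g. a 3000 x 2000 page resized to width 1300 gets height 1300 * 3000 // 2000 == 1950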
table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + textline_mask_tot, text_regions_p, image_page_rotated = \ + self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, + num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) if self.light_version and num_col_classifier in (1,2): image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) @@ -4546,12 +4491,17 @@ class Eynollah: ## birdan sora chock chakir t1 = time.time() if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = \ - self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ + boxes, boxes_d, polygons_of_marginals, contours_tables = \ + self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, + num_col_classifier, table_prediction, erosion_hurts) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) else: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ - self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ + regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ + self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, + num_col_classifier, img_only_regions, table_prediction, erosion_hurts, + img_bin_light if self.light_version else None) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.light_version: drop_label_in_full_layout = 4 @@ -4572,18 +4522,23 @@ class Eynollah: areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) #self.logger.info('areas_cnt_text %s', areas_cnt_text) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) + if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + contours_only_text_parent = self.return_list_of_contours_with_desired_order( + contours_only_text_parent, index_con_parents) ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##contours_only_text_parent = \ + ##list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) ##except: - ##contours_only_text_parent = 
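# A minimal sketch of what return_list_of_contours_with_desired_order presumably
# does (assumed semantics, not confirmed by this patch): reorder a plain Python
# list by an index sequence such as np.argsort(areas), which stays robust where
# the commented-out np.array fancy-indexing can fail on ragged contour dtypes.
def reorder_by_index(items, order):
    # order: iterable of integer indices, e.g. np.argsort(areas_cnt_text_parent)
    return [items[i] for i in order]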
list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##contours_only_text_parent = \ + ##list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order( + areas_cnt_text_parent, index_con_parents) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) @@ -4598,14 +4553,17 @@ class Eynollah: if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order( + contours_only_text_parent_d, index_con_parents_d) #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #contours_only_text_parent_d = \ + #list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - + #contours_only_text_parent_d = \ + #list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order( + areas_cnt_text_d, index_con_parents_d) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) @@ -4613,12 +4571,16 @@ class Eynollah: if len(cx_bigest_d) >= 5: cx_bigest_d_last5 = cx_bigest_d[-5:] cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) + for j in range(len(cy_biggest_d_last5))] ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) else: cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) + for j in range(len(cy_biggest_d_last5))] ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) cx_bigest_d_big[0] = cx_bigest_d[ind_largest] @@ -4639,7 +4601,9 @@ class Eynollah: p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) p[0] = p[0] - x_diff[0] p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + + (p[1] - cy_biggest_d[j]) ** 2) + for j in 
range(len(cx_bigest_d))] contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) @@ -4659,9 +4623,17 @@ class Eynollah: # stop early empty_marginals = [[]] * len(polygons_of_marginals) if self.full_layout: - pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) + pcgts = self.writer.build_pagexml_full_layout( + [], [], page_coord, [], [], [], [], [], [], + polygons_of_images, contours_tables, [], + polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], + cont_page, polygons_lines_xml, []) else: - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) + pcgts = self.writer.build_pagexml_no_full_layout( + [], page_coord, [], [], [], [], + polygons_of_images, + polygons_of_marginals, empty_marginals, empty_marginals, [], [], + cont_page, polygons_lines_xml, contours_tables, []) self.logger.info("Job done in %.1fs", time.time() - t0) if self.dir_in: self.writer.write_pagexml(pcgts) @@ -4671,14 +4643,18 @@ class Eynollah: #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) - contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + contours_only_text_parent = self.dilate_textregions_contours( + contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one( + contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first, map=self.executor.map) + txt_con_org = get_textregion_contours_in_org_image_light( + contours_only_text_parent, self.image, slope_first, map=self.executor.map) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + txt_con_org = get_textregion_contours_in_org_image( + contours_only_text_parent, self.image, slope_first) #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) @@ -4687,59 +4663,84 @@ class Eynollah: if not self.curved_line: if self.light_version: if self.textline_light: - #all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - # self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_light2(txt_con_org, 
contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_light2(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ + all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2( + txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, + image_page_rotated, boxes_text, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ + all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2( + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, + image_page_rotated, boxes_marginals, slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ - # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - + # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, + # boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ - # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) + # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, + # boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) - all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) - - contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, index_by_text_par_con = self.filter_contours_without_textline_inside(contours_only_text_parent,txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered) - + all_found_textline_polygons = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( + all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons_marginals) + contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, \ + index_by_text_par_con = self.filter_contours_without_textline_inside( + contours_only_text_parent, txt_con_org, all_found_textline_polygons, + contours_only_text_parent_d_ordered) else: textline_mask_tot_ea = 
cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - - #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \ + index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light( + txt_con_org, contours_only_text_parent, textline_mask_tot_ea, + image_page_rotated, boxes_text, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ + all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light( + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, + image_page_rotated, boxes_marginals, slope_deskew) + #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( + # all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ + all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new( + txt_con_org, contours_only_text_parent, textline_mask_tot_ea, + image_page_rotated, boxes_text, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ + all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new( + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, + image_page_rotated, boxes_marginals, slope_deskew) else: scale_param = 1 textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \ - self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \ - self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, 
textline_mask_tot_ea_erode, image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ + all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved( + txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, + image_page_rotated, boxes_text, text_only, + num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2( + all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ + all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved( + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, + image_page_rotated, boxes_marginals, text_only, + num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( + all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order( + contours_only_text_parent_d_ordered, index_by_text_par_con) #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) else: #takes long timee contours_only_text_parent_d_ordered = None @@ -4749,8 +4750,9 @@ class Eynollah: fun = check_any_text_region_in_model_one_is_main_or_header text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ - contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = \ - fun(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = fun( + text_regions_p, regions_fully, contours_only_text_parent, + all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) if self.plotter: self.plotter.save_plot_of_layout(text_regions_p, image_page) @@ -4758,60 +4760,76 @@ class Eynollah: pixel_img = 4 polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, - all_box_coord, all_box_coord_h, all_found_textline_polygons, 
all_found_textline_polygons_h, - kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) - pixel_lines = 6 + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline( + text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, + all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, + kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) if not self.reading_order_machine_based: + pixel_seps = 6 if not self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h_d_ordered) elif self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_seps) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), + num_col_classifier, self.tables, pixel_seps) if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: regions_without_separators = regions_without_separators.astype(np.uint8) regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - else: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( + splitter_y_new, regions_without_separators, matrix_of_lines_ch, + num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes_d, peaks_neg_tot_tables_d = 
return_boxes_of_images_by_order_of_reading_new( + splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, + num_col_classifier, erosion_hurts, self.tables, self.right2left) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) t_order = time.time() if self.full_layout: - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( + contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + order_text_new, id_of_texts_tot = self.do_order_of_regions( + contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + order_text_new, id_of_texts_tot = self.do_order_of_regions( + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) if self.ocr: ocr_all_textlines = [] else: ocr_all_textlines = None - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, - cont_page, polygons_lines_xml, ocr_all_textlines) + pcgts = self.writer.build_pagexml_full_layout( + contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, + polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, + cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) #print("Job done in %.1fs", time.time() - t0) if self.dir_in: @@ -4823,21 +4841,25 @@ class Eynollah: else: contours_only_text_parent_h = None if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( + contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + order_text_new, id_of_texts_tot = self.do_order_of_regions( + contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + contours_only_text_parent_d_ordered = 
self.return_list_of_contours_with_desired_order( + contours_only_text_parent_d_ordered, index_by_text_par_con) #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions( + contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) if self.ocr: - device = cuda.get_current_device() device.reset() gc.collect() @@ -4849,7 +4871,6 @@ class Eynollah: ind_tot = 0 #cv2.imwrite('./img_out.png', image_page) - ocr_all_textlines = [] for indexing, ind_poly_first in enumerate(all_found_textline_polygons): ocr_textline_in_textregion = [] @@ -4871,7 +4892,6 @@ class Eynollah: img_poly_on_img = np.copy(image_page) else: img_poly_on_img = np.copy(img_bin_light) - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) if self.textline_light: @@ -4883,10 +4903,7 @@ class Eynollah: img_croped = img_poly_on_img[y:y+h, x:x+w, :] #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) - ocr_textline_in_textregion.append(text_ocr) - - ind_tot = ind_tot +1 ocr_all_textlines.append(ocr_textline_in_textregion) @@ -4894,9 +4911,11 @@ class Eynollah: ocr_all_textlines = None #print(ocr_all_textlines) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + pcgts = self.writer.build_pagexml_no_full_layout( + txt_con_org, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, + all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) #print("Job done in %.1fs" % (time.time() - t0)) self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index d7f9ccd..feab341 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -14,9 +14,9 @@ from .contour import (contours_in_same_horizon, return_contours_of_image, return_parent_contours) -def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x_max_hor_some,cy_hor_some,peak_points,cy_hor_diff): - - +def return_x_start_end_mothers_childs_and_type_of_reading_order( + x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff): + x_start=[] x_end=[] kind=[]#if covers 2 and more than 2 columns 
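# A minimal sketch of the crop-for-OCR pattern in the eynollah.py hunk above:
# rasterize the textline polygon into a mask, blank everything outside it, then
# cut the polygon's bounding box. Names are illustrative; the real loop feeds the
# crop to return_ocr_of_textline_without_common_section and may dilate the mask
# first when textline_light is set.
import cv2
import numpy as np

def crop_textline(page_bgr: np.ndarray, poly: np.ndarray) -> np.ndarray:
    # poly: an OpenCV contour, i.e. an int32 array of shape (n, 1, 2)
    mask = np.zeros(page_bgr.shape[:2], dtype=np.uint8)
    cv2.fillPoly(mask, pts=[poly], color=1)
    masked = page_bgr * mask[:, :, np.newaxis]  # zero out pixels outside the line
    x, y, w, h = cv2.boundingRect(poly)
    return masked[y:y + h, x:x + w, :]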
set it to 1 otherwise 0 @@ -30,15 +30,12 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x starting=x_min_hor_some[i]-peak_points starting=starting[starting>=0] min_start=np.argmin(starting) - - ending=peak_points-x_max_hor_some[i] len_ending_neg=len(ending[ending<=0]) ending=ending[ending>0] max_end=np.argmin(ending)+len_ending_neg - if (max_end-min_start)>=2: if (max_end-min_start)==(len(peak_points)-1): new_main_sep_y.append(indexer) @@ -57,18 +54,13 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x kind.append(1) indexer+=1 + + x_start_returned = np.array(x_start, dtype=int) + x_end_returned = np.array(x_end, dtype=int) + y_sep_returned = np.array(y_sep, dtype=int) + y_diff_returned = np.array(y_diff, dtype=int) - - x_start_returned=np.copy(x_start) - x_end_returned=np.copy(x_end) - y_sep_returned=np.copy(y_sep) - y_diff_returned=np.copy(y_diff) - - - - - all_args_uniq=contours_in_same_horizon(y_sep_returned) - + all_args_uniq = contours_in_same_horizon(y_sep_returned) args_to_be_unified=[] y_unified=[] y_diff_unified=[] @@ -84,7 +76,10 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x y_sep_same_hor=np.array(y_sep_returned)[all_args_uniq[dd]] y_diff_same_hor=np.array(y_diff_returned)[all_args_uniq[dd]] #print('burda2') - if x_s_same_hor[0]==(x_e_same_hor[1]-1) or x_s_same_hor[1]==(x_e_same_hor[0]-1) and x_s_same_hor[0]!=x_s_same_hor[1] and x_e_same_hor[0]!=x_e_same_hor[1]: + if (x_s_same_hor[0]==x_e_same_hor[1]-1 or + x_s_same_hor[1]==x_e_same_hor[0]-1 and + x_s_same_hor[0]!=x_s_same_hor[1] and + x_e_same_hor[0]!=x_e_same_hor[1]): #print('burda3') for arg_in in all_args_uniq[dd]: #print(arg_in,'arg_in') @@ -98,19 +93,14 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x x_e_unified.append(x_e_selected) y_unified.append(y_selected) y_diff_unified.append(y_diff_selected) - - - #print(x_s_same_hor,'x_s_same_hor') #print(x_e_same_hor[:]-1,'x_e_same_hor') #print('#############################') - #print(x_s_unified,'y_selected') #print(x_e_unified,'x_s_selected') #print(y_unified,'x_e_same_hor') - + args_lines_not_unified=list( set(range(len(y_sep_returned)))-set(args_to_be_unified) ) - #print(args_lines_not_unified,'args_lines_not_unified') x_start_returned_not_unified=list( np.array(x_start_returned)[args_lines_not_unified] ) @@ -128,11 +118,10 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x #print(x_start_returned,'x_start_returned') #print(x_end_returned,'x_end_returned') - x_start_returned=np.copy(x_start_returned_not_unified) - x_end_returned=np.copy(x_end_returned_not_unified) - y_sep_returned=np.copy(y_sep_returned_not_unified) - y_diff_returned=np.copy(y_diff_returned_not_unified) - + x_start_returned = np.array(x_start_returned_not_unified, dtype=int) + x_end_returned = np.array(x_end_returned_not_unified, dtype=int) + y_sep_returned = np.array(y_sep_returned_not_unified, dtype=int) + y_diff_returned = np.array(y_diff_returned_not_unified, dtype=int) #print(y_sep_returned,'y_sep_returned2') #print(x_start_returned,'x_start_returned2') @@ -165,19 +154,19 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x #print(y_min_new,'y_min_new') #print(y_max_new,'y_max_new') - - #print(y_sep[new_main_sep_y[0]],y_sep,'yseps') x_start=np.array(x_start) x_end=np.array(x_end) kind=np.array(kind) y_sep=np.array(y_sep) - if (y_min_new in y_mains_sep_ohne_grenzen) and (y_max_new in 
y_mains_sep_ohne_grenzen): + if (y_min_new in y_mains_sep_ohne_grenzen and + y_max_new in y_mains_sep_ohne_grenzen): x_start=x_start[(y_sep>y_min_new) & (y_sepy_min_new) & (y_sepy_min_new) & (y_sepy_min_new) & (y_sepy_min_new) & (y_sep<=y_max_new)] #print('burda1') @@ -185,7 +174,8 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x #print('burda2') kind=kind[(y_sep>y_min_new) & (y_sep<=y_max_new)] y_sep=y_sep[(y_sep>y_min_new) & (y_sep<=y_max_new)] - elif (y_min_new not in y_mains_sep_ohne_grenzen) and (y_max_new in y_mains_sep_ohne_grenzen): + elif (y_min_new not in y_mains_sep_ohne_grenzen and + y_max_new in y_mains_sep_ohne_grenzen): x_start=x_start[(y_sep>=y_min_new) & (y_sep=y_min_new) & (y_sep=y_min_new) & (y_sep1: #print(np.array(remained_sep_indexes),'np.array(remained_sep_indexes)') #print(np.array(mother),'mother') - remained_sep_indexes_without_mother=np.array(list(remained_sep_indexes))[np.array(mother)==0] - remained_sep_indexes_with_child_without_mother=np.array(list(remained_sep_indexes))[(np.array(mother)==0) & (np.array(child)==1)] + remained_sep_indexes_without_mother = remained_sep_indexes[mother==0] + remained_sep_indexes_with_child_without_mother = remained_sep_indexes[mother==0 & child==1] #print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother') - - - x_end_with_child_without_mother=np.array(x_end)[np.array(remained_sep_indexes_with_child_without_mother)] - - x_start_with_child_without_mother=np.array(x_start)[np.array(remained_sep_indexes_with_child_without_mother)] - - y_lines_with_child_without_mother=np.array(y_sep)[np.array(remained_sep_indexes_with_child_without_mother)] - - + x_end_with_child_without_mother = x_end[remained_sep_indexes_with_child_without_mother] + x_start_with_child_without_mother = x_start[remained_sep_indexes_with_child_without_mother] + y_lines_with_child_without_mother = y_sep[remained_sep_indexes_with_child_without_mother] + reading_orther_type=0 - - - x_end_without_mother=np.array(x_end)[np.array(remained_sep_indexes_without_mother)] - x_start_without_mother=np.array(x_start)[np.array(remained_sep_indexes_without_mother)] - y_lines_without_mother=np.array(y_sep)[np.array(remained_sep_indexes_without_mother)] + x_end_without_mother = x_end[remained_sep_indexes_without_mother] + x_start_without_mother = x_start[remained_sep_indexes_without_mother] + y_lines_without_mother = y_sep[remained_sep_indexes_without_mother] if len(remained_sep_indexes_without_mother)>=2: for i in range(len(remained_sep_indexes_without_mother)-1): - ##nodes_i=set(range(x_start[remained_sep_indexes_without_mother[i]],x_end[remained_sep_indexes_without_mother[i]]+1)) - nodes_i=set(range(x_start[remained_sep_indexes_without_mother[i]],x_end[remained_sep_indexes_without_mother[i]])) + nodes_i=set(range(x_start[remained_sep_indexes_without_mother[i]], + x_end[remained_sep_indexes_without_mother[i]] + # + 1 + )) for j in range(i+1,len(remained_sep_indexes_without_mother)): - #nodes_j=set(range(x_start[remained_sep_indexes_without_mother[j]],x_end[remained_sep_indexes_without_mother[j]]+1)) - nodes_j=set(range(x_start[remained_sep_indexes_without_mother[j]],x_end[remained_sep_indexes_without_mother[j]])) - - set_diff=nodes_i-nodes_j - - if set_diff!=nodes_i: - reading_orther_type=1 + nodes_j=set(range(x_start[remained_sep_indexes_without_mother[j]], + x_end[remained_sep_indexes_without_mother[j]] + # + 1 + )) + set_diff = nodes_i - nodes_j + if set_diff != nodes_i: + reading_orther_type = 1 else: - 
reading_orther_type=0 + reading_orther_type = 0 #print(reading_orther_type,'javab') - #print(y_lines_with_child_without_mother,'y_lines_with_child_without_mother') #print(x_start_with_child_without_mother,'x_start_with_child_without_mother') #print(x_end_with_child_without_mother,'x_end_with_hild_without_mother') - len_sep_with_child=len(np.array(child)[np.array(child)==1]) + len_sep_with_child = len(child[child==1]) #print(len_sep_with_child,'len_sep_with_child') - there_is_sep_with_child=0 - - if len_sep_with_child>=1: - there_is_sep_with_child=1 - + there_is_sep_with_child = 0 + if len_sep_with_child >= 1: + there_is_sep_with_child = 1 #print(all_args_uniq,'all_args_uniq') #print(args_to_be_unified,'args_to_be_unified') - - return reading_orther_type,x_start_returned, x_end_returned ,y_sep_returned,y_diff_returned,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother,new_main_sep_y + return (reading_orther_type, + x_start_returned, + x_end_returned, + y_sep_returned, + y_diff_returned, + y_lines_without_mother, + x_start_without_mother, + x_end_without_mother, + there_is_sep_with_child, + y_lines_with_child_without_mother, + x_start_with_child_without_mother, + x_end_with_child_without_mother, + new_main_sep_y) + def crop_image_inside_box(box, img_org_copy): image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]] return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] @@ -304,7 +303,6 @@ def otsu_copy_binary(img): img1 = img[:, :, 0] retval1, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) - img_r[:, :, 0] = threshold1 img_r[:, :, 1] = threshold1 img_r[:, :, 2] = threshold1 @@ -312,9 +310,7 @@ def otsu_copy_binary(img): img_r = img_r / float(np.max(img_r)) * 255 return img_r - def find_features_of_lines(contours_main): - areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] @@ -326,7 +322,6 @@ def find_features_of_lines(contours_main): y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) slope_lines = [] - for kk in range(len(contours_main)): [vx, vy, x, y] = cv2.fitLine(contours_main[kk], cv2.DIST_L2, 0, 0.01, 0.01) slope_lines.append(((vy / vx) / np.pi * 180)[0]) @@ -339,29 +334,42 @@ def find_features_of_lines(contours_main): slope_lines[(slope_lines != 0) & (slope_lines != 1)] = 2 dis_x = np.abs(x_max_main - x_min_main) - return slope_lines, dis_x, x_min_main, x_max_main, np.array(cy_main), np.array(slope_lines_org), y_min_main, y_max_main, np.array(cx_main) + return (slope_lines, + dis_x, + x_min_main, + x_max_main, + np.array(cy_main), + np.array(slope_lines_org), + y_min_main, + y_max_main, + np.array(cx_main)) def boosting_headers_by_longshot_region_segmentation(textregion_pre_p, textregion_pre_np, img_only_text): textregion_pre_p_org = np.copy(textregion_pre_p) # 4 is drop capitals - headers_in_longshot = (textregion_pre_np[:, :, 0] == 2) * 1 - # headers_in_longshot= ( (textregion_pre_np[:,:,0]==2) | (textregion_pre_np[:,:,0]==1) )*1 - textregion_pre_p[:, :, 0][(headers_in_longshot[:, :] == 1) & (textregion_pre_p[:, :, 0] != 4)] = 2 + headers_in_longshot = textregion_pre_np[:, :, 0] == 2 + #headers_in_longshot = 
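# A simplified sketch of the set-difference test in the
# return_x_start_end_mothers_childs_and_type_of_reading_order hunk above
# (assumed semantics, illustrative names): each motherless separator covers a
# set of column slots; if two such sets share slots without being disjoint,
# plain top-to-bottom ordering is unsafe and the reading-order type becomes 1.
# The patched loop recomputes the flag on every pair, so the last comparison
# wins; this sketch simplifies to "any overlap".
def spans_force_complex_order(col_spans):
    # col_spans: list of (start_col, end_col) index pairs, one per separator
    for i, (s_i, e_i) in enumerate(col_spans):
        nodes_i = set(range(s_i, e_i))
        for s_j, e_j in col_spans[i + 1:]:
            if nodes_i - set(range(s_j, e_j)) != nodes_i:  # shared column slots
                return 1
    return 0

# spans_force_complex_order([(0, 2), (2, 4)]) == 0  (disjoint slots)
# spans_force_complex_order([(0, 3), (2, 4)]) == 1  (overlapping slots)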
((textregion_pre_np[:,:,0]==2) | + # (textregion_pre_np[:,:,0]==1)) + textregion_pre_p[:, :, 0][headers_in_longshot & + (textregion_pre_p[:, :, 0] != 4)] = 2 textregion_pre_p[:, :, 0][textregion_pre_p[:, :, 0] == 1] = 0 # earlier it was so, but by this manner the drop capitals are also deleted - # textregion_pre_p[:,:,0][( img_only_text[:,:]==1) & (textregion_pre_p[:,:,0]!=7) & (textregion_pre_p[:,:,0]!=2)]=1 - textregion_pre_p[:, :, 0][(img_only_text[:, :] == 1) & (textregion_pre_p[:, :, 0] != 7) & (textregion_pre_p[:, :, 0] != 4) & (textregion_pre_p[:, :, 0] != 2)] = 1 + # textregion_pre_p[:,:,0][(img_only_text[:,:]==1) & + # (textregion_pre_p[:,:,0]!=7) & + # (textregion_pre_p[:,:,0]!=2)] = 1 + textregion_pre_p[:, :, 0][(img_only_text[:, :] == 1) & + (textregion_pre_p[:, :, 0] != 7) & + (textregion_pre_p[:, :, 0] != 4) & + (textregion_pre_p[:, :, 0] != 2)] = 1 return textregion_pre_p - def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8): - regions_without_separators_0 = regions_without_separators[:,:].sum(axis=1) + regions_without_separators_0 = regions_without_separators.sum(axis=1) z = gaussian_filter1d(regions_without_separators_0, sigma_) return np.std(z) - def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8): - regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0) + regions_without_separators_0 = regions_without_separators.sum(axis=0) ##plt.plot(regions_without_separators_0) ##plt.show() sigma_ = 35 # 70#35 @@ -372,7 +380,7 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl y = regions_without_separators_0 # [first_nonzero:last_nonzero] y_help = np.zeros(len(y) + 20) y_help[10 : len(y) + 10] = y - x = np.array(range(len(y))) + x = np.arange(len(y)) zneg_rev = -y_help + np.max(y_help) zneg = np.zeros(len(zneg_rev) + 20) zneg[10 : len(zneg_rev) + 10] = zneg_rev @@ -386,9 +394,12 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl last_nonzero = last_nonzero - 100 first_nonzero = first_nonzero + 200 - peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)] - peaks = peaks[(peaks > 0.06 * regions_without_separators.shape[1]) & (peaks < 0.94 * regions_without_separators.shape[1])] - peaks_neg = peaks_neg[(peaks_neg > 370) & (peaks_neg < (regions_without_separators.shape[1] - 370))] + peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & + (peaks_neg < last_nonzero)] + peaks = peaks[(peaks > 0.06 * regions_without_separators.shape[1]) & + (peaks < 0.94 * regions_without_separators.shape[1])] + peaks_neg = peaks_neg[(peaks_neg > 370) & + (peaks_neg < (regions_without_separators.shape[1] - 370))] interest_pos = z[peaks] interest_pos = interest_pos[interest_pos > 10] # plt.plot(z) @@ -405,7 +416,8 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl # print(np.min(interest_pos),np.max(interest_pos),np.max(interest_pos)/np.min(interest_pos),'minmax') dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier - grenze = min_peaks_pos - dis_talaei # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 + grenze = min_peaks_pos - dis_talaei + # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 # print(interest_neg,'interest_neg') # print(grenze,'grenze') @@ -441,19 +453,26 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl p_g_u = len(y) - int(len(y) / 
4.0)
     if num_col == 3:
-        if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or ((peaks_neg_fin[0] + 200) < p_m and peaks_neg_fin[1] < p_m) or ((peaks_neg_fin[0] - 200) > p_m and peaks_neg_fin[1] > p_m):
+        if ((peaks_neg_fin[0] > p_g_u and
+             peaks_neg_fin[1] > p_g_u) or
+            (peaks_neg_fin[0] < p_g_l and
+             peaks_neg_fin[1] < p_g_l) or
+            (peaks_neg_fin[0] + 200 < p_m and
+             peaks_neg_fin[1] < p_m) or
+            (peaks_neg_fin[0] - 200 > p_m and
+             peaks_neg_fin[1] > p_m)):
             num_col = 1
             peaks_neg_fin = []
     if num_col == 2:
-        if (peaks_neg_fin[0] > p_g_u) or (peaks_neg_fin[0] < p_g_l):
+        if (peaks_neg_fin[0] > p_g_u or
+            peaks_neg_fin[0] < p_g_l):
             num_col = 1
             peaks_neg_fin = []
     ##print(len(peaks_neg_fin))
     diff_peaks = np.abs(np.diff(peaks_neg_fin))
-
     cut_off = 400
     peaks_neg_true = []
     forest = []
@@ -489,23 +508,35 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
     ##print(num_col,'early')
     if num_col == 3:
-        if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or (peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or (peaks_neg_true[0] < p_m and (peaks_neg_true[1] + 200) < p_m) or ((peaks_neg_true[0] - 200) > p_m and peaks_neg_true[1] > p_m):
+        if ((peaks_neg_true[0] > p_g_u and
+             peaks_neg_true[1] > p_g_u) or
+            (peaks_neg_true[0] < p_g_l and
+             peaks_neg_true[1] < p_g_l) or
+            (peaks_neg_true[0] < p_m and
+             peaks_neg_true[1] + 200 < p_m) or
+            (peaks_neg_true[0] - 200 > p_m and
+             peaks_neg_true[1] > p_m)):
             num_col = 1
             peaks_neg_true = []
-        elif (peaks_neg_true[0] < p_g_u and peaks_neg_true[0] > p_g_l) and (peaks_neg_true[1] > p_u_quarter):
+        elif (peaks_neg_true[0] < p_g_u and
+              peaks_neg_true[0] > p_g_l and
+              peaks_neg_true[1] > p_u_quarter):
             peaks_neg_true = [peaks_neg_true[0]]
-        elif (peaks_neg_true[1] < p_g_u and peaks_neg_true[1] > p_g_l) and (peaks_neg_true[0] < p_quarter):
+        elif (peaks_neg_true[1] < p_g_u and
+              peaks_neg_true[1] > p_g_l and
+              peaks_neg_true[0] < p_quarter):
             peaks_neg_true = [peaks_neg_true[1]]
     if num_col == 2:
-        if (peaks_neg_true[0] > p_g_u) or (peaks_neg_true[0] < p_g_l):
+        if (peaks_neg_true[0] > p_g_u or
+            peaks_neg_true[0] < p_g_l):
             num_col = 1
             peaks_neg_true = []
     diff_peaks_abnormal = diff_peaks[diff_peaks < 360]
     if len(diff_peaks_abnormal) > 0:
-        arg_help = np.array(range(len(diff_peaks)))
+        arg_help = np.arange(len(diff_peaks))
         arg_help_ann = arg_help[diff_peaks < 360]
         peaks_neg_fin_new = []
@@ -527,7 +558,6 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
     # plt.plot(peaks_neg_true,z[peaks_neg_true],'*')
     # plt.plot([0,len(y)], [grenze,grenze])
     # plt.show()
-
     ##print(len(peaks_neg_true))
     return len(peaks_neg_true), peaks_neg_true
@@ -536,7 +566,6 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
     ##plt.plot(regions_without_separators_0)
     ##plt.show()
-
     sigma_ = 15
     meda_n_updown = regions_without_separators_0[len(regions_without_separators_0) :: -1]
@@ -547,32 +576,24 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
     last_nonzero = len(regions_without_separators_0) - last_nonzero
     y = regions_without_separators_0  # [first_nonzero:last_nonzero]
-
     y_help = np.zeros(len(y) + 20)
-
     y_help[10 : len(y) + 10] = y
-
-    x = np.array(range(len(y)))
+    x = np.arange(len(y))
     zneg_rev = -y_help + np.max(y_help)
-
     zneg = np.zeros(len(zneg_rev) + 20)
-
     zneg[10 : len(zneg_rev) + 10] = zneg_rev
-
     z = gaussian_filter1d(y, sigma_)
     zneg = gaussian_filter1d(zneg, sigma_)
     peaks_neg, _ = find_peaks(zneg, height=0)
     peaks, _ = find_peaks(z, height=0)
-
     peaks_neg = peaks_neg - 10 - 10
-
     peaks_neg_org = np.copy(peaks_neg)
-
-    peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)]
-
-    peaks = peaks[(peaks > 0.09 * regions_without_separators.shape[1]) & (peaks < 0.91 * regions_without_separators.shape[1])]
+    peaks_neg = peaks_neg[(peaks_neg > first_nonzero) &
+                          (peaks_neg < last_nonzero)]
+    peaks = peaks[(peaks > 0.09 * regions_without_separators.shape[1]) &
+                  (peaks < 0.91 * regions_without_separators.shape[1])]
     peaks_neg = peaks_neg[(peaks_neg > 500) & (peaks_neg < (regions_without_separators.shape[1] - 500))]
     # print(peaks)
@@ -587,7 +608,8 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
     # $print(min_peaks_pos)
     dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier
     # print(interest_pos)
-    grenze = min_peaks_pos - dis_talaei  # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
+    grenze = min_peaks_pos - dis_talaei
+    # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
     interest_neg_fin = interest_neg[(interest_neg < grenze)]
     peaks_neg_fin = peaks_neg[(interest_neg < grenze)]
@@ -601,13 +623,21 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
     p_g_u = len(y) - int(len(y) / 3.0)
     if num_col == 3:
-        if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or (peaks_neg_fin[0] < p_m and peaks_neg_fin[1] < p_m) or (peaks_neg_fin[0] > p_m and peaks_neg_fin[1] > p_m):
+        if ((peaks_neg_fin[0] > p_g_u and
+             peaks_neg_fin[1] > p_g_u) or
+            (peaks_neg_fin[0] < p_g_l and
+             peaks_neg_fin[1] < p_g_l) or
+            (peaks_neg_fin[0] < p_m and
+             peaks_neg_fin[1] < p_m) or
+            (peaks_neg_fin[0] > p_m and
+             peaks_neg_fin[1] > p_m)):
             num_col = 1
         else:
             pass
     if num_col == 2:
-        if (peaks_neg_fin[0] > p_g_u) or (peaks_neg_fin[0] < p_g_l):
+        if (peaks_neg_fin[0] > p_g_u or
+            peaks_neg_fin[0] < p_g_l):
             num_col = 1
         else:
             pass
@@ -646,23 +676,36 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
     p_u_quarter = len(y) - p_quarter
     if num_col == 3:
-        if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or (peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or (peaks_neg_true[0] < p_m and peaks_neg_true[1] < p_m) or (peaks_neg_true[0] > p_m and peaks_neg_true[1] > p_m):
+        if ((peaks_neg_true[0] > p_g_u and
+             peaks_neg_true[1] > p_g_u) or
+            (peaks_neg_true[0] < p_g_l and
+             peaks_neg_true[1] < p_g_l) or
+            (peaks_neg_true[0] < p_m and
+             peaks_neg_true[1] < p_m) or
+            (peaks_neg_true[0] > p_m and
+             peaks_neg_true[1] > p_m)):
             num_col = 1
             peaks_neg_true = []
-        elif (peaks_neg_true[0] < p_g_u and peaks_neg_true[0] > p_g_l) and (peaks_neg_true[1] > p_u_quarter):
+        elif (peaks_neg_true[0] < p_g_u and
+              peaks_neg_true[0] > p_g_l and
+              peaks_neg_true[1] > p_u_quarter):
             peaks_neg_true = [peaks_neg_true[0]]
-        elif (peaks_neg_true[1] < p_g_u and peaks_neg_true[1] > p_g_l) and (peaks_neg_true[0] < p_quarter):
+        elif (peaks_neg_true[1] < p_g_u and
+              peaks_neg_true[1] > p_g_l and
+              peaks_neg_true[0] < p_quarter):
             peaks_neg_true = [peaks_neg_true[1]]
         else:
             pass
     if num_col == 2:
-        if (peaks_neg_true[0] > p_g_u) or (peaks_neg_true[0] < p_g_l):
+        if (peaks_neg_true[0] > p_g_u or
+            peaks_neg_true[0] < p_g_l):
             num_col = 1
             peaks_neg_true = []
     if num_col == 4:
-        if len(np.array(peaks_neg_true)[np.array(peaks_neg_true) < p_g_l]) == 2 or len(np.array(peaks_neg_true)[np.array(peaks_neg_true) > (len(y) - p_g_l)]) == 2:
+        if (len(np.array(peaks_neg_true)[np.array(peaks_neg_true) < p_g_l]) == 2 or
+            len(np.array(peaks_neg_true)[np.array(peaks_neg_true) > (len(y) - p_g_l)]) == 2):
             num_col = 1
             peaks_neg_true = []
         else:
@@ -674,7 +717,10 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
     for i in range(len(peaks_neg_true)):
         hill_main = peaks_neg_true[i]
         # deep_depth=z[peaks_neg]
-        hills_around = peaks_neg_org[((peaks_neg_org > hill_main) & (peaks_neg_org <= hill_main + 400)) | ((peaks_neg_org < hill_main) & (peaks_neg_org >= hill_main - 400))]
+        hills_around = peaks_neg_org[((peaks_neg_org > hill_main) &
+                                      (peaks_neg_org <= hill_main + 400)) |
+                                     ((peaks_neg_org < hill_main) &
+                                      (peaks_neg_org >= hill_main - 400))]
         deep_depth_around = z[hills_around]
         # print(hill_main,z[hill_main],hills_around,deep_depth_around,'manoooo')
@@ -687,13 +733,11 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
         pass
     diff_peaks_annormal = diff_peaks[diff_peaks < 360]
-
     if len(diff_peaks_annormal) > 0:
-        arg_help = np.array(range(len(diff_peaks)))
+        arg_help = np.arange(len(diff_peaks))
        arg_help_ann = arg_help[diff_peaks < 360]
        peaks_neg_fin_new = []
-
        for ii in range(len(peaks_neg_fin)):
            if ii in arg_help_ann:
                arg_min = np.argmin([interest_neg_fin[ii], interest_neg_fin[ii + 1]])
@@ -701,7 +745,6 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
                    peaks_neg_fin_new.append(peaks_neg_fin[ii])
                else:
                    peaks_neg_fin_new.append(peaks_neg_fin[ii + 1])
-
            elif (ii - 1) in arg_help_ann:
                pass
            else:
                peaks_neg_fin_new.append(peaks_neg_fin[ii])
@@ -711,7 +754,6 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
    # sometime pages with one columns gives also some negative peaks. delete those peaks
    param = z[peaks_neg_true] / float(min_peaks_pos) * 100
-
    if len(param[param <= 41]) == 0:
        peaks_neg_true = []
@@ -722,11 +764,9 @@ def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8):
    ##plt.plot(regions_without_separators_0)
    ##plt.show()
-
    sigma_ = 35  # 70#35
    z = gaussian_filter1d(regions_without_separators_0, sigma_)
-
    peaks, _ = find_peaks(z, height=0)
    # print(peaks,'peaksnew')
@@ -734,39 +774,43 @@ def return_regions_without_separators(regions_pre):
    kernel = np.ones((5, 5), np.uint8)
-    regions_without_separators = ((regions_pre[:, :] != 6) & (regions_pre[:, :] != 0)) * 1
-    # regions_without_separators=( (image_regions_eraly_p[:,:,:]!=6) & (image_regions_eraly_p[:,:,:]!=0) & (image_regions_eraly_p[:,:,:]!=5) & (image_regions_eraly_p[:,:,:]!=8) & (image_regions_eraly_p[:,:,:]!=7))*1
+    regions_without_separators = ((regions_pre[:, :] != 6) &
+                                  (regions_pre[:, :] != 0))
+    # regions_without_separators=( (image_regions_eraly_p[:,:,:]!=6) &
+    #                              (image_regions_eraly_p[:,:,:]!=0) &
+    #                              (image_regions_eraly_p[:,:,:]!=5) &
+    #                              (image_regions_eraly_p[:,:,:]!=8) &
+    #                              (image_regions_eraly_p[:,:,:]!=7))
-    regions_without_separators = regions_without_separators.astype(np.uint8)
-
-    regions_without_separators = cv2.erode(regions_without_separators, kernel, iterations=6)
+    regions_without_separators = cv2.erode(regions_without_separators.astype(np.uint8), kernel, iterations=6)
    return regions_without_separators
-
def put_drop_out_from_only_drop_model(layout_no_patch, layout1):
-
    drop_only = (layout_no_patch[:, :, 0] == 4) * 1
    contours_drop, hir_on_drop = return_contours_of_image(drop_only)
    contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
-    areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j]) for j in range(len(contours_drop_parent))])
+    areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j])
+                               for j in range(len(contours_drop_parent))])
    areas_cnt_text = areas_cnt_text / float(drop_only.shape[0] * drop_only.shape[1])
-
-    contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 0.00001]
-
-    areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.00001]
+    contours_drop_parent = [contours_drop_parent[jz]
+                            for jz in range(len(contours_drop_parent))
+                            if areas_cnt_text[jz] > 0.00001]
+    areas_cnt_text = [areas_cnt_text[jz]
+                      for jz in range(len(areas_cnt_text))
+                      if areas_cnt_text[jz] > 0.00001]
    contours_drop_parent_final = []
-
    for jj in range(len(contours_drop_parent)):
        x, y, w, h = cv2.boundingRect(contours_drop_parent[jj])
        # boxes.append([int(x), int(y), int(w), int(h)])
        map_of_drop_contour_bb = np.zeros((layout1.shape[0], layout1.shape[1]))
        map_of_drop_contour_bb[y : y + h, x : x + w] = layout1[y : y + h, x : x + w]
-
-        if (((map_of_drop_contour_bb == 1) * 1).sum() / float(((map_of_drop_contour_bb == 5) * 1).sum()) * 100) >= 15:
+        if (100. *
+            (map_of_drop_contour_bb == 1).sum() /
+            (map_of_drop_contour_bb == 5).sum()) >= 15:
            contours_drop_parent_final.append(contours_drop_parent[jj])
    layout_no_patch[:, :, 0][layout_no_patch[:, :, 0] == 4] = 0
@@ -780,49 +824,53 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop
    contours_drop, hir_on_drop = return_contours_of_image(drop_only)
    contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
-    areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j]) for j in range(len(contours_drop_parent))])
+    areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j])
+                               for j in range(len(contours_drop_parent))])
    areas_cnt_text = areas_cnt_text / float(drop_only.shape[0] * drop_only.shape[1])
-
-    contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 0.00001]
-
-    areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.00001]
+    contours_drop_parent = [contours_drop_parent[jz]
+                            for jz in range(len(contours_drop_parent))
+                            if areas_cnt_text[jz] > 0.00001]
+    areas_cnt_text = [areas_cnt_text[jz]
+                      for jz in range(len(areas_cnt_text))
+                      if areas_cnt_text[jz] > 0.00001]
    contours_drop_parent_final = []
-
    for jj in range(len(contours_drop_parent)):
        x, y, w, h = cv2.boundingRect(contours_drop_parent[jj])
+        box = slice(y, y + h), slice(x, x + w)
+        box0 = box + (0,)
        mask_of_drop_cpaital_in_early_layout = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1]))
+        mask_of_drop_cpaital_in_early_layout[box] = text_regions_p[box]
-        mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w] = text_regions_p[y : y + h, x : x + w]
-
-        all_drop_capital_pixels_which_is_text_in_early_lo = np.sum( mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w]==1 )
-
-        mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w]=1
-        all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1 )
+        all_drop_capital_pixels_which_is_text_in_early_lo = np.sum(mask_of_drop_cpaital_in_early_layout[box]==1)
+        mask_of_drop_cpaital_in_early_layout[box] = 1
+        all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1)
        percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels)
-
-
-        if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.6 and percent_text_to_all_in_drop>=0.3:
-
-            layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label
+        if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.6 and
+            percent_text_to_all_in_drop >= 0.3):
+            layout_in_patch[box0] = drop_capital_label
        else:
-            layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = drop_capital_label
-            layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == 0] = drop_capital_label
-            layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == 4] = drop_capital_label# images
-            #layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = 1#drop_capital_label
+            layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = drop_capital_label
+            layout_in_patch[box0][layout_in_patch[box0] == 0] = drop_capital_label
+            layout_in_patch[box0][layout_in_patch[box0] == 4] = drop_capital_label# images
+            #layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = 1#drop_capital_label
    return layout_in_patch
-def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_textline_polygons,slopes,contours_only_text_parent_d_ordered):
-
-    cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent)
+def check_any_text_region_in_model_one_is_main_or_header(
+        regions_model_1, regions_model_full,
+        contours_only_text_parent,
+        all_box_coord, all_found_textline_polygons,
+        slopes,
+        contours_only_text_parent_d_ordered):
+
+    cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \
+        find_new_features_of_contours(contours_only_text_parent)
    length_con=x_max_main-x_min_main
    height_con=y_max_main-y_min_main
-
-
    all_found_textline_polygons_main=[]
    all_found_textline_polygons_head=[]
@@ -843,14 +891,10 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions
        img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3))
        img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255))
-
-
        all_pixels=((img[:,:,0]==255)*1).sum()
-
        pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum()
        pixels_main=all_pixels-pixels_header
-
        if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ):
            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2
            contours_only_text_parent_head.append(con)
@@ -870,28 +914,44 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions
    #print(all_pixels,pixels_main,pixels_header)
-    return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_textline_polygons_main,all_found_textline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d
-
+    return (regions_model_1,
+            contours_only_text_parent_main,
+            contours_only_text_parent_head,
+            all_box_coord_main,
+            all_box_coord_head,
+            all_found_textline_polygons_main,
+            all_found_textline_polygons_head,
+            slopes_main,
+            slopes_head,
+            contours_only_text_parent_main_d,
+            contours_only_text_parent_head_d)
+
+def check_any_text_region_in_model_one_is_main_or_header_light(
+        regions_model_1, regions_model_full,
+        contours_only_text_parent,
+        all_box_coord, all_found_textline_polygons,
+        slopes,
+        contours_only_text_parent_d_ordered):
-def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_textline_polygons,slopes,contours_only_text_parent_d_ordered):
-
    ### to make it faster
    h_o = regions_model_1.shape[0]
    w_o = regions_model_1.shape[1]
-
-    regions_model_1 = cv2.resize(regions_model_1, (int(regions_model_1.shape[1]/3.), int(regions_model_1.shape[0]/3.)), interpolation=cv2.INTER_NEAREST)
-    regions_model_full = cv2.resize(regions_model_full, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST)
-    contours_only_text_parent = [ (i/3.).astype(np.int32) for i in contours_only_text_parent]
+    zoom = 3
+    regions_model_1 = cv2.resize(regions_model_1, (regions_model_1.shape[1] // zoom,
+                                                   regions_model_1.shape[0] // zoom),
+                                 interpolation=cv2.INTER_NEAREST)
+    regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom,
+                                                         regions_model_full.shape[0] // zoom),
+                                    interpolation=cv2.INTER_NEAREST)
+    contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent]
    ###
-
-    cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent)
+    cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \
+        find_new_features_of_contours(contours_only_text_parent)
    length_con=x_max_main-x_min_main
    height_con=y_max_main-y_min_main
-
-
    all_found_textline_polygons_main=[]
    all_found_textline_polygons_head=[]
@@ -909,16 +969,13 @@ def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,r
    for ii in range(len(contours_only_text_parent)):
        con=contours_only_text_parent[ii]
-        img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3))
+        img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3))
        img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255))
-
-
-        all_pixels=((img[:,:,0]==255)*1).sum()
-
-        pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum()
-        pixels_main=all_pixels-pixels_header
-
+        all_pixels = (img[:,:,0]==255).sum()
+        pixels_header=((img[:,:,0]==255) &
+                       (regions_model_full[:,:,0]==2)).sum()
+        pixels_main = all_pixels - pixels_header
        if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ):
            regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2
@@ -939,22 +996,30 @@ def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,r
    #print(all_pixels,pixels_main,pixels_header)
-
-
    ### to make it faster
-
    regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST)
-    #regions_model_full = cv2.resize(img, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST)
-    contours_only_text_parent_head = [ (i*3.).astype(np.int32) for i in contours_only_text_parent_head]
-    contours_only_text_parent_main = [ (i*3.).astype(np.int32) for i in contours_only_text_parent_main]
+    # regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom,
+    #                                       regions_model_full.shape[0] // zoom),
+    #                                 interpolation=cv2.INTER_NEAREST)
+    contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head]
+    contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main]
    ###
-    return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_textline_polygons_main,all_found_textline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d
+    return (regions_model_1,
+            contours_only_text_parent_main,
+            contours_only_text_parent_head,
+            all_box_coord_main,
+            all_box_coord_head,
+            all_found_textline_polygons_main,
+            all_found_textline_polygons_head,
+            slopes_main,
+            slopes_head,
+            contours_only_text_parent_main_d,
+            contours_only_text_parent_head_d)
def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col):
    # print(textlines_con)
    # textlines_con=textlines_con.astype(np.uint32)
-
    textlines_con_changed = []
    for m1 in range(len(textlines_con)):
@@ -973,9 +1038,10 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
        ##plt.imshow(img_text_all)
        ##plt.show()
-        areas_cnt_text = np.array([cv2.contourArea(textlines_tot[j]) for j in range(len(textlines_tot))])
+        areas_cnt_text = np.array([cv2.contourArea(textlines_tot[j])
+                                   for j in range(len(textlines_tot))])
        areas_cnt_text = areas_cnt_text / float(textline_iamge.shape[0] * textline_iamge.shape[1])
-        indexes_textlines = np.array(range(len(textlines_tot)))
+        indexes_textlines = np.arange(len(textlines_tot))
        # print(areas_cnt_text,np.min(areas_cnt_text),np.max(areas_cnt_text))
        if num_col == 0:
@@ -1010,9 +1076,7 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
            sum_small_big_all2 = (sum_small_big_all[:, :] == 2) * 1
            sum_intersection_sb = sum_small_big_all2.sum(axis=1).sum()
-
            if sum_intersection_sb > 0:
-
                dis_small_from_bigs_tot = []
                for z1 in range(len(textlines_small)):
                    # print(len(textlines_small),'small')
@@ -1028,27 +1092,22 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
                        sum_small_big_2 = (sum_small_big[:, :] == 2) * 1
                        sum_intersection = sum_small_big_2.sum(axis=1).sum()
-
                        # print(sum_intersection)
-
                        intersections.append(sum_intersection)
                    if len(np.array(intersections)[np.array(intersections) > 0]) == 0:
                        intersections = []
-
                    try:
                        dis_small_from_bigs_tot.append(np.argmax(intersections))
                    except:
                        dis_small_from_bigs_tot.append(-1)
                smalls_list = np.array(dis_small_from_bigs_tot)[np.array(dis_small_from_bigs_tot) >= 0]
-
                # index_small_textlines_rest=list( set(indexes_textlines_small)-set(smalls_list) )
                textlines_big_with_change = []
                textlines_big_with_change_con = []
                textlines_small_with_change = []
-
                for z in list(set(smalls_list)):
                    index_small_textlines = list(np.where(np.array(dis_small_from_bigs_tot) == z)[0])
                    # print(z,index_small_textlines)
@@ -1068,7 +1127,6 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
                    cont, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                    # print(cont[0],type(cont))
-
                    textlines_big_with_change_con.append(cont)
                    textlines_big_org_form[z] = cont[0]
@@ -1079,13 +1137,11 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
                # print(textlines_small_with_change,'textlines_small_with_change')
                # print(textlines_big)
                textlines_con_changed.append(textlines_big_org_form)
-
            else:
                textlines_con_changed.append(textlines_big_org_form)
    return textlines_con_changed
def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
-
    ##plt.imshow(textline_mask)
    ##plt.show()
    """
    y=textline_mask.sum(axis=1)
    y_help=np.zeros(len(y)+40)
    y_help[20:len(y)+20]=y
-    x=np.array( range(len(y)) )
-
+    x=np.arange(len(y))
    peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
-
    ##plt.imshow(textline_mask[:,:])
    ##plt.show()
-
    sigma_gaus=8
-
    z= gaussian_filter1d(y_help, sigma_gaus)
    zneg_rev=-y_help+np.max(y_help)
-
    zneg=np.zeros(len(zneg_rev)+40)
    zneg[20:len(zneg_rev)+20]=zneg_rev
    zneg= gaussian_filter1d(zneg, sigma_gaus)
-
    peaks, _ = find_peaks(z, height=0)
    peaks_neg, _ = find_peaks(zneg, height=0)
-
    peaks_neg=peaks_neg-20-20
    peaks=peaks-20
    """
-
    textline_sum_along_width = textline_mask.sum(axis=1)
    y = textline_sum_along_width[:]
    y_padded = np.zeros(len(y) + 40)
    y_padded[20 : len(y) + 20] = y
-    x = np.array(range(len(y)))
+    x = np.arange(len(y))
    peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
    sigma_gaus = 8
-
    z = gaussian_filter1d(y_padded, sigma_gaus)
    zneg_rev = -y_padded + np.max(y_padded)
-
    zneg = np.zeros(len(zneg_rev) + 40)
    zneg[20 : len(zneg_rev) + 20] = zneg_rev
    zneg = gaussian_filter1d(zneg, sigma_gaus)
    peaks, _ = find_peaks(z, height=0)
    peaks_neg, _ = find_peaks(zneg, height=0)
-
    peaks_neg = peaks_neg - 20 - 20
    peaks = peaks - 20
    ##plt.plot(z)
    ##plt.show()
-
    if contours_main != None:
        areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
        M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
@@ -1173,42 +1217,32 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
    # print(cy_main,'mainy')
    peaks_neg_new = []
-
    peaks_neg_new.append(0 + y_ref)
    for iii in range(len(peaks_neg)):
        peaks_neg_new.append(peaks_neg[iii] + y_ref)
-
    peaks_neg_new.append(textline_mask.shape[0] + y_ref)
    if len(cy_main) > 0 and np.max(cy_main) > np.max(peaks_neg_new):
        cy_main = np.array(cy_main) * (np.max(peaks_neg_new) / np.max(cy_main)) - 10
-
-    if contours_main != None:
-        indexer_main = np.array(range(len(contours_main)))
-
+    if contours_main != None:
+        indexer_main = np.arange(len(contours_main))
    if contours_main != None:
        len_main = len(contours_main)
    else:
        len_main = 0
    matrix_of_orders = np.zeros((len(contours_main) + len(contours_header), 5))
-
-    matrix_of_orders[:, 0] = np.array(range(len(contours_main) + len(contours_header)))
-
+    matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_header))
    matrix_of_orders[: len(contours_main), 1] = 1
    matrix_of_orders[len(contours_main) :, 1] = 2
-
    matrix_of_orders[: len(contours_main), 2] = cx_main
    matrix_of_orders[len(contours_main) :, 2] = cx_header
-
    matrix_of_orders[: len(contours_main), 3] = cy_main
    matrix_of_orders[len(contours_main) :, 3] = cy_header
-
-    matrix_of_orders[: len(contours_main), 4] = np.array(range(len(contours_main)))
-    matrix_of_orders[len(contours_main) :, 4] = np.array(range(len(contours_header)))
+    matrix_of_orders[: len(contours_main), 4] = np.arange(len(contours_main))
+    matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_header))
    # print(peaks_neg_new,'peaks_neg_new')
-
    # print(matrix_of_orders,'matrix_of_orders')
    # print(peaks_neg_new,np.max(peaks_neg_new))
    final_indexers_sorted = []
@@ -1217,19 +1251,20 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
    for i in range(len(peaks_neg_new) - 1):
        top = peaks_neg_new[i]
        down = peaks_neg_new[i + 1]
-
-        indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
-        cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
-        cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
-        types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
-        index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
-
+        indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) &
+                                            ((matrix_of_orders[:, 3] < down))]
+        cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) &
                                        ((matrix_of_orders[:, 3] < down))]
+        cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) &
+                                        ((matrix_of_orders[:, 3] < down))]
+        types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) &
+                                               (matrix_of_orders[:, 3] < down)]
+        index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) &
                                                     (matrix_of_orders[:, 3] < down)]
        sorted_inside = np.argsort(cxs_in)
-
        ind_in_int = indexes_in[sorted_inside]
        ind_in_type = types_of_text[sorted_inside]
        ind_ind_type = index_types_of_text[sorted_inside]
-
        for j in range(len(ind_in_int)):
            final_indexers_sorted.append(int(ind_in_int[j]))
            final_types.append(int(ind_in_type[j]))
@@ -1237,20 +1272,22 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
    ##matrix_of_orders[:len_main,4]=final_indexers_sorted[:]
-    # This fix is applied if the sum of the lengths of contours and contours_h does not match final_indexers_sorted. However, this is not the optimal solution..
-    if (len(cy_main)+len(cy_header) ) == len(final_index_type):
+    # This fix is applied if the sum of the lengths of contours and contours_h
+    # does not match final_indexers_sorted. However, this is not the optimal solution..
+    if len(cy_main) + len(cy_header) == len(final_index_type):
        pass
    else:
-        indexes_missed = set(list( np.array( range((len(cy_main)+len(cy_header) ) )) )) - set(final_indexers_sorted)
+        indexes_missed = set(np.arange(len(cy_main) + len(cy_header))) - set(final_indexers_sorted)
        for ind_missed in indexes_missed:
            final_indexers_sorted.append(ind_missed)
            final_types.append(1)
            final_index_type.append(ind_missed)
-
-
+
    return final_indexers_sorted, matrix_of_orders, final_types, final_index_type
-def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(img_p_in_ver, img_in_hor,num_col_classifier):
+def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
        img_p_in_ver, img_in_hor,num_col_classifier):
+
    #img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2)
    img_p_in_ver=img_p_in_ver.astype(np.uint8)
    img_p_in_ver=np.repeat(img_p_in_ver[:, :, np.newaxis], 3, axis=2)
@@ -1258,33 +1295,33 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(im
    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
    contours_lines_ver,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
-
-    slope_lines_ver,dist_x_ver, x_min_main_ver ,x_max_main_ver ,cy_main_ver,slope_lines_org_ver,y_min_main_ver, y_max_main_ver, cx_main_ver=find_features_of_lines(contours_lines_ver)
-
+    slope_lines_ver, _, x_min_main_ver, _, _, _, y_min_main_ver, y_max_main_ver, cx_main_ver = \
+        find_features_of_lines(contours_lines_ver)
    for i in range(len(x_min_main_ver)):
-        img_p_in_ver[int(y_min_main_ver[i]):int(y_min_main_ver[i])+30,int(cx_main_ver[i])-25:int(cx_main_ver[i])+25,0]=0
-        img_p_in_ver[int(y_max_main_ver[i])-30:int(y_max_main_ver[i]),int(cx_main_ver[i])-25:int(cx_main_ver[i])+25,0]=0
-
-
+        img_p_in_ver[int(y_min_main_ver[i]):
                     int(y_min_main_ver[i])+30,
                     int(cx_main_ver[i])-25:
                     int(cx_main_ver[i])+25, 0] = 0
+        img_p_in_ver[int(y_max_main_ver[i])-30:
                     int(y_max_main_ver[i]),
                     int(cx_main_ver[i])-25:
                     int(cx_main_ver[i])+25, 0] = 0
+
    img_in_hor=img_in_hor.astype(np.uint8)
    img_in_hor=np.repeat(img_in_hor[:, :, np.newaxis], 3, axis=2)
    imgray = cv2.cvtColor(img_in_hor, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
-
    contours_lines_hor,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
-    slope_lines_hor,dist_x_hor, x_min_main_hor ,x_max_main_hor ,cy_main_hor,slope_lines_org_hor,y_min_main_hor, y_max_main_hor, cx_main_hor=find_features_of_lines(contours_lines_hor)
-
-
+    slope_lines_hor, dist_x_hor, x_min_main_hor, x_max_main_hor, cy_main_hor, _, _, _, _ = \
+        find_features_of_lines(contours_lines_hor)
    x_width_smaller_than_acolumn_width=img_in_hor.shape[1]/float(num_col_classifier+1.)
    len_lines_bigger_than_x_width_smaller_than_acolumn_width=len( dist_x_hor[dist_x_hor>=x_width_smaller_than_acolumn_width] )
-
-    len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column=int( len_lines_bigger_than_x_width_smaller_than_acolumn_width/float(num_col_classifier) )
-
-
-    if len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column<10:
-        args_hor=np.array( range(len(slope_lines_hor) ))
+    len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column=int(len_lines_bigger_than_x_width_smaller_than_acolumn_width /
                                                                            float(num_col_classifier))
+    if len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column < 10:
+        args_hor=np.arange(len(slope_lines_hor))
        all_args_uniq=contours_in_same_horizon(cy_main_hor)
        #print(all_args_uniq,'all_args_uniq')
        if len(all_args_uniq)>0:
@@ -1302,51 +1339,50 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(im
                    #print(img_p_in_ver.shape[1],some_x_max-some_x_min,'xdiff')
                    diff_x_some=some_x_max-some_x_min
                    for jv in range(len(some_args)):
-
-                        img_p_in=cv2.fillPoly(img_in_hor, pts =[contours_lines_hor[some_args[jv]]], color=(1,1,1))
-
+                        img_p_in=cv2.fillPoly(img_in_hor, pts=[contours_lines_hor[some_args[jv]]], color=(1,1,1))
                    if any(i_diff>(img_p_in_ver.shape[1]/float(3.3)) for i_diff in diff_x_some):
-                        img_p_in[int(np.mean(some_cy))-5:int(np.mean(some_cy))+5, int(np.min(some_x_min)):int(np.max(some_x_max)) ]=1
-
+                        img_p_in[int(np.mean(some_cy))-5:
                                 int(np.mean(some_cy))+5,
                                 int(np.min(some_x_min)):
                                 int(np.max(some_x_max)) ]=1
                    sum_dis=dist_x_hor[some_args].sum()
                    diff_max_min_uniques=np.max(x_max_main_hor[some_args])-np.min(x_min_main_hor[some_args])
-
-                    if diff_max_min_uniques>sum_dis and ( (sum_dis/float(diff_max_min_uniques) ) >0.85 ) and ( (diff_max_min_uniques/float(img_p_in_ver.shape[1]))>0.85 ) and np.std( dist_x_hor[some_args] )<(0.55*np.mean( dist_x_hor[some_args] )):
-                        #print(dist_x_hor[some_args],dist_x_hor[some_args].sum(),np.min(x_min_main_hor[some_args]) ,np.max(x_max_main_hor[some_args]),'jalibdi')
-                        #print(np.mean( dist_x_hor[some_args] ),np.std( dist_x_hor[some_args] ),np.var( dist_x_hor[some_args] ),'jalibdiha')
+                    if (diff_max_min_uniques > sum_dis and
                        sum_dis / float(diff_max_min_uniques) > 0.85 and
                        diff_max_min_uniques / float(img_p_in_ver.shape[1]) > 0.85 and
                        np.std(dist_x_hor[some_args]) < 0.55 * np.mean(dist_x_hor[some_args])):
+                        # print(dist_x_hor[some_args],
                        #       dist_x_hor[some_args].sum(),
                        #       np.min(x_min_main_hor[some_args]),
                        #       np.max(x_max_main_hor[some_args]),'jalibdi')
+                        # print(np.mean( dist_x_hor[some_args] ),
                        #       np.std( dist_x_hor[some_args] ),
                        #       np.var( dist_x_hor[some_args] ),'jalibdiha')
                        special_separators.append(np.mean(cy_main_hor[some_args]))
-
        else:
            img_p_in=img_in_hor
            special_separators=[]
    else:
        img_p_in=img_in_hor
        special_separators=[]
-
    img_p_in_ver[:,:,0][img_p_in_ver[:,:,0]==255]=1
    sep_ver_hor=img_p_in+img_p_in_ver
-
-
    sep_ver_hor_cross=(sep_ver_hor[:,:,0]==2)*1
-
    sep_ver_hor_cross=np.repeat(sep_ver_hor_cross[:, :, np.newaxis], 3, axis=2)
    sep_ver_hor_cross=sep_ver_hor_cross.astype(np.uint8)
    imgray = cv2.cvtColor(sep_ver_hor_cross, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
    contours_cross,_=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
-
    cx_cross,cy_cross ,_ , _, _ ,_,_=find_new_features_of_contours(contours_cross)
-
    for ii in range(len(cx_cross)):
        img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])+5:int(cx_cross[ii])+40,0]=0
        img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])-40:int(cx_cross[ii])-4,0]=0
-
    else:
        img_p_in=np.copy(img_in_hor)
        special_separators=[]
-    return img_p_in[:,:,0],special_separators
+    return img_p_in[:,:,0], special_separators

def return_points_with_boundies(peaks_neg_fin, first_point, last_point):
    peaks_neg_tot = []
@@ -1359,62 +1395,49 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None):
    t_ins_c0 = time.time()
    separators_closeup=( (region_pre_p[:,:,:]==pixel_lines))*1
-
    separators_closeup[0:110,:,:]=0
    separators_closeup[separators_closeup.shape[0]-150:,:,:]=0
    kernel = np.ones((5,5),np.uint8)
-
    separators_closeup=separators_closeup.astype(np.uint8)
    separators_closeup = cv2.dilate(separators_closeup,kernel,iterations = 1)
    separators_closeup = cv2.erode(separators_closeup,kernel,iterations = 1)
-
    separators_closeup_new=np.zeros((separators_closeup.shape[0] ,separators_closeup.shape[1] ))
    separators_closeup_n=np.copy(separators_closeup)
-
    separators_closeup_n=separators_closeup_n.astype(np.uint8)
-
+
    separators_closeup_n_binary=np.zeros(( separators_closeup_n.shape[0],separators_closeup_n.shape[1]) )
    separators_closeup_n_binary[:,:]=separators_closeup_n[:,:,0]
-
    separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]!=0]=1
    gray_early=np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2)
    gray_early=gray_early.astype(np.uint8)
-
    imgray_e = cv2.cvtColor(gray_early, cv2.COLOR_BGR2GRAY)
    ret_e, thresh_e = cv2.threshold(imgray_e, 0, 255, 0)
    contours_line_e,hierarchy_e=cv2.findContours(thresh_e,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
-
-    slope_linese,dist_xe, x_min_maine ,x_max_maine ,cy_maine,slope_lines_orge,y_min_maine, y_max_maine, cx_maine=find_features_of_lines(contours_line_e)
-
-    dist_ye=y_max_maine-y_min_maine
-
-
-    args_e=np.array(range(len(contours_line_e)))
-    args_hor_e=args_e[(dist_ye<=50) & (dist_xe>=3*dist_ye)]
-
-
+    _, dist_xe, _, _, _, _, y_min_main, y_max_main, _ = \
+        find_features_of_lines(contours_line_e)
+    dist_ye = y_max_main - y_min_main
+    args_e=np.arange(len(contours_line_e))
+    args_hor_e=args_e[(dist_ye<=50) &
                      (dist_xe>=3*dist_ye)]
    cnts_hor_e=[]
    for ce in args_hor_e:
        cnts_hor_e.append(contours_line_e[ce])
-
    figs_e=np.zeros(thresh_e.shape)
    figs_e=cv2.fillPoly(figs_e,pts=cnts_hor_e,color=(1,1,1))
-
-    separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary,pts=cnts_hor_e,color=(0,0,0))
-
+    separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=(0,0,0))
    gray = cv2.bitwise_not(separators_closeup_n_binary)
    gray=gray.astype(np.uint8)
    bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, \
-            cv2.THRESH_BINARY, 15, -2)
-
+                               cv2.THRESH_BINARY, 15, -2)
    horizontal = np.copy(bw)
    vertical = np.copy(bw)
-
+
    cols = horizontal.shape[1]
    horizontal_size = cols // 30
    # Create structure element for extracting horizontal lines through morphology operations
@@ -1424,12 +1447,9 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
    horizontal = cv2.dilate(horizontal, horizontalStructure)
    kernel = np.ones((5,5),np.uint8)
-
-
    horizontal = cv2.dilate(horizontal,kernel,iterations = 2)
    horizontal = cv2.erode(horizontal,kernel,iterations = 2)
-
-    horizontal=cv2.fillPoly(horizontal,pts=cnts_hor_e,color=(255,255,255))
+    horizontal = cv2.fillPoly(horizontal, pts=cnts_hor_e, color=(255,255,255))
    rows = vertical.shape[0]
    verticalsize = rows // 30
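For reference, the separator extraction that the hunks above streamline is the standard OpenCV morphology recipe: open the binarized image with a wide flat kernel to keep only horizontal rules, and with a tall thin kernel to keep only vertical rules. Below is a minimal standalone sketch of that technique, assuming a uint8 input `bw` with separator pixels set to 255; the helper name `extract_line_masks` and the `scale` parameter are illustrative and not part of eynollah:

    import cv2
    import numpy as np

    def extract_line_masks(bw: np.ndarray, scale: int = 30):
        # Erode then dilate (an "opening") with a wide flat kernel: only
        # horizontal runs of foreground at least width//scale long survive.
        h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(1, bw.shape[1] // scale), 1))
        horizontal = cv2.dilate(cv2.erode(bw, h_kernel), h_kernel)
        # The same opening with a tall thin kernel keeps vertical rules.
        v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, max(1, bw.shape[0] // scale)))
        vertical = cv2.dilate(cv2.erode(bw, v_kernel), v_kernel)
        return horizontal, vertical

The kernel length is tied to the page size (width // 30 here), so short strokes inside glyphs are erased while true column separators and rules survive; the patch then feeds these masks to the cross-point cleanup above.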
@@ -1438,10 +1458,11 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, # Apply morphology operations vertical = cv2.erode(vertical, verticalStructure) vertical = cv2.dilate(vertical, verticalStructure) - vertical = cv2.dilate(vertical,kernel,iterations = 1) - horizontal,special_separators=combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(vertical,horizontal,num_col_classifier) + horizontal, special_separators = \ + combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( + vertical, horizontal, num_col_classifier) separators_closeup_new[:,:][vertical[:,:]!=0]=1 separators_closeup_new[:,:][horizontal[:,:]!=0]=1 @@ -1453,9 +1474,10 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_line_vers,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - slope_lines,dist_x, x_min_main ,x_max_main ,cy_main,slope_lines_org,y_min_main, y_max_main, cx_main=find_features_of_lines(contours_line_vers) + slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ + find_features_of_lines(contours_line_vers) - args=np.array( range(len(slope_lines) )) + args=np.arange(len(slope_lines)) args_ver=args[slope_lines==1] dist_x_ver=dist_x[slope_lines==1] y_min_main_ver=y_min_main[slope_lines==1] @@ -1466,19 +1488,17 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, dist_y_ver=y_max_main_ver-y_min_main_ver len_y=separators_closeup.shape[0]/3.0 - horizontal=np.repeat(horizontal[:, :, np.newaxis], 3, axis=2) horizontal=horizontal.astype(np.uint8) imgray = cv2.cvtColor(horizontal, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_line_hors,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - slope_lines,dist_x, x_min_main ,x_max_main ,cy_main,slope_lines_org,y_min_main, y_max_main, cx_main=find_features_of_lines(contours_line_hors) + slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ + find_features_of_lines(contours_line_hors) slope_lines_org_hor=slope_lines_org[slope_lines==0] - args=np.array( range(len(slope_lines) )) + args=np.arange(len(slope_lines)) len_x=separators_closeup.shape[1]/5.0 - dist_y=np.abs(y_max_main-y_min_main) args_hor=args[slope_lines==0] @@ -1497,109 +1517,84 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, y_min_main_hor=y_min_main_hor[dist_x_hor>=len_x/2.0] y_max_main_hor=y_max_main_hor[dist_x_hor>=len_x/2.0] dist_y_hor=dist_y_hor[dist_x_hor>=len_x/2.0] - slope_lines_org_hor=slope_lines_org_hor[dist_x_hor>=len_x/2.0] dist_x_hor=dist_x_hor[dist_x_hor>=len_x/2.0] - matrix_of_lines_ch=np.zeros((len(cy_main_hor)+len(cx_main_ver),10)) - matrix_of_lines_ch[:len(cy_main_hor),0]=args_hor matrix_of_lines_ch[len(cy_main_hor):,0]=args_ver - - matrix_of_lines_ch[len(cy_main_hor):,1]=cx_main_ver - matrix_of_lines_ch[:len(cy_main_hor),2]=x_min_main_hor+50#x_min_main_hor+150 matrix_of_lines_ch[len(cy_main_hor):,2]=x_min_main_ver - matrix_of_lines_ch[:len(cy_main_hor),3]=x_max_main_hor-50#x_max_main_hor-150 matrix_of_lines_ch[len(cy_main_hor):,3]=x_max_main_ver - matrix_of_lines_ch[:len(cy_main_hor),4]=dist_x_hor matrix_of_lines_ch[len(cy_main_hor):,4]=dist_x_ver - matrix_of_lines_ch[:len(cy_main_hor),5]=cy_main_hor - - matrix_of_lines_ch[:len(cy_main_hor),6]=y_min_main_hor 
matrix_of_lines_ch[len(cy_main_hor):,6]=y_min_main_ver - matrix_of_lines_ch[:len(cy_main_hor),7]=y_max_main_hor matrix_of_lines_ch[len(cy_main_hor):,7]=y_max_main_ver - matrix_of_lines_ch[:len(cy_main_hor),8]=dist_y_hor matrix_of_lines_ch[len(cy_main_hor):,8]=dist_y_ver - - matrix_of_lines_ch[len(cy_main_hor):,9]=1 if contours_h is not None: - slope_lines_head,dist_x_head, x_min_main_head ,x_max_main_head ,cy_main_head,slope_lines_org_head,y_min_main_head, y_max_main_head, cx_main_head=find_features_of_lines(contours_h) + _, dist_x_head, x_min_main_head, x_max_main_head, cy_main_head, _, y_min_main_head, y_max_main_head, _ = \ + find_features_of_lines(contours_h) matrix_l_n=np.zeros((matrix_of_lines_ch.shape[0]+len(cy_main_head),matrix_of_lines_ch.shape[1])) matrix_l_n[:matrix_of_lines_ch.shape[0],:]=np.copy(matrix_of_lines_ch[:,:]) - args_head=np.array(range(len(cy_main_head)))+len(cy_main_hor) + args_head=np.arange(len(cy_main_head)) + len(cy_main_hor) matrix_l_n[matrix_of_lines_ch.shape[0]:,0]=args_head matrix_l_n[matrix_of_lines_ch.shape[0]:,2]=x_min_main_head+30 matrix_l_n[matrix_of_lines_ch.shape[0]:,3]=x_max_main_head-30 - matrix_l_n[matrix_of_lines_ch.shape[0]:,4]=dist_x_head - matrix_l_n[matrix_of_lines_ch.shape[0]:,5]=y_min_main_head-3-8 matrix_l_n[matrix_of_lines_ch.shape[0]:,6]=y_min_main_head-5-8 matrix_l_n[matrix_of_lines_ch.shape[0]:,7]=y_max_main_head#y_min_main_head+1-8 matrix_l_n[matrix_of_lines_ch.shape[0]:,8]=4 - matrix_of_lines_ch=np.copy(matrix_l_n) - - - cy_main_splitters=cy_main_hor[ (x_min_main_hor<=.16*region_pre_p.shape[1]) & (x_max_main_hor>=.84*region_pre_p.shape[1] )] + cy_main_splitters=cy_main_hor[(x_min_main_hor<=.16*region_pre_p.shape[1]) & + (x_max_main_hor>=.84*region_pre_p.shape[1])] cy_main_splitters=np.array( list(cy_main_splitters)+list(special_separators)) - if contours_h is not None: try: - cy_main_splitters_head=cy_main_head[ (x_min_main_head<=.16*region_pre_p.shape[1]) & (x_max_main_head>=.84*region_pre_p.shape[1] )] + cy_main_splitters_head=cy_main_head[(x_min_main_head<=.16*region_pre_p.shape[1]) & + (x_max_main_head>=.84*region_pre_p.shape[1])] cy_main_splitters=np.array( list(cy_main_splitters)+list(cy_main_splitters_head)) except: pass args_cy_splitter=np.argsort(cy_main_splitters) - cy_main_splitters_sort=cy_main_splitters[args_cy_splitter] splitter_y_new=[] splitter_y_new.append(0) for i in range(len(cy_main_splitters_sort)): splitter_y_new.append( cy_main_splitters_sort[i] ) - splitter_y_new.append(region_pre_p.shape[0]) - splitter_y_new_diff=np.diff(splitter_y_new)/float(region_pre_p.shape[0])*100 - args_big_parts=np.array(range(len(splitter_y_new_diff))) [ splitter_y_new_diff>22 ] - + args_big_parts=np.arange(len(splitter_y_new_diff))[ splitter_y_new_diff>22 ] + regions_without_separators=return_regions_without_separators(region_pre_p) - - length_y_threshold=regions_without_separators.shape[0]/4.0 num_col_fin=0 peaks_neg_fin_fin=[] - for itiles in args_big_parts: - regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]):int(splitter_y_new[itiles+1]),:,0] - + regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]): + int(splitter_y_new[itiles+1]),:,0] try: - num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0) + num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, + num_col_classifier, tables, multiplier=7.0) except: num_col = 0 peaks_neg_fin = [] - if num_col>num_col_fin: num_col_fin=num_col 
peaks_neg_fin_fin=peaks_neg_fin - if len(args_big_parts)==1 and (len(peaks_neg_fin_fin)+1) splitter_y_new[i] ) & (matrix_of_lines_ch[:,7]< splitter_y_new[i+1] ) ] + matrix_new = matrix_of_lines_ch[:,:][(matrix_of_lines_ch[:,6]> splitter_y_new[i] ) & + (matrix_of_lines_ch[:,7]< splitter_y_new[i+1] )] #print(len( matrix_new[:,9][matrix_new[:,9]==1] )) - #print(matrix_new[:,8][matrix_new[:,9]==1],'gaddaaa') - # check to see is there any vertical separator to find holes. - if 1>0:#len( matrix_new[:,9][matrix_new[:,9]==1] )>0 and np.max(matrix_new[:,8][matrix_new[:,9]==1])>=0.1*(np.abs(splitter_y_new[i+1]-splitter_y_new[i] )): - + #if (len(matrix_new[:,9][matrix_new[:,9]==1]) > 0 and + # np.max(matrix_new[:,8][matrix_new[:,9]==1]) >= + # 0.1 * (np.abs(splitter_y_new[i+1]-splitter_y_new[i]))): + if True: try: if erosion_hurts: - num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], num_col_classifier, tables, multiplier=6.) + num_col, peaks_neg_fin = find_num_col( + regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], + num_col_classifier, tables, multiplier=6.) else: - num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],num_col_classifier, tables, multiplier=7.) + num_col, peaks_neg_fin = find_num_col( + regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], + num_col_classifier, tables, multiplier=7.) except: peaks_neg_fin=[] num_col = 0 - - try: peaks_neg_fin_org=np.copy(peaks_neg_fin) if (len(peaks_neg_fin)+1)=len(peaks_neg_fin2): peaks_neg_fin=list(np.copy(peaks_neg_fin1)) else: peaks_neg_fin=list(np.copy(peaks_neg_fin2)) - - - peaks_neg_fin=list(np.array(peaks_neg_fin)+peaks_neg_fin_early[i_n]) if i_n!=(len(peaks_neg_fin_early)-2): @@ -1682,10 +1686,6 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho #print(peaks_neg_fin,'peaks_neg_fin') peaks_neg_fin_rev=peaks_neg_fin_rev+peaks_neg_fin - - - - if len(peaks_neg_fin_rev)>=len(peaks_neg_fin_org): peaks_neg_fin=list(np.sort(peaks_neg_fin_rev)) num_col=len(peaks_neg_fin) @@ -1696,7 +1696,9 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho #print(peaks_neg_fin,'peaks_neg_fin') except: pass - #num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=7.0) + #num_col, peaks_neg_fin = find_num_col( + # regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], + # multiplier=7.0) x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ] x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ] cy_hor_some=matrix_new[:,5][ (matrix_new[:,9]==0) ] @@ -1706,197 +1708,160 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho if right2left_readingorder: x_max_hor_some_new = regions_without_separators.shape[1] - x_min_hor_some x_min_hor_some_new = regions_without_separators.shape[1] - x_max_hor_some - x_min_hor_some =list(np.copy(x_min_hor_some_new)) x_max_hor_some =list(np.copy(x_max_hor_some_new)) - - - - peaks_neg_tot=return_points_with_boundies(peaks_neg_fin,0, regions_without_separators[:,:].shape[1]) - peaks_neg_tot_tables.append(peaks_neg_tot) - 
reading_order_type,x_starting,x_ending,y_type_2,y_diff_type_2,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother,new_main_sep_y=return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x_max_hor_some,cy_hor_some,peaks_neg_tot,cy_hor_diff) - - - if (reading_order_type==1) or (reading_order_type==0 and (len(y_lines_without_mother)>=2 or there_is_sep_with_child==1)): - - + reading_order_type, x_starting, x_ending, y_type_2, y_diff_type_2, \ + y_lines_without_mother, x_start_without_mother, x_end_without_mother, there_is_sep_with_child, \ + y_lines_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \ + new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order( + x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, cy_hor_diff) + x_starting = np.array(x_starting) + x_ending = np.array(x_ending) + y_type_2 = np.array(y_type_2) + y_diff_type_2 = np.array(y_diff_type_2) + + if ((reading_order_type==1) or + (reading_order_type==0 and + (len(y_lines_without_mother)>=2 or there_is_sep_with_child==1))): try: y_grenze=int(splitter_y_new[i])+300 - - - #check if there is a big separator in this y_mains_sep_ohne_grenzen - args_early_ys=np.array(range(len(y_type_2))) - + args_early_ys=np.arange(len(y_type_2)) #print(args_early_ys,'args_early_ys') #print(int(splitter_y_new[i]),int(splitter_y_new[i+1])) - - y_type_2_up=np.array(y_type_2)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - x_starting_up=np.array(x_starting)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - x_ending_up=np.array(x_ending)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - y_diff_type_2_up=np.array(y_diff_type_2)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - args_up=args_early_ys[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - - - - if len(y_type_2_up)>0: - y_main_separator_up=y_type_2_up[(x_starting_up==0) & (x_ending_up==(len(peaks_neg_tot)-1) )] - y_diff_main_separator_up=y_diff_type_2_up[(x_starting_up==0) & (x_ending_up==(len(peaks_neg_tot)-1) )] - args_main_to_deleted=args_up[(x_starting_up==0) & (x_ending_up==(len(peaks_neg_tot)-1) )] + + x_starting_up = x_starting[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + x_ending_up = x_ending[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + y_type_2_up = y_type_2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + y_diff_type_2_up = y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + args_up = args_early_ys[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + if len(y_type_2_up) > 0: + y_main_separator_up = y_type_2_up [(x_starting_up==0) & + (x_ending_up==(len(peaks_neg_tot)-1) )] + y_diff_main_separator_up = y_diff_type_2_up[(x_starting_up==0) & + (x_ending_up==(len(peaks_neg_tot)-1) )] + args_main_to_deleted = args_up[(x_starting_up==0) & + (x_ending_up==(len(peaks_neg_tot)-1) )] #print(y_main_separator_up,y_diff_main_separator_up,args_main_to_deleted,'fffffjammmm') - - if len(y_diff_main_separator_up)>0: - args_to_be_kept=np.array( list( set(args_early_ys)-set(args_main_to_deleted) ) ) + if len(y_diff_main_separator_up) > 0: + args_to_be_kept = np.array(list( set(args_early_ys) - 
set(args_main_to_deleted) )) #print(args_to_be_kept,'args_to_be_kept') - boxes.append([0,peaks_neg_tot[len(peaks_neg_tot)-1],int(splitter_y_new[i]),int( np.max(y_diff_main_separator_up))]) + boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1], + int(splitter_y_new[i]), int( np.max(y_diff_main_separator_up))]) splitter_y_new[i]=[ np.max(y_diff_main_separator_up) ][0] #print(splitter_y_new[i],'splitter_y_new[i]') - y_type_2=np.array(y_type_2)[args_to_be_kept] - x_starting=np.array(x_starting)[args_to_be_kept] - x_ending=np.array(x_ending)[args_to_be_kept] - y_diff_type_2=np.array(y_diff_type_2)[args_to_be_kept] + y_type_2 = y_type_2[args_to_be_kept] + x_starting = x_starting[args_to_be_kept] + x_ending = x_ending[args_to_be_kept] + y_diff_type_2 = y_diff_type_2[args_to_be_kept] #print('galdiha') y_grenze=int(splitter_y_new[i])+200 - - - args_early_ys2=np.array(range(len(y_type_2))) - y_type_2_up=np.array(y_type_2)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - x_starting_up=np.array(x_starting)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - x_ending_up=np.array(x_ending)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - y_diff_type_2_up=np.array(y_diff_type_2)[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - args_up2=args_early_ys2[( np.array(y_type_2)>int(splitter_y_new[i]) ) & (np.array(y_type_2)<=y_grenze)] - - + args_early_ys2=np.arange(len(y_type_2)) + y_type_2_up=y_type_2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + x_starting_up=x_starting[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + x_ending_up=x_ending[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + y_diff_type_2_up=y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + args_up2=args_early_ys2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] #print(y_type_2_up,x_starting_up,x_ending_up,'didid') - - nodes_in=[] + nodes_in = [] for ij in range(len(x_starting_up)): - nodes_in=nodes_in+list(np.array(range(x_starting_up[ij],x_ending_up[ij]))) - - #print(np.unique(nodes_in),'nodes_in') + nodes_in = nodes_in + list(range(x_starting_up[ij], + x_ending_up[ij])) + nodes_in = np.unique(nodes_in) + #print(nodes_in,'nodes_in') - if set(np.unique(nodes_in))==set(np.array(range(len(peaks_neg_tot)-1)) ): + if set(nodes_in)==set(range(len(peaks_neg_tot)-1)): pass - elif set( np.unique(nodes_in) )==set( np.array(range(1,len(peaks_neg_tot)-1)) ): + elif set(nodes_in)==set(range(1, len(peaks_neg_tot)-1)): pass else: #print('burdaydikh') - args_to_be_kept2=np.array( list( set(args_early_ys2)-set(args_up2) ) ) + args_to_be_kept2=np.array(list( set(args_early_ys2)-set(args_up2) )) if len(args_to_be_kept2)>0: - y_type_2=np.array(y_type_2)[args_to_be_kept2] - x_starting=np.array(x_starting)[args_to_be_kept2] - x_ending=np.array(x_ending)[args_to_be_kept2] - y_diff_type_2=np.array(y_diff_type_2)[args_to_be_kept2] + y_type_2 = y_type_2[args_to_be_kept2] + x_starting = x_starting[args_to_be_kept2] + x_ending = x_ending[args_to_be_kept2] + y_diff_type_2 = y_diff_type_2[args_to_be_kept2] else: pass - #print('burdaydikh2') - - - elif len(y_diff_main_separator_up)==0: - nodes_in=[] + nodes_in = [] for ij in range(len(x_starting_up)): - nodes_in=nodes_in+list(np.array(range(x_starting_up[ij],x_ending_up[ij]))) - - #print(np.unique(nodes_in),'nodes_in2') + nodes_in = nodes_in + list(range(x_starting_up[ij], + x_ending_up[ij])) + 
nodes_in = np.unique(nodes_in) + #print(nodes_in,'nodes_in2') #print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))') - - - if set(np.unique(nodes_in))==set(np.array(range(len(peaks_neg_tot)-1)) ): + if set(nodes_in)==set(range(len(peaks_neg_tot)-1)): pass - elif set(np.unique(nodes_in) )==set( np.array(range(1,len(peaks_neg_tot)-1)) ): + elif set(nodes_in)==set(range(1,len(peaks_neg_tot)-1)): pass else: #print('burdaydikh') #print(args_early_ys,'args_early_ys') #print(args_up,'args_up') - args_to_be_kept2=np.array( list( set(args_early_ys)-set(args_up) ) ) + args_to_be_kept2=np.array(list( set(args_early_ys) - set(args_up) )) #print(args_to_be_kept2,'args_to_be_kept2') - #print(len(y_type_2),len(x_starting),len(x_ending),len(y_diff_type_2)) - if len(args_to_be_kept2)>0: - y_type_2=np.array(y_type_2)[args_to_be_kept2] - x_starting=np.array(x_starting)[args_to_be_kept2] - x_ending=np.array(x_ending)[args_to_be_kept2] - y_diff_type_2=np.array(y_diff_type_2)[args_to_be_kept2] + y_type_2 = y_type_2[args_to_be_kept2] + x_starting = x_starting[args_to_be_kept2] + x_ending = x_ending[args_to_be_kept2] + y_diff_type_2 = y_diff_type_2[args_to_be_kept2] else: pass - #print('burdaydikh2') - - - - - - - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) - y_type_2=np.array(y_type_2) - y_diff_type_2_up=np.array(y_diff_type_2_up) #int(splitter_y_new[i]) - y_lines_by_order=[] x_start_by_order=[] x_end_by_order=[] - if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1: - - if reading_order_type==1: y_lines_by_order.append(int(splitter_y_new[i])) x_start_by_order.append(0) x_end_by_order.append(len(peaks_neg_tot)-2) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - - columns_covered_by_mothers=[] - + columns_covered_by_mothers = [] for dj in range(len(x_start_without_mother)): - columns_covered_by_mothers=columns_covered_by_mothers+list(np.array(range(x_start_without_mother[dj],x_end_without_mother[dj])) ) - columns_covered_by_mothers=list(set(columns_covered_by_mothers)) - - all_columns=np.array(range(len(peaks_neg_tot)-1)) - - columns_not_covered=list( set(all_columns)-set(columns_covered_by_mothers) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - - for lj in columns_not_covered: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) - for lk in range(len(x_start_without_mother)): - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(x_start_without_mother[lk]) - x_ending.append(x_end_without_mother[lk]) - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) - - - - - ind_args=np.array(range(len(y_type_2))) + columns_covered_by_mothers = columns_covered_by_mothers + \ + list(range(x_start_without_mother[dj], + x_end_without_mother[dj])) + columns_covered_by_mothers = list(set(columns_covered_by_mothers)) + + all_columns=np.arange(len(peaks_neg_tot)-1) + columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) + y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) + ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) + x_starting = np.append(x_starting, columns_not_covered) 
+ x_starting = np.append(x_starting, x_start_without_mother) + x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_ending = np.append(x_ending, x_end_without_mother) + + ind_args=np.arange(len(y_type_2)) #ind_args=np.array(ind_args) #print(ind_args,'ind_args') for column in range(len(peaks_neg_tot)-1): @@ -1920,159 +1885,115 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) x_end_by_order.append(x_end_column_sort[ii]-1) - else: - #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - - columns_covered_by_mothers=[] - + columns_covered_by_mothers = [] for dj in range(len(x_start_without_mother)): - columns_covered_by_mothers=columns_covered_by_mothers+list(np.array(range(x_start_without_mother[dj],x_end_without_mother[dj])) ) - columns_covered_by_mothers=list(set(columns_covered_by_mothers)) - - all_columns=np.array(range(len(peaks_neg_tot)-1)) + columns_covered_by_mothers = columns_covered_by_mothers + \ + list(range(x_start_without_mother[dj], + x_end_without_mother[dj])) + columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - columns_not_covered=list( set(all_columns)-set(columns_covered_by_mothers) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - - for lj in columns_not_covered: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) - for lk in range(len(x_start_without_mother)): - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(x_start_without_mother[lk]) - x_ending.append(x_end_without_mother[lk]) - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) - - columns_covered_by_with_child_no_mothers=[] + all_columns=np.arange(len(peaks_neg_tot)-1) + columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) + y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) + ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) + x_starting = np.append(x_starting, columns_not_covered) + x_starting = np.append(x_starting, x_start_without_mother) + x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_ending = np.append(x_ending, x_end_without_mother) + columns_covered_by_with_child_no_mothers = [] for dj in range(len(x_end_with_child_without_mother)): - columns_covered_by_with_child_no_mothers=columns_covered_by_with_child_no_mothers+list(np.array(range(x_start_with_child_without_mother[dj],x_end_with_child_without_mother[dj])) ) - columns_covered_by_with_child_no_mothers=list(set(columns_covered_by_with_child_no_mothers)) + columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \ + list(range(x_start_with_child_without_mother[dj], + x_end_with_child_without_mother[dj])) + columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers)) - all_columns=np.array(range(len(peaks_neg_tot)-1)) - - columns_not_covered_child_no_mother=list( set(all_columns)-set(columns_covered_by_with_child_no_mothers) ) + all_columns = np.arange(len(peaks_neg_tot)-1) + columns_not_covered_child_no_mother = list(set(all_columns) - 
set(columns_covered_by_with_child_no_mothers))
             #indexes_to_be_spanned=[]
-            for i_s in range( len(x_end_with_child_without_mother) ):
+            for i_s in range(len(x_end_with_child_without_mother)):
                 columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s])
-
-
-
-            columns_not_covered_child_no_mother=np.sort(columns_not_covered_child_no_mother)
-
-
-
-            ind_args=np.array(range(len(y_type_2)))
-
-
-
+            columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother)
+            ind_args = np.arange(len(y_type_2))
+            x_end_with_child_without_mother = np.array(x_end_with_child_without_mother)
+            x_start_with_child_without_mother = np.array(x_start_with_child_without_mother)
             for i_s_nc in columns_not_covered_child_no_mother:
                 if i_s_nc in x_start_with_child_without_mother:
-                    x_end_biggest_column=np.array(x_end_with_child_without_mother)[np.array(x_start_with_child_without_mother)==i_s_nc][0]
-                    args_all_biggest_lines=ind_args[(x_starting==i_s_nc) & (x_ending==x_end_biggest_column)]
-
-                    args_all_biggest_lines=np.array(args_all_biggest_lines)
-                    y_column_nc=y_type_2[args_all_biggest_lines]
-                    x_start_column_nc=x_starting[args_all_biggest_lines]
-                    x_end_column_nc=x_ending[args_all_biggest_lines]
-
-                    y_column_nc=np.sort(y_column_nc)
-
+                    x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
+                    args_all_biggest_lines = ind_args[(x_starting==i_s_nc) &
+                                                      (x_ending==x_end_biggest_column)]
+                    y_column_nc = y_type_2[args_all_biggest_lines]
+                    x_start_column_nc = x_starting[args_all_biggest_lines]
+                    x_end_column_nc = x_ending[args_all_biggest_lines]
+                    y_column_nc = np.sort(y_column_nc)
                     for i_c in range(len(y_column_nc)):
                         if i_c==(len(y_column_nc)-1):
-                            ind_all_lines_betweeen_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & (y_type_2<int(splitter_y_new[i+1])) & (x_starting>=i_s_nc) & (x_ending<=x_end_biggest_column)]
+                            ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) &
+                                                                 (y_type_2<int(splitter_y_new[i+1])) &
+                                                                 (x_starting>=i_s_nc) &
+                                                                 (x_ending<=x_end_biggest_column)]
                         else:
-                            ind_all_lines_betweeen_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & (y_type_2<y_column_nc[i_c+1]) & (x_starting>=i_s_nc) & (x_ending<=x_end_biggest_column)]
-
-                        y_all_between_nm_wc=y_type_2[ind_all_lines_betweeen_nm_wc]
-                        x_starting_all_between_nm_wc=x_starting[ind_all_lines_betweeen_nm_wc]
-                        x_ending_all_between_nm_wc=x_ending[ind_all_lines_betweeen_nm_wc]
-
-                        x_diff_all_between_nm_wc=x_ending_all_between_nm_wc-x_starting_all_between_nm_wc
-
-
+                            ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) &
+                                                                 (y_type_2<y_column_nc[i_c+1]) &
+                                                                 (x_starting>=i_s_nc) &
+                                                                 (x_ending<=x_end_biggest_column)]
+                        y_all_between_nm_wc = y_type_2[ind_all_lines_between_nm_wc]
+                        x_starting_all_between_nm_wc = x_starting[ind_all_lines_between_nm_wc]
+                        x_ending_all_between_nm_wc = x_ending[ind_all_lines_between_nm_wc]
+
+                        x_diff_all_between_nm_wc = x_ending_all_between_nm_wc - x_starting_all_between_nm_wc
                         if len(x_diff_all_between_nm_wc)>0:
                             biggest=np.argmax(x_diff_all_between_nm_wc)
-
-                            columns_covered_by_mothers=[]
-
+                            columns_covered_by_mothers = []
                             for dj in range(len(x_starting_all_between_nm_wc)):
-                                columns_covered_by_mothers=columns_covered_by_mothers+list(np.array(range(x_starting_all_between_nm_wc[dj],x_ending_all_between_nm_wc[dj])) )
-                            columns_covered_by_mothers=list(set(columns_covered_by_mothers))
-
-
-                            all_columns=np.array(range(i_s_nc,x_end_biggest_column))
+                                columns_covered_by_mothers = columns_covered_by_mothers + \
+                                    list(range(x_starting_all_between_nm_wc[dj],
+                                               x_ending_all_between_nm_wc[dj]))
+                            columns_covered_by_mothers = list(set(columns_covered_by_mothers))
-                            columns_not_covered=list(
set(all_columns)-set(columns_covered_by_mothers) ) + all_columns=np.arange(i_s_nc, x_end_biggest_column) + columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers)) should_longest_line_be_extended=0 - if len(x_diff_all_between_nm_wc)>0 and set( list( np.array(range(x_starting_all_between_nm_wc[biggest],x_ending_all_between_nm_wc[biggest])) )+list(columns_not_covered) ) !=set(all_columns): + if (len(x_diff_all_between_nm_wc) > 0 and + set(list(range(x_starting_all_between_nm_wc[biggest], + x_ending_all_between_nm_wc[biggest])) + + list(columns_not_covered)) != set(all_columns)): should_longest_line_be_extended=1 - - index_lines_so_close_to_top_separator=np.array(range(len(y_all_between_nm_wc)))[(y_all_between_nm_wc>y_column_nc[i_c]) & (y_all_between_nm_wc<=(y_column_nc[i_c]+500))] - - - if len(index_lines_so_close_to_top_separator)>0: - indexes_remained_after_deleting_closed_lines= np.array( list ( set( list( np.array(range(len(y_all_between_nm_wc))) ) ) -set(list( index_lines_so_close_to_top_separator) ) ) ) - - if len(indexes_remained_after_deleting_closed_lines)>0: - y_all_between_nm_wc=y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_starting_all_between_nm_wc=x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_ending_all_between_nm_wc=x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - - - y_all_between_nm_wc=list(y_all_between_nm_wc) - x_starting_all_between_nm_wc=list(x_starting_all_between_nm_wc) - x_ending_all_between_nm_wc=list(x_ending_all_between_nm_wc) - - - y_all_between_nm_wc.append(y_column_nc[i_c] ) - x_starting_all_between_nm_wc.append(i_s_nc) - x_ending_all_between_nm_wc.append(x_end_biggest_column) - - - + index_lines_so_close_to_top_separator = \ + np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) & + (y_all_between_nm_wc<=(y_column_nc[i_c]+500))] + if len(index_lines_so_close_to_top_separator) > 0: + indexes_remained_after_deleting_closed_lines= \ + np.array(list(set(list(range(len(y_all_between_nm_wc)))) - + set(list(index_lines_so_close_to_top_separator)))) + if len(indexes_remained_after_deleting_closed_lines) > 0: + y_all_between_nm_wc = y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] + x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - y_all_between_nm_wc=list(y_all_between_nm_wc) - x_starting_all_between_nm_wc=list(x_starting_all_between_nm_wc) - x_ending_all_between_nm_wc=list(x_ending_all_between_nm_wc) + y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c]) + x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc) + x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_end_biggest_column) - if len(x_diff_all_between_nm_wc)>0: + if len(x_diff_all_between_nm_wc) > 0: try: - x_starting_all_between_nm_wc.append(x_starting_all_between_nm_wc[biggest]) - x_ending_all_between_nm_wc.append(x_ending_all_between_nm_wc[biggest]) - y_all_between_nm_wc.append(y_column_nc[i_c]) + y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c]) + x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest]) + x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest]) except: pass - - - for c_n_c in columns_not_covered: - 
y_all_between_nm_wc.append(y_column_nc[i_c]) - x_starting_all_between_nm_wc.append(c_n_c) - x_ending_all_between_nm_wc.append(c_n_c+1) - - y_all_between_nm_wc=np.array(y_all_between_nm_wc) - x_starting_all_between_nm_wc=np.array(x_starting_all_between_nm_wc) - x_ending_all_between_nm_wc=np.array(x_ending_all_between_nm_wc) + y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered)) + x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, columns_not_covered) + x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered) + 1) - ind_args_between=np.array(range(len(x_ending_all_between_nm_wc))) - - for column in range(i_s_nc,x_end_biggest_column): + ind_args_between=np.arange(len(x_ending_all_between_nm_wc)) + for column in range(i_s_nc, x_end_biggest_column): ind_args_in_col=ind_args_between[x_starting_all_between_nm_wc==column] #print('babali2') #print(ind_args_in_col,'ind_args_in_col') @@ -2092,14 +2013,7 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) x_end_by_order.append(x_end_column_sort[ii]-1) - - - - - - else: - #print(column,'column') ind_args_in_col=ind_args[x_starting==i_s_nc] #print('babali2') @@ -2119,15 +2033,11 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) x_end_by_order.append(x_end_column_sort[ii]-1) - - for il in range(len(y_lines_by_order)): - - - y_copy=list( np.copy(y_lines_by_order) ) - x_start_copy=list( np.copy(x_start_by_order) ) - x_end_copy=list ( np.copy(x_end_by_order) ) + y_copy = list(y_lines_by_order) + x_start_copy = list(x_start_by_order) + x_end_copy = list(x_end_by_order) #print(y_copy,'y_copy') y_itself=y_copy.pop(il) @@ -2135,13 +2045,14 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho x_end_itself=x_end_copy.pop(il) #print(y_copy,'y_copy2') - - for column in range(x_start_itself,x_end_itself+1): + for column in range(x_start_itself, x_end_itself+1): #print(column,'cols') y_in_cols=[] for yic in range(len(y_copy)): #print('burda') - if y_copy[yic]>y_itself and column>=x_start_copy[yic] and column<=x_end_copy[yic]: + if (y_copy[yic]>y_itself and + column>=x_start_copy[yic] and + column<=x_end_copy[yic]): y_in_cols.append(y_copy[yic]) #print('burda2') #print(y_in_cols,'y_in_cols') @@ -2150,81 +2061,48 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho else: y_down=[int(splitter_y_new[i+1])][0] #print(y_itself,'y_itself') - boxes.append([peaks_neg_tot[column],peaks_neg_tot[column+1],y_itself,y_down]) + boxes.append([peaks_neg_tot[column], + peaks_neg_tot[column+1], + y_itself, + y_down]) except: - boxes.append([0,peaks_neg_tot[len(peaks_neg_tot)-1],int(splitter_y_new[i]),int(splitter_y_new[i+1])]) - - - + boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1], + int(splitter_y_new[i]), int(splitter_y_new[i+1])]) else: y_lines_by_order=[] x_start_by_order=[] x_end_by_order=[] if len(x_starting)>0: - all_columns = np.array(range(len(peaks_neg_tot)-1)) - columns_covered_by_lines_covered_more_than_2col=[] - + all_columns = np.arange(len(peaks_neg_tot)-1) + columns_covered_by_lines_covered_more_than_2col = [] for dj in range(len(x_starting)): - if set( list(np.array(range(x_starting[dj],x_ending[dj])) ) ) == set(all_columns): + if 
set(list(range(x_starting[dj],x_ending[dj]))) == set(all_columns): pass else: - columns_covered_by_lines_covered_more_than_2col=columns_covered_by_lines_covered_more_than_2col+list(np.array(range(x_starting[dj],x_ending[dj])) ) - columns_covered_by_lines_covered_more_than_2col=list(set(columns_covered_by_lines_covered_more_than_2col)) - - - - columns_not_covered=list( set(all_columns)-set(columns_covered_by_lines_covered_more_than_2col) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - - for lj in columns_not_covered: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) - - #y_type_2.append(int(splitter_y_new[i])) - #x_starting.append(x_starting[0]) - #x_ending.append(x_ending[0]) - - if len(new_main_sep_y)>0: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(0) - x_ending.append(len(peaks_neg_tot)-1) + columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \ + list(range(x_starting[dj],x_ending[dj])) + columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col)) + columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col)) + + y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1)) + ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) + x_starting = np.append(x_starting, columns_not_covered) + x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + if len(new_main_sep_y) > 0: + x_starting = np.append(x_starting, 0) + x_ending = np.append(x_ending, len(peaks_neg_tot)-1) else: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(x_starting[0]) - x_ending.append(x_ending[0]) - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) + x_starting = np.append(x_starting, x_starting[0]) + x_ending = np.append(x_ending, x_ending[0]) else: - all_columns=np.array(range(len(peaks_neg_tot)-1)) - columns_not_covered=list( set(all_columns) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - - for lj in columns_not_covered: - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) - - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) + all_columns = np.arange(len(peaks_neg_tot)-1) + columns_not_covered = list(set(all_columns)) + y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered)) + ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) + ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) + x_starting = np.append(x_starting, columns_not_covered) + x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) ind_args=np.array(range(len(y_type_2))) #ind_args=np.array(ind_args) @@ -2248,13 +2126,10 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho x_start_by_order.append(x_start_column_sort[ii]) x_end_by_order.append(x_end_column_sort[ii]-1) - for il in range(len(y_lines_by_order)): - - - y_copy=list( np.copy(y_lines_by_order) ) - 
x_start_copy=list( np.copy(x_start_by_order) ) - x_end_copy=list ( np.copy(x_end_by_order) ) + y_copy = list(y_lines_by_order) + x_start_copy = list(x_start_by_order) + x_end_copy = list(x_end_by_order) #print(y_copy,'y_copy') y_itself=y_copy.pop(il) @@ -2262,13 +2137,14 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho x_end_itself=x_end_copy.pop(il) #print(y_copy,'y_copy2') - - for column in range(x_start_itself,x_end_itself+1): + for column in range(x_start_itself, x_end_itself+1): #print(column,'cols') y_in_cols=[] for yic in range(len(y_copy)): #print('burda') - if y_copy[yic]>y_itself and column>=x_start_copy[yic] and column<=x_end_copy[yic]: + if (y_copy[yic]>y_itself and + column>=x_start_copy[yic] and + column<=x_end_copy[yic]): y_in_cols.append(y_copy[yic]) #print('burda2') #print(y_in_cols,'y_in_cols') @@ -2277,10 +2153,10 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho else: y_down=[int(splitter_y_new[i+1])][0] #print(y_itself,'y_itself') - boxes.append([peaks_neg_tot[column],peaks_neg_tot[column+1],y_itself,y_down]) - - - + boxes.append([peaks_neg_tot[column], + peaks_neg_tot[column+1], + y_itself, + y_down]) #else: #boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]]) @@ -2291,7 +2167,6 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho peaks_neg_tot_tables_ind = regions_without_separators.shape[1] - np.array(peaks_tab_ind) peaks_neg_tot_tables_ind = list(peaks_neg_tot_tables_ind[::-1]) peaks_neg_tot_tables_new.append(peaks_neg_tot_tables_ind) - for i in range(len(boxes)): x_start_new = regions_without_separators.shape[1] - boxes[i][1] diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index e47c5e7..be00db0 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -27,35 +27,33 @@ def find_contours_mean_y_diff(contours_main): cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] return np.mean(np.diff(np.sort(np.array(cy_main)))) - def get_text_region_boxes_by_given_contours(contours): - kernel = np.ones((5, 5), np.uint8) boxes = [] contours_new = [] for jj in range(len(contours)): - x, y, w, h = cv2.boundingRect(contours[jj]) - - boxes.append([x, y, w, h]) + box = cv2.boundingRect(contours[jj]) + boxes.append(box) contours_new.append(contours[jj]) return boxes, contours_new def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): - found_polygons_early = list() - + found_polygons_early = [] for jv,c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue polygon = geometry.Polygon([point[0] for point in c]) area = polygon.area - if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1: # and hierarchy[0][jv][3]==-1 : - found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint)) + if (area >= min_area * np.prod(image.shape[:2]) and + area <= max_area * np.prod(image.shape[:2]) and + hierarchy[0][jv][3] == -1): + found_polygons_early.append(np.array([[point] + for point in polygon.exterior.coords], dtype=np.uint)) return found_polygons_early def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): - found_polygons_early = list() - + found_polygons_early = [] for jv,c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points 
continue @@ -66,48 +64,59 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m ##print(np.prod(thresh.shape[:2])) # Check that polygon has area greater than minimal area # print(hierarchy[0][jv][3],hierarchy ) - if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]): # and hierarchy[0][jv][3]==-1 : + if (area >= min_area * np.prod(image.shape[:2]) and + area <= max_area * np.prod(image.shape[:2]) and + # hierarchy[0][jv][3]==-1 + True): # print(c[0][0][1]) - found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32)) + found_polygons_early.append(np.array([[point] + for point in polygon.exterior.coords], dtype=np.int32)) return found_polygons_early def find_new_features_of_contours(contours_main): - - areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) - M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + areas_main = np.array([cv2.contourArea(contours_main[j]) + for j in range(len(contours_main))]) + M_main = [cv2.moments(contours_main[j]) + for j in range(len(contours_main))] + cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) + for j in range(len(M_main))] + cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) + for j in range(len(M_main))] try: - x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) - - argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) - - x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0] for j in range(len(contours_main))]) - y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 1] for j in range(len(contours_main))]) - - x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) - - y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) - y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) + x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) + for j in range(len(contours_main))]) + argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0]) + for j in range(len(contours_main))]) + x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0] + for j in range(len(contours_main))]) + y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 1] + for j in range(len(contours_main))]) + x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) + for j in range(len(contours_main))]) + y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) + for j in range(len(contours_main))]) + y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) + for j in range(len(contours_main))]) except: - x_min_main = np.array([np.min(contours_main[j][:, 0]) for j in range(len(contours_main))]) - - argmin_x_main = np.array([np.argmin(contours_main[j][:, 0]) for j in range(len(contours_main))]) - - x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0] for j in range(len(contours_main))]) - y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1] for j in range(len(contours_main))]) - - x_max_main = np.array([np.max(contours_main[j][:, 0]) for j in range(len(contours_main))]) - - y_min_main = 
np.array([np.min(contours_main[j][:, 1]) for j in range(len(contours_main))]) - y_max_main = np.array([np.max(contours_main[j][:, 1]) for j in range(len(contours_main))]) - + x_min_main = np.array([np.min(contours_main[j][:, 0]) + for j in range(len(contours_main))]) + argmin_x_main = np.array([np.argmin(contours_main[j][:, 0]) + for j in range(len(contours_main))]) + x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0] + for j in range(len(contours_main))]) + y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1] + for j in range(len(contours_main))]) + x_max_main = np.array([np.max(contours_main[j][:, 0]) + for j in range(len(contours_main))]) + y_min_main = np.array([np.min(contours_main[j][:, 1]) + for j in range(len(contours_main))]) + y_max_main = np.array([np.max(contours_main[j][:, 1]) + for j in range(len(contours_main))]) # dis_x=np.abs(x_max_main-x_min_main) return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin -def find_features_of_contours(contours_main): - +def find_features_of_contours(contours_main): areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))] cx_main=[(M_main[j]['m10']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] @@ -118,14 +127,15 @@ def find_features_of_contours(contours_main): y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))]) y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))]) - return y_min_main, y_max_main + def return_parent_contours(contours, hierarchy): - contours_parent = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3] == -1] + contours_parent = [contours[i] + for i in range(len(contours)) + if hierarchy[0][i][3] == -1] return contours_parent def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): - # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 @@ -137,10 +147,9 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=1, min_area=min_area) - + contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, + max_area=1, min_area=min_area) return contours_imgs def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): @@ -148,7 +157,6 @@ def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): img_copy = cv2.fillPoly(img_copy, pts=[contour], color=(1, 1, 1)) img_copy = rotation_image_new(img_copy, -slope_first) - img_copy = img_copy.astype(np.uint8) imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) @@ -158,7 +166,6 @@ def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - return cont_int[0], index_r_con def get_textregion_contours_in_org_image_multi(cnts, img, slope_first, map=map): @@ -172,7 +179,6 @@ def 
get_textregion_contours_in_org_image_multi(cnts, img, slope_first, map=map): return tuple(zip(*results)) def get_textregion_contours_in_org_image(cnts, img, slope_first): - cnts_org = [] # print(cnts,'cnts') for i in range(len(cnts)): @@ -193,7 +199,6 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): ret, thresh = cv2.threshold(imgray, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) # print(np.shape(cont_int[0])) @@ -202,32 +207,23 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): return cnts_org def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): - - h_o = img.shape[0] - w_o = img.shape[1] - - img = cv2.resize(img, (int(img.shape[1]/3.), int(img.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) - ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) - #cnts = cnts/2 - cnts = [(i/ 3).astype(np.int32) for i in cnts] + zoom = 3 + img = cv2.resize(img, (img.shape[1] // zoom, + img.shape[0] // zoom), + interpolation=cv2.INTER_NEAREST) cnts_org = [] - #print(cnts,'cnts') - for i in range(len(cnts)): + for cnt in cnts: img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1)) - - img_copy = rotation_image_new(img_copy, -slope_first) + img_copy = cv2.fillPoly(img_copy, pts=[(cnt / zoom).astype(int)], color=(1, 1, 1)) - img_copy = img_copy.astype(np.uint8) + img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8) imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - # print(np.shape(cont_int[0])) - cnts_org.append(cont_int[0]*3) + cnts_org.append(cont_int[0] * zoom) return cnts_org @@ -235,14 +231,11 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first img_copy = np.zeros(img.shape) img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=(1, 1, 1)) - img_copy = rotation_image_new(img_copy, -slope_first) - - img_copy = img_copy.astype(np.uint8) + img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8) imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) # print(np.shape(cont_int[0])) @@ -264,7 +257,6 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map): return [i*6 for i in contours] def return_contours_of_interested_textline(region_pre_p, pixel): - # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 @@ -277,11 +269,11 @@ def return_contours_of_interested_textline(region_pre_p, pixel): contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, 
max_area=1, min_area=0.000000003) + contours_imgs = filter_contours_area_of_image_tables( + thresh, contours_imgs, hierarchy, max_area=1, min_area=0.000000003) return contours_imgs def return_contours_of_image(image): - if len(image.shape) == 2: image = np.repeat(image[:, :, np.newaxis], 3, axis=2) image = image.astype(np.uint8) @@ -293,7 +285,6 @@ def return_contours_of_image(image): return contours, hierarchy def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003): - # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 @@ -305,14 +296,13 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=1, min_area=min_size) + contours_imgs = filter_contours_area_of_image_tables( + thresh, contours_imgs, hierarchy, max_area=1, min_area=min_size) return contours_imgs def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area): - # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 @@ -325,9 +315,11 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area) + contours_imgs = filter_contours_area_of_image_tables( + thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area) img_ret = np.zeros((region_pre_p.shape[0], region_pre_p.shape[1], 3)) img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=(1, 1, 1)) + return img_ret[:, :, 0] diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index f037a9f..7e77afe 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -41,9 +41,7 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): y_max_cont = img_patch.shape[0] xv = np.linspace(x_min_cont, x_max_cont, 1000) - textline_patch_sum_along_width = img_patch.sum(axis=axis) - first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) y = textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero] @@ -52,11 +50,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) - if 1 > 0: - try: - y_padded_smoothed_e = gaussian_filter1d(y_padded, 2) y_padded_up_to_down_e = -y_padded + np.max(y_padded) y_padded_up_to_down_padded_e = np.zeros(len(y_padded_up_to_down_e) + 40) @@ -67,7 +62,7 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.array(range(len(peaks_neg_e)))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] 
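
The deletion logic that follows groups consecutive indices of too-shallow valleys into clusters before removing the corresponding peaks. A compact, runnable sketch of that grouping idea, using np.split instead of the patch's explicit loop (toy indices, not real data):

import numpy as np

# hypothetical indices of valleys whose depth ratio fell below the threshold
arg_neg_must_be_deleted = np.array([2, 3, 4, 9, 10])
gaps = np.where(np.diff(arg_neg_must_be_deleted) > 1)[0]   # break after position 2
clusters_to_be_deleted = np.split(arg_neg_must_be_deleted, gaps + 1)
print(clusters_to_be_deleted)   # [array([2, 3, 4]), array([ 9, 10])]
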
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -78,12 +73,11 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): clusters_to_be_deleted = [] if len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + arg_diff_cluster[i + 1] + 1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) - if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -93,7 +87,6 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1] - 1]] peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1]]] - peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg_e[clusters_to_be_deleted[m][m1]]] peaks_new_tot = [] for i1 in peaks_new: @@ -106,9 +99,10 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): peaks_new_tot = peaks_e[:] textline_con, hierarchy = return_contours_of_image(img_patch) - textline_con_fil = filter_contours_area_of_image(img_patch, textline_con, hierarchy, max_area=1, min_area=0.0008) + textline_con_fil = filter_contours_area_of_image(img_patch, + textline_con, hierarchy, + max_area=1, min_area=0.0008) y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil) - sigma_gaus = int(y_diff_mean * (7.0 / 40.0)) # print(sigma_gaus,'sigma_gaus') except: @@ -126,10 +120,18 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): peaks, _ = find_peaks(y_padded_smoothed, height=0) peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0) - return x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix + return (x, y, + x_d, y_d, + xv, + x_min_cont, y_min_cont, + x_max_cont, y_max_cont, + first_nonzero, + y_padded_up_to_down_padded, + y_padded_smoothed, + peaks, peaks_neg, + rotation_matrix) def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): - (h, w) = img_patch.shape[:2] center = (w // 2, h // 2) M = cv2.getRotationMatrix2D(center, -thetha, 1.0) @@ -151,9 +153,7 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): y_max_cont = img_patch.shape[0] xv = np.linspace(x_min_cont, x_max_cont, 1000) - textline_patch_sum_along_width = img_patch.sum(axis=1) - first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) y = textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero] @@ -162,11 +162,8 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) - if 1>0: - try: - y_padded_smoothed_e= gaussian_filter1d(y_padded, 2) y_padded_up_to_down_e=-y_padded+np.max(y_padded) y_padded_up_to_down_padded_e=np.zeros(len(y_padded_up_to_down_e)+40) @@ -178,27 +175,22 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, 
height=0) neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted= np.array(range(len(peaks_neg_e)))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3 ] + arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) - - arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1] - peaks_new=peaks_e[:] peaks_neg_new=peaks_neg_e[:] clusters_to_be_deleted=[] if len(arg_diff_cluster)>0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1]) for i in range(len(arg_diff_cluster)-1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:arg_diff_cluster[i+1]+1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1: + arg_diff_cluster[i+1]+1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:]) - - if len(clusters_to_be_deleted)>0: peaks_new_extra=[] for m in range(len(clusters_to_be_deleted)): @@ -208,7 +200,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]-1]] peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]]] - peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg_e[clusters_to_be_deleted[m][m1]]] peaks_new_tot=[] for i1 in peaks_new: @@ -216,16 +207,14 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): for i1 in peaks_new_extra: peaks_new_tot.append(i1) peaks_new_tot=np.sort(peaks_new_tot) - - else: peaks_new_tot=peaks_e[:] - textline_con,hierarchy=return_contours_of_image(img_patch) - textline_con_fil=filter_contours_area_of_image(img_patch,textline_con,hierarchy,max_area=1,min_area=0.0008) + textline_con_fil=filter_contours_area_of_image(img_patch, + textline_con, hierarchy, + max_area=1, min_area=0.0008) y_diff_mean=np.mean(np.diff(peaks_new_tot))#self.find_contours_mean_y_diff(textline_con_fil) - sigma_gaus=int( y_diff_mean * (7./40.0) ) #print(sigma_gaus,'sigma_gaus') except: @@ -234,60 +223,41 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): sigma_gaus=3 #print(sigma_gaus,'sigma') - y_padded_smoothed= gaussian_filter1d(y_padded, sigma_gaus) y_padded_up_to_down=-y_padded+np.max(y_padded) y_padded_up_to_down_padded=np.zeros(len(y_padded_up_to_down)+40) y_padded_up_to_down_padded[20:len(y_padded_up_to_down)+20]=y_padded_up_to_down y_padded_up_to_down_padded= gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus) - peaks, _ = find_peaks(y_padded_smoothed, height=0) peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0) - - - - try: neg_peaks_max=np.max(y_padded_smoothed[peaks]) - - - arg_neg_must_be_deleted= np.array(range(len(peaks_neg)))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42 ] - - + arg_neg_must_be_deleted= np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42] diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) - - arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1] except: arg_neg_must_be_deleted=[] arg_diff_cluster=[] - - try: peaks_new=peaks[:] peaks_neg_new=peaks_neg[:] clusters_to_be_deleted=[] - if len(arg_diff_cluster)>=2 and len(arg_diff_cluster)>0: - 
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1]) for i in range(len(arg_diff_cluster)-1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:arg_diff_cluster[i+1]+1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1: + arg_diff_cluster[i+1]+1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:]) elif len(arg_neg_must_be_deleted)>=2 and len(arg_diff_cluster)==0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[:]) - - if len(arg_neg_must_be_deleted)==1: clusters_to_be_deleted.append(arg_neg_must_be_deleted) - - if len(clusters_to_be_deleted)>0: peaks_new_extra=[] for m in range(len(clusters_to_be_deleted)): @@ -297,7 +267,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new=peaks_new[peaks_new!=peaks[clusters_to_be_deleted[m][m1]-1]] peaks_new=peaks_new[peaks_new!=peaks[clusters_to_be_deleted[m][m1]]] - peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg[clusters_to_be_deleted[m][m1]]] peaks_new_tot=[] for i1 in peaks_new: @@ -321,36 +290,27 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): ##plt.plot(y_padded_smoothed) ##plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*') ##plt.show() - peaks=peaks_new_tot[:] peaks_neg=peaks_neg_new[:] - - else: peaks_new_tot=peaks[:] peaks=peaks_new_tot[:] peaks_neg=peaks_neg_new[:] except: pass - mean_value_of_peaks=np.mean(y_padded_smoothed[peaks]) std_value_of_peaks=np.std(y_padded_smoothed[peaks]) peaks_values=y_padded_smoothed[peaks] - peaks_neg = peaks_neg - 20 - 20 peaks = peaks - 20 - for jj in range(len(peaks_neg)): if peaks_neg[jj] > len(x) - 1: peaks_neg[jj] = len(x) - 1 - for jj in range(len(peaks)): if peaks[jj] > len(x) - 1: peaks[jj] = len(x) - 1 - - textline_boxes = [] textline_boxes_rot = [] @@ -386,7 +346,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) - if point_down_narrow >= img_patch.shape[0]: point_down_narrow = img_patch.shape[0] - 2 @@ -423,8 +382,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): if point_up_rot2<0: point_up_rot2=0 - - x_min_rot1=x_min_rot1-x_help x_max_rot2=x_max_rot2-x_help x_max_rot3=x_max_rot3-x_help @@ -435,29 +392,24 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_down_rot3=point_down_rot3-y_help point_down_rot4=point_down_rot4-y_help - - - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) - elif len(peaks) < 1: pass elif len(peaks) == 1: - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[0] + first_nonzero])), True) - for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[0] + first_nonzero])), True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -480,7 +432,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, 
y_help): x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d - if x_min_rot1<0: x_min_rot1=0 if x_min_rot4<0: @@ -489,7 +440,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up_rot1=0 if point_up_rot2<0: point_up_rot2=0 - x_min_rot1=x_min_rot1-x_help x_max_rot2=x_max_rot2-x_help @@ -500,22 +450,15 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up_rot2=point_up_rot2-y_help point_down_rot3=point_down_rot3-y_help point_down_rot4=point_down_rot4-y_help - - - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - textline_boxes.append(np.array([[int(x_min), int(y_min)], [int(x_max), int(y_min)], [int(x_max), int(y_max)], [int(x_min), int(y_max)]])) - - - elif len(peaks) == 2: dis_to_next = np.abs(peaks[1] - peaks[0]) for jj in range(len(peaks)): @@ -533,12 +476,12 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): except: point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) - for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -556,8 +499,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d - - if x_min_rot1<0: x_min_rot1=0 if x_min_rot4<0: @@ -577,21 +518,16 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_down_rot3=point_down_rot3-y_help point_down_rot4=point_down_rot4-y_help - - - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) else: for jj in range(len(peaks)): - if jj == 0: dis_to_next = peaks[jj + 1] - peaks[jj] # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) @@ -615,12 +551,12 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next_up) point_down = peaks[jj] + first_nonzero + int(1. 
/ 1.9 * dis_to_next_down) - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) - for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -646,7 +582,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up_rot1=0 if point_up_rot2<0: point_up_rot2=0 - x_min_rot1=x_min_rot1-x_help x_max_rot2=x_max_rot2-x_help @@ -657,29 +592,24 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up_rot2=point_up_rot2-y_help point_down_rot3=point_down_rot3-y_help point_down_rot4=point_down_rot4-y_help - - - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) - return peaks, textline_boxes_rot def separate_lines_vertical(img_patch, contour_text_interest, thetha): - thetha = thetha + 90 contour_text_interest_copy = contour_text_interest.copy() - x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix = dedup_separate_lines(img_patch, contour_text_interest, thetha, 0) - + x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, \ + first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, \ + peaks, peaks_neg, rotation_matrix = dedup_separate_lines(img_patch, contour_text_interest, thetha, 0) # plt.plot(y_padded_up_to_down_padded) # plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*') @@ -693,8 +623,7 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg]) - arg_neg_must_be_deleted = np.array(range(len(peaks_neg)))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] - + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -705,17 +634,15 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): clusters_to_be_deleted = [] if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + arg_diff_cluster[i + 1] + 1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[:]) - if len(arg_neg_must_be_deleted) == 1: clusters_to_be_deleted.append(arg_neg_must_be_deleted) - if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -725,7 
+652,6 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new = peaks_new[peaks_new != peaks[clusters_to_be_deleted[m][m1] - 1]] peaks_new = peaks_new[peaks_new != peaks[clusters_to_be_deleted[m][m1]]] - peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg[clusters_to_be_deleted[m][m1]]] peaks_new_tot = [] for i1 in peaks_new: @@ -796,7 +722,6 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -823,13 +748,16 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): if point_up_rot2 < 0: point_up_rot2 = 0 - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - - textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) - + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) elif len(peaks) < 1: pass - elif len(peaks) == 1: x_min = x_min_cont x_max = x_max_cont @@ -856,10 +784,14 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): if point_up_rot2 < 0: point_up_rot2 = 0 - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - - textline_boxes.append(np.array([[int(x_min), int(y_min)], [int(x_max), int(y_min)], [int(x_max), int(y_max)], [int(x_min), int(y_max)]])) - + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + textline_boxes.append(np.array([[int(x_min), int(y_min)], + [int(x_max), int(y_min)], + [int(x_max), int(y_max)], + [int(x_min), int(y_max)]])) elif len(peaks) == 2: dis_to_next = np.abs(peaks[1] - peaks[0]) for jj in range(len(peaks)): @@ -874,11 +806,12 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_down = img_patch.shape[0] - 2 point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next) - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -905,12 +838,16 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): if point_up_rot2 < 0: point_up_rot2 = 0 - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - - textline_boxes.append(np.array([[int(x_min), int(point_up)], 
[int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) else: for jj in range(len(peaks)): - if jj == 0: dis_to_next = peaks[jj + 1] - peaks[jj] # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) @@ -934,11 +871,12 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_up = peaks[jj] + first_nonzero - int(1.0 / 1.9 * dis_to_next_up) point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down) - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] - if len(xvinside) == 0: x_min = x_min_cont x_max = x_max_cont @@ -965,14 +903,17 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): if point_up_rot2 < 0: point_up_rot2 = 0 - textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]])) - - textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) - + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) return peaks, textline_boxes_rot def separate_lines_new_inside_tiles2(img_patch, thetha): - (h, w) = img_patch.shape[:2] center = (w // 2, h // 2) M = cv2.getRotationMatrix2D(center, -thetha, 1.0) @@ -994,9 +935,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): y_max_cont = img_patch.shape[0] xv = np.linspace(x_min_cont, x_max_cont, 1000) - textline_patch_sum_along_width = img_patch.sum(axis=1) - first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) y = textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero] @@ -1006,9 +945,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) if 1 > 0: - try: - y_padded_smoothed_e = gaussian_filter1d(y_padded, 2) y_padded_up_to_down_e = -y_padded + np.max(y_padded) y_padded_up_to_down_padded_e = np.zeros(len(y_padded_up_to_down_e) + 40) @@ -1019,7 +956,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.array(range(len(peaks_neg_e)))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] 
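
For context, these separation functions all rest on the same projection-profile idea: sum the ink per row, smooth, and pick peaks as text-line centres. A runnable toy sketch (synthetic patch; in the real code sigma is derived from the mean line spacing rather than fixed):

import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

# synthetic binary patch with two "text lines" (rows of ink)
img_patch = np.zeros((60, 40), dtype=np.uint8)
img_patch[10:15, :] = 1
img_patch[30:36, :] = 1

y = img_patch.sum(axis=1).astype(float)   # ink per row
y_smoothed = gaussian_filter1d(y, 3)      # smooth the projection profile
peaks, _ = find_peaks(y_smoothed, height=0)
print(peaks)                              # roughly [12, 32]: one peak per line
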
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -1030,12 +967,10 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): clusters_to_be_deleted = [] if len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) - if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -1045,7 +980,6 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1] - 1]] peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1]]] - peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg_e[clusters_to_be_deleted[m][m1]]] peaks_new_tot = [] for i1 in peaks_new: @@ -1053,12 +987,13 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): for i1 in peaks_new_extra: peaks_new_tot.append(i1) peaks_new_tot = np.sort(peaks_new_tot) - else: peaks_new_tot = peaks_e[:] textline_con, hierarchy = return_contours_of_image(img_patch) - textline_con_fil = filter_contours_area_of_image(img_patch, textline_con, hierarchy, max_area=1, min_area=0.0008) + textline_con_fil = filter_contours_area_of_image(img_patch, + textline_con, hierarchy, + max_area=1, min_area=0.0008) y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil) sigma_gaus = int(y_diff_mean * (7.0 / 40.0)) @@ -1084,27 +1019,23 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): try: neg_peaks_max = np.max(y_padded_smoothed[peaks]) - arg_neg_must_be_deleted = np.array(range(len(peaks_neg)))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] - + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1] clusters_to_be_deleted = [] - if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + arg_diff_cluster[i + 1] + 1]) clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[:]) - if len(arg_neg_must_be_deleted) == 1: clusters_to_be_deleted.append(arg_neg_must_be_deleted) - if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -1114,7 +1045,6 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): for m1 in range(len(clusters_to_be_deleted[m])): peaks_new = peaks_new[peaks_new != peaks[clusters_to_be_deleted[m][m1] - 1]] peaks_new = peaks_new[peaks_new != peaks[clusters_to_be_deleted[m][m1]]] - peaks_neg_new = peaks_neg_new[peaks_neg_new 
!= peaks_neg[clusters_to_be_deleted[m][m1]]] peaks_new_tot = [] for i1 in peaks_new: @@ -1138,7 +1068,6 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): # plt.plot(y_padded_smoothed) # plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*') # plt.show() - peaks = peaks_new_tot[:] peaks_neg = peaks_neg_new[:] except: @@ -1166,7 +1095,6 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): # print(peaks_neg_true) for i in range(len(peaks_neg_true)): img_patch[peaks_neg_true[i] - 6 : peaks_neg_true[i] + 6, :] = 0 - else: pass @@ -1346,14 +1274,14 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area) - + contours_imgs = filter_contours_area_of_image_tables(thresh, + contours_imgs, hierarchy, + max_area=max_area, min_area=min_area) cont_final = [] ###print(add_boxes_coor_into_textlines,'ikki') for i in range(len(contours_imgs)): img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3)) img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255)) - img_contour = img_contour.astype(np.uint8) img_contour = cv2.dilate(img_contour, kernel, iterations=4) @@ -1373,9 +1301,7 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i ##print(cont_final,'nadizzzz') return None, cont_final - def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False): - textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 textline_mask = textline_mask.astype(np.uint8) kernel = np.ones((5, 5), np.uint8) @@ -1400,8 +1326,10 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest x_help = 30 y_help = 2 - textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help), textline_mask.shape[1] + int(2 * x_help), 3)) - textline_mask_help[y_help : y_help + textline_mask.shape[0], x_help : x_help + textline_mask.shape[1], :] = np.copy(textline_mask[:, :, :]) + textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help), + textline_mask.shape[1] + int(2 * x_help), 3)) + textline_mask_help[y_help : y_help + textline_mask.shape[0], + x_help : x_help + textline_mask.shape[1], :] = np.copy(textline_mask[:, :, :]) dst = rotate_image(textline_mask_help, slope) dst = dst[:, :, 0] @@ -1412,7 +1340,6 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest # plt.show() contour_text_copy = contour_text_interest.copy() - contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[0] contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1] @@ -1423,12 +1350,12 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest # plt.imshow(img_contour) # plt.show() - img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help), img_contour.shape[1] + int(2 * x_help), 3)) - - img_contour_help[y_help : y_help + img_contour.shape[0], x_help : x_help + img_contour.shape[1], :] = np.copy(img_contour[:, :, :]) + img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help), + img_contour.shape[1] + int(2 * x_help), 3)) + img_contour_help[y_help : y_help + img_contour.shape[0], + x_help : x_help + img_contour.shape[1], :] = np.copy(img_contour[:, 
:, :]) img_contour_rot = rotate_image(img_contour_help, slope) - # plt.imshow(img_contour_rot_help) # plt.show() @@ -1454,12 +1381,13 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest # print('juzaa') if abs(slope) > 45: # print(add_boxes_coor_into_textlines,'avval') - _, contours_rotated_clean = separate_lines_vertical_cont(textline_mask, contours_text_rot[ind_big_con], box_ind, slope, add_boxes_coor_into_textlines=add_boxes_coor_into_textlines) + _, contours_rotated_clean = separate_lines_vertical_cont( + textline_mask, contours_text_rot[ind_big_con], box_ind, slope, + add_boxes_coor_into_textlines=add_boxes_coor_into_textlines) else: - _, contours_rotated_clean = separate_lines(dst, contours_text_rot[ind_big_con], slope, x_help, y_help) - + _, contours_rotated_clean = separate_lines( + dst, contours_text_rot[ind_big_con], slope, x_help, y_help) except: - contours_rotated_clean = [] return contours_rotated_clean @@ -1487,11 +1415,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl # print(margin,'margin') # if margin<=4: # margin = int(0.08 * length_x) - # margin=0 width_mid = length_x - 2 * margin - nxf = img_path.shape[1] / float(width_mid) if nxf > int(nxf): @@ -1553,8 +1479,8 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] img_resized = np.zeros((int(img_int.shape[0] * (1.2)), int(img_int.shape[1] * (3)))) - - img_resized[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]] = img_int[:, :] + img_resized[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], + int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] = img_int[:, :] # plt.imshow(img_xline) # plt.show() img_line_rotated = rotate_image(img_resized, slopes_tile_wise[i]) @@ -1565,7 +1491,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl img_patch_separated_returned = rotate_image(img_patch_separated, -slopes_tile_wise[i]) img_patch_separated_returned[:, :][img_patch_separated_returned[:, :] != 0] = 1 - img_patch_separated_returned_true_size = img_patch_separated_returned[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]] + img_patch_separated_returned_true_size = img_patch_separated_returned[ + int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], + int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin] img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size @@ -1594,27 +1522,19 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1])) img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0] - - max_shape=np.max(img_int.shape) img_resized=np.zeros((int( max_shape*(1.1) ) , int( max_shape*(1.1) ) )) - onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.) onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) 
- #img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) )) - - - #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] #print(img_resized.shape,'img_resizedshape') #plt.imshow(img_resized) #plt.show() - if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: #plt.imshow(img_resized) #plt.show() @@ -1623,7 +1543,6 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) - elif main_page: #plt.imshow(img_resized) #plt.show() @@ -1637,7 +1556,6 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, else: angles = np.linspace(90, 12, n_tot_angles) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) - else: angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) @@ -1695,7 +1613,9 @@ def do_work_of_slopes_new( else: try: textline_con, hierarchy = return_contours_of_image(img_int_p) - textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.00008) + textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, + hierarchy, + max_area=1, min_area=0.00008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if np.isnan(y_diff_mean): slope_for_all = MAX_SLOPE @@ -1733,7 +1653,6 @@ def do_work_of_slopes_new( return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope - def do_work_of_slopes_new_curved( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, @@ -1759,7 +1678,9 @@ def do_work_of_slopes_new_curved( else: try: textline_con, hierarchy = return_contours_of_image(img_int_p) - textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.0008) + textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, + hierarchy, + max_area=1, min_area=0.0008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if np.isnan(y_diff_mean): slope_for_all = MAX_SLOPE @@ -1788,7 +1709,8 @@ def do_work_of_slopes_new_curved( textline_biggest_region = mask_biggest * textline_mask_tot_ea # print(slope_for_all,'slope_for_all') - textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y+h, x: x+w], 0, num_col, slope_for_all, + textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y+h, x: x+w], 0, + num_col, slope_for_all, logger=logger, plotter=plotter) # new line added From 33fda2f8be1378181bd51818f45149e62740ef36 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 26 Dec 2024 22:45:40 +0100 Subject: [PATCH 101/107] changing cnn ocr model name --- src/eynollah/eynollah.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 95033e9..1bc837b 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -2191,18 +2191,18 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) if not 
self.dir_in: - ###prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) - prediction_textline = self.do_prediction_new_concept_scatter_nd(patches, img, model_textline, n_batch_inference=3) + ###prediction_textline = self.do_prediction_new_concept_scatter_nd(patches, img, model_textline, n_batch_inference=3) #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 else: - ##prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) - prediction_textline = self.do_prediction_new_concept_scatter_nd(patches, img, self.model_textline, n_batch_inference=3) + ##prediction_textline = self.do_prediction_new_concept_scatter_nd(patches, img, self.model_textline, n_batch_inference=3) #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) @@ -2482,17 +2482,17 @@ class Eynollah: if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - prediction_regions_org = self.do_prediction_new_concept_scatter_nd(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) - ###prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) + ##prediction_regions_org = self.do_prediction_new_concept_scatter_nd(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - prediction_regions_page = self.do_prediction_new_concept_scatter_nd(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) - ##prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + ###prediction_regions_page = self.do_prediction_new_concept_scatter_nd(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, 
model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - ###prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) - prediction_regions_org = self.do_prediction_new_concept_scatter_nd(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) + ###prediction_regions_org = self.do_prediction_new_concept_scatter_nd(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: @@ -5638,7 +5638,7 @@ class Eynollah_ocr: self.model_ocr.to(self.device) else: - self.model_ocr_dir = dir_models + "/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_1_ocrcnn"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( From 25116a2c79440ea16d8a5e28c6f1b7f08da5c6b6 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 19 Feb 2025 00:35:48 +0100 Subject: [PATCH 102/107] resolved 2 errors --- src/eynollah/eynollah.py | 6 ++++-- src/eynollah/utils/__init__.py | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 25d5ec4..9158168 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4074,8 +4074,10 @@ class Eynollah: ind_textline_inside_tr = list(range(len(contours[jj]))) index_textline_inside_textregion = index_textline_inside_textregion + ind_textline_inside_tr - ind_ins = [0] * len(contours[jj]) + jj - indexes_of_textline_tot = indexes_of_textline_tot + ind_ins + #ind_ins = [0] * len(contours[jj]) + jj + ind_ins = np.zeros( len(contours[jj]) ) + jj + list_ind_ins = list(ind_ins) + indexes_of_textline_tot = indexes_of_textline_tot + list_ind_ins M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index feab341..a67fc38 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -237,8 +237,11 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order( if len(remained_sep_indexes)>1: 
#print(np.array(remained_sep_indexes),'np.array(remained_sep_indexes)') #print(np.array(mother),'mother') - remained_sep_indexes_without_mother = remained_sep_indexes[mother==0] - remained_sep_indexes_with_child_without_mother = remained_sep_indexes[mother==0 & child==1] + ##remained_sep_indexes_without_mother = remained_sep_indexes[mother==0] + ##remained_sep_indexes_with_child_without_mother = remained_sep_indexes[mother==0 & child==1] + remained_sep_indexes_without_mother=np.array(list(remained_sep_indexes))[np.array(mother)==0] + remained_sep_indexes_with_child_without_mother=np.array(list(remained_sep_indexes))[(np.array(mother)==0) & (np.array(child)==1)] + #print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother') #print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother') x_end_with_child_without_mother = x_end[remained_sep_indexes_with_child_without_mother] From 7110bd971f719bd3d86457bd3a1b6375ca952921 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 27 Feb 2025 19:11:15 +0100 Subject: [PATCH 103/107] resolved an error for light version in the case that slope_deskew is smaller than slope_threshold --- src/eynollah/eynollah.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 9158168..6802e47 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4120,7 +4120,7 @@ class Eynollah: def filter_contours_without_textline_inside( self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered): - + ###contours_txtline_of_all_textregions = [] ###for jj in range(len(contours_textline)): ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] @@ -4156,7 +4156,8 @@ class Eynollah: contours.pop(ind_u_a_trs) contours_textline.pop(ind_u_a_trs) text_con_org.pop(ind_u_a_trs) - contours_only_text_parent_d_ordered.pop(ind_u_a_trs) + if len(contours_only_text_parent_d_ordered) > 0: + contours_only_text_parent_d_ordered.pop(ind_u_a_trs) return contours, text_con_org, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) @@ -4518,7 +4519,6 @@ class Eynollah: ###min_con_area = 0.000005 contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - if len(contours_only_text_parent) > 0: areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) @@ -4619,8 +4619,7 @@ class Eynollah: else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] - contours_only_text_parent = [] - + #contours_only_text_parent = [] if not len(contours_only_text_parent): # stop early empty_marginals = [[]] * len(polygons_of_marginals) @@ -4690,8 +4689,7 @@ class Eynollah: all_found_textline_polygons_marginals) contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, \ index_by_text_par_con = self.filter_contours_without_textline_inside( - contours_only_text_parent, txt_con_org, all_found_textline_polygons, - contours_only_text_parent_d_ordered) + contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) all_found_textline_polygons, boxes_text, txt_con_org, 
contours_only_text_parent, all_box_coord, \ From a4f1f351259a03cd25a4900f52ec2ff1f4cef3c1 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 7 Mar 2025 13:19:56 +0100 Subject: [PATCH 104/107] Resolving test failure --- src/eynollah/eynollah.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index e1d3895..9cce812 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1899,7 +1899,7 @@ class Eynollah: ##self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light) if num_col_classifier == 1 or num_col_classifier == 2: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: self.logger.debug("resized to %dx%d for %d cols", img_resized.shape[1], img_resized.shape[0], num_col_classifier) From aa72ca3006288ea5dc6e05ec987cb44b3d234687 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 13 Mar 2025 15:02:38 +0100 Subject: [PATCH 105/107] Resolved an issue in the OCR-D framework where dir_out received a None value --- src/eynollah/processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/eynollah/processor.py b/src/eynollah/processor.py index ed510c8..4eced21 100644 --- a/src/eynollah/processor.py +++ b/src/eynollah/processor.py @@ -45,6 +45,7 @@ class EynollahProcessor(Processor): image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(local_filename=page.imageFilename))).local_filename eynollah_kwargs = { 'dir_models': self.resolve_resource(self.parameter['models']), + 'dir_out': self.output_file_grp, 'allow_enhancement': False, 'curved_line': self.parameter['curved_line'], 'full_layout': self.parameter['full_layout'], From c8b85299514e900707f8e0f6bdc966d1c4a191cf Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 17 Mar 2025 19:50:58 +0100 Subject: [PATCH 106/107] For the CNN-RNN OCR model, long text lines are split into two segments --- src/eynollah/eynollah.py | 74 +++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 9cce812..65e85c5 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4961,7 +4961,7 @@ class Eynollah_ocr: self.model_ocr.to(self.device) else: - self.model_ocr_dir = dir_models + "/model_1_ocrcnn"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_1_new_ocrcnn"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5080,6 +5080,18 @@ class Eynollah_ocr: return [image1, image2] else: return None + def preprocess_and_resize_image_for_ocrcnn_model(self, img, image_height, image_width): + ratio = image_height /float(img.shape[0]) + w_ratio = int(ratio * img.shape[1]) + if w_ratio <= image_width: + width_new = w_ratio + else: + width_new = image_width + img = resize_image(img, image_height, width_new) + img_fin = np.ones((image_height, image_width, 3))*255 + img_fin[:,:width_new,:] = img[:,:,:] + img_fin = img_fin / 255. 
+ return img_fin def run(self): ls_imgs = os.listdir(self.dir_in) @@ -5214,7 +5226,7 @@ class Eynollah_ocr: else: max_len = 512 padding_token = 299 - image_width = max_len * 4 + image_width = 512#max_len * 4 image_height = 32 b_s = 8 @@ -5265,10 +5277,31 @@ class Eynollah_ocr: mask_poly = mask_poly[y:y+h, x:x+w, :] img_crop = img_poly_on_img[y:y+h, x:x+w, :] img_crop[mask_poly==0] = 255 - img_crop = tf.reverse(img_crop,axis=[-1]) - img_crop = self.distortion_free_resize(img_crop, img_size) - img_crop = tf.cast(img_crop, tf.float32) / 255.0 - cropped_lines.append(img_crop) + + if h2w_ratio > 0.05: + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(0) + else: + splited_images = self.return_textlines_split_if_needed(img_crop) + #print(splited_images) + if splited_images: + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(1) + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(-1) + else: + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(0) + #img_crop = tf.reverse(img_crop,axis=[-1]) + #img_crop = self.distortion_free_resize(img_crop, img_size) + #img_crop = tf.cast(img_crop, tf.float32) / 255.0 + + #cropped_lines.append(img_crop) indexer_text_region = indexer_text_region +1 @@ -5282,12 +5315,12 @@ class Eynollah_ocr: n_start = i*b_s imgs = cropped_lines[n_start:] imgs = np.array(imgs) - imgs = imgs.reshape(imgs.shape[0], image_width, image_height, 3) + imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3) else: n_start = i*b_s n_end = (i+1)*b_s imgs = cropped_lines[n_start:n_end] - imgs = np.array(imgs).reshape(b_s, image_width, image_height, 3) + imgs = np.array(imgs).reshape(b_s, image_height, image_width, 3) preds = self.prediction_model.predict(imgs, verbose=0) @@ -5296,15 +5329,32 @@ class Eynollah_ocr: for ib in range(imgs.shape[0]): pred_texts_ib = pred_texts[ib].strip("[UNK]") extracted_texts.append(pred_texts_ib) - + + + extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + + extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] + #print(extracted_texts_merged, len(extracted_texts_merged)) + unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) - + + #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer') text_by_textregion = [] for ind in unique_cropped_lines_region_indexer: - extracted_texts_merged_un = np.array(extracted_texts)[np.array(cropped_lines_region_indexer)==ind] + extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] text_by_textregion.append(" ".join(extracted_texts_merged_un)) + + + ##unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) + + ##text_by_textregion = [] + ##for ind in unique_cropped_lines_region_indexer: + ##extracted_texts_merged_un = np.array(extracted_texts)[np.array(cropped_lines_region_indexer)==ind] + + 
##text_by_textregion.append(" ".join(extracted_texts_merged_un)) + indexer = 0 indexer_textregion = 0 for nn in root1.iter(region_tags): @@ -5317,7 +5367,7 @@ class Eynollah_ocr: if child_textregion.tag.endswith("TextLine"): text_subelement = ET.SubElement(child_textregion, 'TextEquiv') unicode_textline = ET.SubElement(text_subelement, 'Unicode') - unicode_textline.text = extracted_texts[indexer] + unicode_textline.text = extracted_texts_merged[indexer] indexer = indexer + 1 has_textline = True if has_textline: From d3a4c06e7f5aaafae356d228df6bf54851bc7b82 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 20 Mar 2025 18:21:44 +0100 Subject: [PATCH 107/107] This commit enables the export of cropped text line images along with their corresponding texts from a Page-XML file. These exported text line images and texts can be utilized for training a text line-based OCR model. --- src/eynollah/cli.py | 16 +++- src/eynollah/eynollah.py | 188 ++++++++++++++++++++------------------- 2 files changed, 110 insertions(+), 94 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index ff612b2..c306ac5 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -347,6 +347,18 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ is_flag=True, help="if this parameter set to true, transformer ocr will be applied, otherwise cnn_rnn model.", ) +@click.option( + "--export_textline_images_and_text", + "-etit/-noetit", + is_flag=True, + help="if this parameter set to true, images and text in xml will be exported into output dir. This files can be used for training a OCR engine.", +) +@click.option( + "--do_not_mask_with_textline_contour", + "-nmtc/-mtc", + is_flag=True, + help="if this parameter set to true, cropped textline images will not be masked with textline contour.", +) @click.option( "--log_level", "-l", @@ -354,7 +366,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ help="Override log level globally to this", ) -def ocr(dir_in, out, dir_xmls, model, tr_ocr, log_level): +def ocr(dir_in, out, dir_xmls, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, log_level): if log_level: setOverrideLogLevel(log_level) initLogging() @@ -364,6 +376,8 @@ def ocr(dir_in, out, dir_xmls, model, tr_ocr, log_level): dir_out=out, dir_models=model, tr_ocr=tr_ocr, + export_textline_images_and_text=export_textline_images_and_text, + do_not_mask_with_textline_contour=do_not_mask_with_textline_contour, ) eynollah_ocr.run() diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 65e85c5..7acee39 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4946,6 +4946,8 @@ class Eynollah_ocr: dir_in=None, dir_out=None, tr_ocr=False, + export_textline_images_and_text=False, + do_not_mask_with_textline_contour=False, logger=None, ): self.dir_in = dir_in @@ -4953,6 +4955,8 @@ class Eynollah_ocr: self.dir_xmls = dir_xmls self.dir_models = dir_models self.tr_ocr = tr_ocr + self.export_textline_images_and_text = export_textline_images_and_text + self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour if tr_ocr: self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -4961,7 +4965,7 @@ class Eynollah_ocr: self.model_ocr.to(self.device) else: - self.model_ocr_dir = dir_models + "/model_1_new_ocrcnn"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = 
dir_models + "/model_3_new_ocrcnn"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5107,7 +5111,7 @@ class Eynollah_ocr: img = cv2.imread(dir_img) ##file_name = Path(dir_xmls).stem - tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding = 'iso-8859-5')) + tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding="utf-8")) root1=tree1.getroot() alltags=[elem.tag for elem in root1.iter()] link=alltags[0].split('}')[0]+'}' @@ -5241,7 +5245,7 @@ class Eynollah_ocr: out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') img = cv2.imread(dir_img) - tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding = 'iso-8859-5')) + tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding="utf-8")) root1=tree1.getroot() alltags=[elem.tag for elem in root1.iter()] link=alltags[0].split('}')[0]+'}' @@ -5257,15 +5261,16 @@ class Eynollah_ocr: tinl = time.time() indexer_text_region = 0 + indexer_textlines = 0 for nn in root1.iter(region_tags): for child_textregion in nn: if child_textregion.tag.endswith("TextLine"): - for child_textlines in child_textregion: if child_textlines.tag.endswith("Coords"): cropped_lines_region_indexer.append(indexer_text_region) p_h=child_textlines.attrib['points'].split(' ') textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) + x,y,w,h = cv2.boundingRect(textline_coords) h2w_ratio = h/float(w) @@ -5276,104 +5281,101 @@ class Eynollah_ocr: mask_poly = mask_poly[y:y+h, x:x+w, :] img_crop = img_poly_on_img[y:y+h, x:x+w, :] - img_crop[mask_poly==0] = 255 + if not self.do_not_mask_with_textline_contour: + img_crop[mask_poly==0] = 255 - if h2w_ratio > 0.05: - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) - cropped_lines.append(img_fin) - cropped_lines_meging_indexing.append(0) - else: - splited_images = self.return_textlines_split_if_needed(img_crop) - #print(splited_images) - if splited_images: - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) - cropped_lines.append(img_fin) - cropped_lines_meging_indexing.append(1) - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) - - cropped_lines.append(img_fin) - cropped_lines_meging_indexing.append(-1) - else: + if not self.export_textline_images_and_text: + if h2w_ratio > 0.05: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) - #img_crop = tf.reverse(img_crop,axis=[-1]) - #img_crop = self.distortion_free_resize(img_crop, img_size) - #img_crop = tf.cast(img_crop, tf.float32) / 255.0 - - #cropped_lines.append(img_crop) - - indexer_text_region = indexer_text_region +1 + else: + splited_images = self.return_textlines_split_if_needed(img_crop) + if splited_images: + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(1) + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(-1) + else: + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(0) + + if 
self.export_textline_images_and_text: + if child_textlines.tag.endswith("TextEquiv"): + for cheild_text in child_textlines: + if cheild_text.tag.endswith("Unicode"): + textline_text = cheild_text.text + if not textline_text: + pass + else: + with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file: + text_file.write(textline_text) + + cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop ) + + indexer_textlines+=1 + + if not self.export_textline_images_and_text: + indexer_text_region = indexer_text_region +1 - - extracted_texts = [] + if not self.export_textline_images_and_text: + extracted_texts = [] - n_iterations = math.ceil(len(cropped_lines) / b_s) + n_iterations = math.ceil(len(cropped_lines) / b_s) - for i in range(n_iterations): - if i==(n_iterations-1): - n_start = i*b_s - imgs = cropped_lines[n_start:] - imgs = np.array(imgs) - imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3) - else: - n_start = i*b_s - n_end = (i+1)*b_s - imgs = cropped_lines[n_start:n_end] - imgs = np.array(imgs).reshape(b_s, image_height, image_width, 3) - + for i in range(n_iterations): + if i==(n_iterations-1): + n_start = i*b_s + imgs = cropped_lines[n_start:] + imgs = np.array(imgs) + imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3) + else: + n_start = i*b_s + n_end = (i+1)*b_s + imgs = cropped_lines[n_start:n_end] + imgs = np.array(imgs).reshape(b_s, image_height, image_width, 3) + - preds = self.prediction_model.predict(imgs, verbose=0) - pred_texts = self.decode_batch_predictions(preds) + preds = self.prediction_model.predict(imgs, verbose=0) + pred_texts = self.decode_batch_predictions(preds) - for ib in range(imgs.shape[0]): - pred_texts_ib = pred_texts[ib].strip("[UNK]") - extracted_texts.append(pred_texts_ib) - - - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] - - extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] - #print(extracted_texts_merged, len(extracted_texts_merged)) + for ib in range(imgs.shape[0]): + pred_texts_ib = pred_texts[ib].strip("[UNK]") + extracted_texts.append(pred_texts_ib) + + extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] - unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) + extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] + unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) - #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer') - text_by_textregion = [] - for ind in unique_cropped_lines_region_indexer: - extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] - - text_by_textregion.append(" ".join(extracted_texts_merged_un)) - - - - ##unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) - - ##text_by_textregion = [] - ##for ind in unique_cropped_lines_region_indexer: - ##extracted_texts_merged_un = np.array(extracted_texts)[np.array(cropped_lines_region_indexer)==ind] - - ##text_by_textregion.append(" ".join(extracted_texts_merged_un)) - - 
indexer = 0 - indexer_textregion = 0 - for nn in root1.iter(region_tags): - text_subelement_textregion = ET.SubElement(nn, 'TextEquiv') - unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode') - - - has_textline = False - for child_textregion in nn: - if child_textregion.tag.endswith("TextLine"): - text_subelement = ET.SubElement(child_textregion, 'TextEquiv') - unicode_textline = ET.SubElement(text_subelement, 'Unicode') - unicode_textline.text = extracted_texts_merged[indexer] - indexer = indexer + 1 - has_textline = True - if has_textline: - unicode_textregion.text = text_by_textregion[indexer_textregion] - indexer_textregion = indexer_textregion + 1 + text_by_textregion = [] + for ind in unique_cropped_lines_region_indexer: + extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] + text_by_textregion.append(" ".join(extracted_texts_merged_un)) + + indexer = 0 + indexer_textregion = 0 + for nn in root1.iter(region_tags): + text_subelement_textregion = ET.SubElement(nn, 'TextEquiv') + unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode') - ET.register_namespace("",name_space) - tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) - #print("Job done in %.1fs", time.time() - t0) + + has_textline = False + for child_textregion in nn: + if child_textregion.tag.endswith("TextLine"): + text_subelement = ET.SubElement(child_textregion, 'TextEquiv') + unicode_textline = ET.SubElement(text_subelement, 'Unicode') + unicode_textline.text = extracted_texts_merged[indexer] + indexer = indexer + 1 + has_textline = True + if has_textline: + unicode_textregion.text = text_by_textregion[indexer_textregion] + indexer_textregion = indexer_textregion + 1 + + ET.register_namespace("",name_space) + tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) + #print("Job done in %.1fs", time.time() - t0)
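
A note on the first of the two errors resolved in [PATCH 102/107]: the old expression remained_sep_indexes[mother==0 & child==1] fails because & binds tighter than == in Python, so it parses as the chained comparison mother == (0 & child) == 1 rather than the intended conjunction of two boolean masks, and because the plain sequences involved do not support boolean indexing in the first place. A minimal sketch of the corrected indexing, with hypothetical values standing in for the separator bookkeeping:

    import numpy as np

    # Hypothetical stand-ins for the separator bookkeeping in
    # return_x_start_end_mothers_childs_and_type_of_reading_order.
    remained_sep_indexes = [3, 5, 7, 9]
    mother = [0, 1, 0, 0]  # 1 = separator has a mother (assumed semantics)
    child = [1, 0, 0, 1]   # 1 = separator has a child (assumed semantics)

    # Convert to arrays and parenthesize each comparison, as the patch does.
    idx = np.array(remained_sep_indexes)
    without_mother = idx[np.array(mother) == 0]
    with_child_without_mother = idx[(np.array(mother) == 0) & (np.array(child) == 1)]

    print(without_mother)             # [3 7 9]
    print(with_child_without_mother)  # [3 9]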
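
The second error resolved in [PATCH 102/107] is a plain type error: [0] * len(contours[jj]) + jj tries to add an integer to a list, and + on lists means concatenation. The patch broadcasts with numpy instead; a runnable toy comparison with hypothetical sizes:

    import numpy as np

    n = 4   # hypothetical number of textlines inside text region jj
    jj = 2

    # ind_ins = [0] * n + jj  # TypeError: can only concatenate list (not "int") to list

    ind_ins = np.zeros(n) + jj     # array([2., 2., 2., 2.])
    list_ind_ins = list(ind_ins)   # appended to indexes_of_textline_tot in the patch

    # A pure-Python equivalent without the numpy round trip:
    list_ind_ins_alt = [jj] * n    # [2, 2, 2, 2]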
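
The light-version error fixed in [PATCH 103/107] appears when slope_deskew stays below SLOPE_THRESHOLD: the deskewed list contours_only_text_parent_d_ordered is then left empty, so popping it in lockstep with the other per-region lists raised an IndexError. A runnable toy version of the guarded loop, with hypothetical list contents:

    contours = [['c0'], ['c1'], ['c2']]
    contours_textline = [[], ['t1'], ['t2']]   # region 0 has no textlines
    text_con_org = [['o0'], ['o1'], ['o2']]
    contours_only_text_parent_d_ordered = []   # empty when deskewing was skipped

    for ind in [0]:                            # indexes of regions to drop
        contours.pop(ind)
        contours_textline.pop(ind)
        text_con_org.pop(ind)
        if len(contours_only_text_parent_d_ordered) > 0:
            contours_only_text_parent_d_ordered.pop(ind)   # now skipped, no IndexError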
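
The CNN-RNN input preparation added in [PATCH 106/107] (preprocess_and_resize_image_for_ocrcnn_model) scales each text line crop to the fixed model height, keeps the aspect ratio up to the fixed model width, pads the remainder with white, and normalizes to [0, 1]. A self-contained paraphrase that uses cv2.resize in place of eynollah's resize_image helper:

    import numpy as np
    import cv2

    def preprocess_for_ocr_cnn(img, image_height=32, image_width=512):
        # Scale to the model height while preserving the aspect ratio,
        # but never exceed the fixed model width.
        ratio = image_height / float(img.shape[0])
        width_new = min(int(ratio * img.shape[1]), image_width)
        img = cv2.resize(img, (width_new, image_height))
        # Pad to the right with white so every sample is height x width x 3.
        canvas = np.ones((image_height, image_width, 3)) * 255
        canvas[:, :width_new, :] = img[:, :, :]
        return canvas / 255.0

Lines whose height-to-width ratio is at most 0.05 are first split in two by return_textlines_split_if_needed, so that very long lines are not crushed into the fixed 512-pixel width.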
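
[PATCH 106/107] also swaps the batch reshape from (b_s, image_width, image_height, 3) to (b_s, image_height, image_width, 3): Keras convolutional models consume channels-last NHWC tensors by default, so a batch of line images must be stacked as (batch, height, width, channels). Illustrated with hypothetical sizes:

    import numpy as np

    image_height, image_width, b_s = 32, 512, 8
    cropped_lines = [np.zeros((image_height, image_width, 3)) for _ in range(b_s)]

    imgs = np.array(cropped_lines).reshape(b_s, image_height, image_width, 3)  # NHWC
    print(imgs.shape)  # (8, 32, 512, 3)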
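
Split lines are tracked through the 0/1/-1 markers in cropped_lines_meging_indexing and rejoined after prediction. The merge comprehension in the patch is dense enough to deserve a worked toy example; the strings here are hypothetical:

    # 0 = whole line, 1 = first half of a split line, -1 = second half
    extracted_texts = ["erste Zeile", "zweite Zei", "le", "dritte Zeile"]
    marks = [0, 1, -1, 0]

    merged = [extracted_texts[i] if marks[i] == 0
              else extracted_texts[i] + extracted_texts[i + 1] if marks[i] == 1
              else None
              for i in range(len(marks))]
    merged = [t for t in merged if t is not None]  # drop the consumed -1 halves
    print(merged)  # ['erste Zeile', 'zweite Zeile', 'dritte Zeile']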
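
Finally, a condensed sketch of the export path added in [PATCH 107/107]: walk the PAGE-XML, crop each text line's bounding box out of the page image, and write image/text pairs for OCR training. The function name and output naming here are hypothetical, and the real implementation additionally masks each crop with the textline polygon unless --do_not_mask_with_textline_contour is given:

    import os
    import cv2
    import numpy as np
    import xml.etree.ElementTree as ET

    def export_textlines(page_xml, page_img, out_dir):
        img = cv2.imread(page_img)
        root = ET.parse(page_xml).getroot()
        ns = root.tag.split('}')[0] + '}'  # PAGE namespace prefix, e.g. '{http://...}'
        indexer = 0
        for line in root.iter(ns + 'TextLine'):
            points = line.find(ns + 'Coords').attrib['points'].split(' ')
            coords = np.array([[int(p.split(',')[0]), int(p.split(',')[1])]
                               for p in points], dtype=np.int32)
            x, y, w, h = cv2.boundingRect(coords)
            unicode_el = line.find('.//' + ns + 'Unicode')
            if unicode_el is None or not unicode_el.text:
                continue  # the patch likewise skips lines without ground-truth text
            stem = os.path.join(out_dir, 'line_' + str(indexer))
            cv2.imwrite(stem + '.png', img[y:y + h, x:x + w])
            with open(stem + '.txt', 'w') as f:
                f.write(unicode_el.text)
            indexer += 1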